In [1]:
# modules
import numpy as np
import matplotlib.pyplot as plt
from   scipy import optimize
import pandas as pd
from scipy.optimize import curve_fit
import scipy.stats as stats
import sys

# Import plotly packages - plotly offers a wide range of interactive charting options
import plotly.graph_objects as go
import plotly.offline as pyo

# Load custom scripts in reusable_code folder
sys.path.append(r'/home/jupyter/reusable_code')

import google_api_functions as gaf

from google.cloud import bigquery
from datetime import date

import re
import networkx as nx
import time

In [5]:
# Authenticate through the shared helper module, pointing it at the reusable_code folder.
creds = gaf.Authenticate_Google(
    r'/home/jupyter/reusable_code/'
)

# BigQuery client used by every query in this notebook.
bq = bigquery.Client(
    project='itv-bde-analytics-dev',
    credentials=creds,
)


def ViewingTurf(fromdate,todate,nitems):
    """Build a greedy TURF (incremental reach) ranking of programmes in BigQuery.

    Uses the module-level ``bq`` client and (re)creates two tables in
    ``britbox_sandbox``:

    * ``ViewingTURF_Not`` - the ``nitems`` programmes with the most distinct
      viewers in the date range (plain ranking, viewers may overlap).
    * ``ViewingTURF`` - filled one row per pass. Each pass inserts the
      programme with the most distinct viewers among viewers who have not
      watched any programme already in the table; ``N`` is the pick order.

    Parameters
    ----------
    fromdate, todate : str
        Bounds compared against ``event_partition`` with ``between``
        (both ends inclusive). They are interpolated into the SQL text, so
        only pass trusted values.
    nitems : int
        Maximum number of programmes to select.

    Returns
    -------
    pandas.DataFrame
        The rows of ``britbox_sandbox.ViewingTURF`` ordered by ``N``. It may
        hold fewer than ``nitems`` rows when no programme adds new viewers.

    Raises
    ------
    google.api_core.exceptions.GoogleAPICallError
        If any of the BigQuery jobs fails (surfaced by ``QueryJob.result()``).
    """
    # Snapshot of the plain top-N ranking (no de-duplication of viewers).
    query="""create or replace table `britbox_sandbox.ViewingTURF_Not` as
    with topshows as
    (select ccid.programme_id,
    title.programme,
    count(distinct kpi_user.requested) as viewers
    from `itv-bde-analytics-prd.britbox_mart.Viewing_clean`
    where event_partition between '{0}' and '{1}'
    and ccid.programme_id is not null
    group by 1,2
    )
    ,ranked as (select *,
    row_number() over (order by viewers desc) as N
    from topshows)

    select * from ranked where N<={2}
    order by viewers desc""".format(fromdate,todate,nitems)
    # result() blocks until the job is finished and raises if it failed.
    bq.query(query).result()

    # Create Turf table
    query="""create or replace Table `britbox_sandbox.ViewingTURF`  (programme_id string, programme string,viewers int64,N int64);"""
    # Wait for the DDL so the first insert cannot race the table creation.
    bq.query(query).result()

    i = 0
    while i < nitems:
        print(i)
        print("Running")
        query="""
        insert into `britbox_sandbox.ViewingTURF` 
                 with reached_viewers as
                (select distinct kpi_user.requested as custid from 
                `itv-bde-analytics-prd.britbox_mart.Viewing_clean`
                where event_partition between '{0}' and '{1}'
                and ccid.programme_id in (select programme_id from `britbox_sandbox.ViewingTURF`)
                )

                ,topshows as
                (select ccid.programme_id,
                title.programme,
                count(distinct kpi_user.requested) as viewers
                from `itv-bde-analytics-prd.britbox_mart.Viewing_clean` a
                left join reached_viewers b 
                on a.kpi_user.requested=b.custid
                where a.event_partition between '{0}' and '{1}'
                and b.custid is null -- Exclude viewers reached already
                and ccid.programme_id is not null
                and ccid.programme_id not in (select programme_id from `britbox_sandbox.ViewingTURF`)
                group by 1,2
                )
                ,ranked as (select *,
                row_number() over (order by viewers desc) as Row_N
                from topshows)

                select programme_id,programme, viewers, {2} as N from ranked where Row_N=1
                order by viewers desc;
                    """.format(fromdate,todate,i+1)
        # Wait for the insert; unlike polling done(), this raises on failure.
        bq.query(query).result()

        query="""select count(*) as N from `britbox_sandbox.ViewingTURF`"""
        res= bq.query(query).to_dataframe()
        row_count = int(res['N'][0])
        print(row_count)
        if row_count <= i:
            # The insert added nothing: every remaining viewer is already
            # reached (or no programmes are left). Stop instead of looping forever.
            print("No further programmes add new viewers, stopping at {}".format(i))
            break
        i = row_count
        print("Now i is {}".format(i))

    print("Finished!")

    # Hand the ranking back so the calling cell can display it.
    query="""select programme_id, programme, viewers, N from `britbox_sandbox.ViewingTURF` order by N"""
    return bq.query(query).to_dataframe()


In [6]:
# Run the TURF build for viewing between 2020-01-01 and 2021-01-01, up to 15 programmes.
results = ViewingTurf(
    fromdate='2020-01-01',
    todate='2021-01-01',
    nitems=15,
)
results

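### As above, but excluding the new Spitting Image (programme_id `67p7jyj`) and counting only viewers who watched at least two episodes of a programme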

In [4]:
# Authenticate through the shared helper module, pointing it at the reusable_code folder.
creds = gaf.Authenticate_Google(
    r'/home/jupyter/reusable_code/'
)

# BigQuery client used by the queries below.
bq = bigquery.Client(
    project='itv-bde-analytics-dev',
    credentials=creds,
)


def ViewingTurf(fromdate,todate,nitems):
    """Build a greedy TURF (incremental reach) ranking from a pre-aggregated base table.

    Note: this redefines the ``ViewingTurf`` from the earlier cell. Uses the
    module-level ``bq`` client and (re)creates three tables in
    ``britbox_sandbox``:

    * ``ViewingTURF_Base`` - one row per viewer and programme in the date
      range, excluding programme_id ``67p7jyj`` (Spitting Image) and keeping
      only viewer/programme pairs with at least two distinct episodes.
    * ``ViewingTURF_Not`` - the ``nitems`` programmes with the most distinct
      viewers in the base table (plain ranking, viewers may overlap).
    * ``ViewingTURF`` - filled one row per pass. Each pass inserts the
      programme with the most distinct viewers among viewers who have not
      watched any programme already in the table; ``N`` is the pick order.

    Parameters
    ----------
    fromdate, todate : str
        Bounds compared against ``event_partition`` with ``between``
        (both ends inclusive). They are interpolated into the SQL text, so
        only pass trusted values.
    nitems : int
        Maximum number of programmes to select.

    Returns
    -------
    pandas.DataFrame
        The rows of ``britbox_sandbox.ViewingTURF`` ordered by ``N``. It may
        hold fewer than ``nitems`` rows when no programme adds new viewers.

    Raises
    ------
    google.api_core.exceptions.GoogleAPICallError
        If any of the BigQuery jobs fails (surfaced by ``QueryJob.result()``).
    """
    #######################
    # Base Table
    #######################
    print('Creating base table')
    query="""create or replace table `britbox_sandbox.ViewingTURF_Base` as
    select kpi_user.requested as bbid,
    ccid.programme_id ,
    title.programme ,
    count(distinct ccid.coalesced_episode_id) as eps
    from `itv-bde-analytics-prd.britbox_mart.Viewing_clean`
    where event_partition between '{0}' and '{1}'
        and ccid.programme_id is not null
        and ccid.programme_id !='67p7jyj' -- Exclude Spitting Image
    group by 1,2,3
    having eps>=2
    """.format(fromdate,todate)

    # result() blocks until the job is finished and raises if it failed,
    # so reaching the next line means the table really exists.
    bq.query(query).result()
    print('Base table created')

    #######################
    # Calculating top shows
    #######################
    print('Now calculating top shows')
    query="""create or replace table `britbox_sandbox.ViewingTURF_Not` as
    with topshows as
    (select programme_id,
    programme,
    count(distinct bbid) as viewers
    from `britbox_sandbox.ViewingTURF_Base`
    group by 1,2
    )
    ,ranked as (select *,
    row_number() over (order by viewers desc) as N
    from topshows)

    select * from ranked where N<={0}
    order by viewers desc""".format(nitems)
    bq.query(query).result()

    print('Now Top shows calculated, now creating a Turf placeholder table')

    # Create Turf table
    query="""create or replace Table `britbox_sandbox.ViewingTURF`  (programme_id string, programme string,viewers int64,N int64);"""
    bq.query(query).result()
    print('Placeholder table created, moving onto loop')

    i = 0
    while i < nitems:
        print("Running loop {}".format(i))
        query="""
        insert into `britbox_sandbox.ViewingTURF` 
                 with reached_viewers as
                (select distinct bbid as custid from 
                `britbox_sandbox.ViewingTURF_Base`
                where programme_id in (select programme_id from `britbox_sandbox.ViewingTURF`)
                )

                ,topshows as
                (select programme_id,
                programme,
                count(distinct bbid) as viewers
                from `britbox_sandbox.ViewingTURF_Base` a
                left join reached_viewers b 
                on a.bbid=b.custid
                where 
                 b.custid is null -- Exclude viewers reached already
                and programme_id not in (select programme_id from `britbox_sandbox.ViewingTURF`)
                group by 1,2
                )
                ,ranked as (select *,
                row_number() over (order by viewers desc) as Row_N
                from topshows)

                select programme_id,programme, viewers, {0} as N from ranked where Row_N=1
                order by viewers desc;
                    """.format(i+1)
        # Wait for the insert; unlike polling done(), this raises on failure.
        bq.query(query).result()
        print('Insert step finished, moving on')

        query="""select count(*) as N from `britbox_sandbox.ViewingTURF`"""
        res= bq.query(query).to_dataframe()
        row_count = int(res['N'][0])
        if row_count <= i:
            # The insert added nothing: every remaining viewer is already
            # reached (or no programmes are left). Stop instead of looping forever.
            print("No further programmes add new viewers, stopping at {}".format(i))
            break
        i = row_count
        print("i updated, now i is {}".format(i))

    print("Finished!")

    # Hand the ranking back so the calling cell can display it.
    query="""select programme_id, programme, viewers, N from `britbox_sandbox.ViewingTURF` order by N"""
    return bq.query(query).to_dataframe()

# Run the TURF build for viewing between 2020-01-01 and 2021-01-01, up to 15 programmes.
results = ViewingTurf(
    fromdate='2020-01-01',
    todate='2021-01-01',
    nitems=15,
)
results