In [3]:
# Set up modules for Google functionality
from google.cloud import bigquery # To run BQ statements
from google_auth_oauthlib import flow # To authorise as user
from googleapiclient.discovery import build # To pull in from sheets, slides etc. API
from google.auth.transport.requests import Request

# Display
import pprint

# Operating system stuff
import pickle
import os.path
import sys

# Data handling
import json
import requests
from pandas import read_csv
from pandas import datetime

# Stats, models, datasheets
import pandas as pd
import pyreadstat

# Visualisation
from matplotlib import pyplot
import matplotlib.pyplot as plt
import matplotlib_venn # For venn diagrams
from pandas.plotting import autocorrelation_plot

# Network graphs
import networkx as nx


# Misc
from xlsxwriter.utility import xl_rowcol_to_cell # Used to create cell references
import itertools

# Load custom scripts in reusable_code folder
sys.path.append(r'/home/jupyter/reusable_code')

import google_api_functions as gaf


In [4]:
# Authenticate through the shared helper module, then open a BigQuery client
# with the resulting credentials.
# NOTE(review): the folder argument is presumably where the helper looks for
# its token/secret files -- confirm against google_api_functions.
creds = gaf.Authenticate_Google(
    r'/home/jupyter/reusable_code/'
)
bq = bigquery.Client(
    project='itv-bde-analytics-dev',
    credentials=creds,
)

In [5]:
# Programme co-viewing query.
#   progs       : distinct (britbox_id, programme) pairs for rows flagged as 'Show'
#   self_joined : every (prog1, prog2) pair seen by the same britbox_id
#                 (includes prog1 == prog2, since progs is joined to itself)
#   summary1    : distinct customers per (prog1, prog2) pair
#   summary2    : distinct customers per prog1
# Final select  : PC_viewers = pair customers / prog1 customers,
#                 num_viewers_prog1 = prog1 customers.
query = '''with progs as (select distinct britbox_id, title.programme
from `itv-bde-analytics-dev.britbox_sandbox.CJ_viewing_in_trial`
where TYPE_FLAG='Show'
)

,self_joined as (select
a.britbox_id,a.programme as prog1,
b.programme as prog2
from progs a
inner join
progs b
using (britbox_id)
)


,summary1 as (select prog1,prog2,count(distinct britbox_id) as N_custs from self_joined
group by 1,2)

,summary2 as (select prog1, count(distinct britbox_id) as N_custs from self_joined group by 1)

select summary1.*,
summary1.N_custs/summary2.N_custs as PC_viewers,
summary2.N_custs as num_viewers_prog1
from summary1
left join
summary2
on summary1.prog1=summary2.prog1'''

# Submit the query, then materialise the result as a DataFrame.
overlap_job = bq.query(query)
prog_overlap_df = overlap_job.to_dataframe()

# Preview the first rows as the cell output.
prog_overlap_df.head()

In [6]:
# Build the overlap matrix: one row per prog1, one column per prog2, cells hold
# PC_viewers. Pairs with a missing title on either side are dropped first.
titled_pairs = prog_overlap_df.dropna(subset=['prog1', 'prog2'])
pivot_df = titled_pairs.pivot(
    index='prog1',
    columns='prog2',
    values='PC_viewers',
)

In [7]:
# Preview the first five rows of the overlap matrix.
pivot_df.head(5)

In [None]:
def colorin(x):
    """Return the CSS background rule used to highlight one cell value.

    Values above 0.8 are highlighted yellow, values above 0.3 (up to and
    including 0.8) are highlighted green, and everything else gets an empty
    string, i.e. no styling. NaN fails both comparisons and is left unstyled.
    """
    if x > 0.8:
        return 'background-color : yellow'
    if x > 0.3:
        return 'background-color : green'
    return ''

# Render the matrix with every cell coloured by colorin (applied element-wise).
pivot_df.style.applymap(colorin)

In [10]:
# Keep only the pairs worth reporting:
#   - both programme titles are present
#   - prog1 and prog2 are different programmes
#   - PC_viewers is above 0.2
#   - prog1 has more than 1000 viewers
has_both_titles = prog_overlap_df['prog1'].notna() & prog_overlap_df['prog2'].notna()
is_distinct_pair = prog_overlap_df['prog1'] != prog_overlap_df['prog2']
is_strong_overlap = prog_overlap_df['PC_viewers'] > .2
is_popular_prog1 = prog_overlap_df['num_viewers_prog1'] > 1000

filtered_df = prog_overlap_df[
    has_both_titles & is_distinct_pair & is_strong_overlap & is_popular_prog1
]

print("Filtered down from {} to {} records".format(len(prog_overlap_df), len(filtered_df)))

In [11]:
# Rebuild the overlap matrix from the filtered pairs only
# (rows: prog1, columns: prog2, cells: PC_viewers).
pivot_df = filtered_df.pivot(
    index='prog1',
    columns='prog2',
    values='PC_viewers',
)

In [16]:
# Copy the row labels into a regular 'prog1' column and move it to the front.
# The index itself is left in place.
pivot_df = pivot_df.assign(prog1=pivot_df.index)
col_list = ['prog1'] + [name for name in pivot_df.columns if name != 'prog1']
pivot_df = pivot_df[col_list]

# Preview the reshaped frame as the cell output.
pivot_df.head()

In [18]:
# Push the matrix to the Google Sheet through the shared helper.
# NOTE(review): the arguments suggest this overwrites 'Sheet3' from cell A1 and
# includes the header row -- confirm against google_api_functions.
gaf.Write_whole_df_to_gsheet(
    creds,
    pivot_df,
    '1FMazIlXs4noQAgpE3VoSqrPIM6ZpQei3_HCAbpEjy1c',
    'Sheet3',
    valueInputOption='RAW',
    append_overwrite='overwrite',
    headers='Y',
    topleftcell='A1',
)