In [1]:
import pprint
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib_venn import venn3
from matplotlib_venn import venn3_circles
from matplotlib_venn import venn2
from matplotlib_venn import venn2_circles

# modules
import numpy as np
import matplotlib.pyplot as plt
from   scipy import optimize
from scipy.optimize import curve_fit
import sys

# Load custom scripts in reusable_code folder
sys.path.append(r'/home/jupyter/reusable_code')

import google_api_functions as gaf

from google.cloud import bigquery

# Authenticate through the shared helper module (pointed at the reusable_code folder),
# then open the BigQuery client that every query cell in this notebook uses.
creds = gaf.Authenticate_Google(r'/home/jupyter/reusable_code/')
bq = bigquery.Client(
    credentials=creds,
    project='itv-bde-analytics-prd',
)

# 3 way overlap between New SI, Classic SI and Trailers

In [2]:

# SQL for the 3-way overlap between New SI, Classic SI and Trailer viewers.
#   overlap2 - full join of the distinct britbox_ids with prog_type 'New Spitting Image'
#              and 'Classic Spitting Image'; a missing side becomes a 0 flag via ifnull.
#   overlap3 - full join of overlap2 with the distinct britbox_ids with prog_type
#              'Spitting Image Trailers', adding the Viewed_Trailer flag.
#   agg      - count of distinct britbox_ids (N) per flag combination.
# The final select adds PC = N / sum of N over all rows (a fraction, not a percentage)
# and orders the rows by Viewed_Trailer, Viewed_Classic, Viewed_New (columns 3, 2, 1).
query='''with overlap2 as 
(select coalesce(a.britbox_id,b.britbox_id) as britbox_id,
ifnull(Viewed_New,0) as Viewed_New,
ifnull(Viewed_Classic,0) as Viewed_Classic
from
  (select distinct britbox_id, 1 as Viewed_New
  from `britbox_analytics.Spitting_Image_Viewers`
  where prog_type='New Spitting Image') a
  full join
  (select distinct britbox_id, 1 as Viewed_Classic
  from `britbox_analytics.Spitting_Image_Viewers`
  where prog_type='Classic Spitting Image') b
  on a.britbox_id=b.britbox_id
  )
  
,overlap3 as  
  (select coalesce(a.britbox_id,b.britbox_id) as britbox_id,
  ifnull(Viewed_New,0) as Viewed_New,
  ifnull(Viewed_Classic,0) as Viewed_Classic,
  ifnull(Viewed_Trailer,0) as Viewed_Trailer
  from overlap2  a
  full join
  (select distinct britbox_id, 1 as Viewed_Trailer
  from `britbox_analytics.Spitting_Image_Viewers`
  where prog_type='Spitting Image Trailers') b
 on a.britbox_id=b.britbox_id)

,agg as 
(select Viewed_New,Viewed_Classic
,Viewed_Trailer
,count(distinct britbox_id) as N
from overlap3
group by 1,2,3)

select *, N/sum(N) over () as PC
from agg
order by 3,2,1'''

In [3]:
# Submit the SQL held in `query` to BigQuery, then materialise the result as a DataFrame.
query_job = bq.query(query)
df = query_job.to_dataframe()
df

In [4]:

# Draw the 3-way Venn diagram from the aggregated overlap percentages in `df`.
#
# Regions are looked up by flag combination rather than by row position, so a
# combination that is absent from the query result is plotted as 0 instead of
# shifting every later percentage into the wrong region.
flag_cols = list(df.columns[:3])
pc_by_region = {
    tuple(int(flag) for flag in flags): round(pc * 100, 1)
    for *flags, pc in df[flag_cols + ['PC']].itertuples(index=False, name=None)
}
# A 7-value `subsets` sequence is read by venn3 in this region order
# (flags of set A, set B, set C): 100, 010, 110, 001, 101, 011, 111.
venn3_regions = [(1, 0, 0), (0, 1, 0), (1, 1, 0), (0, 0, 1), (1, 0, 1), (0, 1, 1), (1, 1, 1)]
pc_list = tuple(pc_by_region.get(region, 0) for region in venn3_regions)
labels = tuple(str(col).replace('_', ' ') for col in flag_cols)

v = venn3(subsets=pc_list, set_labels=labels, set_colors=['red', 'orange', 'purple'])
# Title is set on the axes. The earlier `plt.title='Hurrah'` was an assignment that
# replaced pyplot's title function with a string, so no title was ever drawn.
plt.gca().set_title('3 way overlap between New SI, Classic SI and Trailers')
plt.savefig(r'/home/jupyter/si_venns/3way Venn- SI, Classic, Trailer.jpg')

plt.show()
print('As a sanity check, the percentages add up to ',np.sum(df.PC))

df

In [5]:
# 2 way overlap between New SI & Classic SI

In [10]:

# SQL for the 2-way overlap between New SI and Classic SI viewers.
#   overlap2 - full join of the distinct britbox_ids with prog_type 'New Spitting Image'
#              and 'Classic Spitting Image'; a missing side becomes a 0 flag via ifnull.
#   agg      - count of distinct britbox_ids (N) per flag combination.
# The final select adds PC = N / sum of N over all rows (a fraction, not a percentage)
# and orders the rows by Viewed_Classic, Viewed_New (columns 2, 1).
query='''with overlap2 as 
(select coalesce(a.britbox_id,b.britbox_id) as britbox_id,
ifnull(Viewed_New,0) as Viewed_New,
ifnull(Viewed_Classic,0) as Viewed_Classic
from
  (select distinct britbox_id, 1 as Viewed_New
  from `britbox_analytics.Spitting_Image_Viewers`
  where prog_type='New Spitting Image') a
  full join
  (select distinct britbox_id, 1 as Viewed_Classic
  from `britbox_analytics.Spitting_Image_Viewers`
  where prog_type='Classic Spitting Image') b
  on a.britbox_id=b.britbox_id
  )
  

,agg as 
(select Viewed_New,Viewed_Classic
,count(distinct britbox_id) as N
from overlap2
group by 1,2)

select *, N/sum(N) over () as PC
from agg
order by 2,1'''
# Submit the SQL held in `query` to BigQuery, then materialise the result as a DataFrame.
query_job = bq.query(query)
df = query_job.to_dataframe()
df

In [11]:

# Draw the 2-way Venn diagram (New SI vs Classic SI) from the percentages in `df`.
#
# Only the first two columns are set flags; the third column is the count N, so it
# must not be used as a set label. Regions are looked up by flag combination rather
# than by row position, so a combination missing from the query result is plotted
# as 0 instead of shifting the remaining percentages into the wrong region.
flag_cols = list(df.columns[:2])
pc_by_region = {
    tuple(int(flag) for flag in flags): round(pc * 100, 1)
    for *flags, pc in df[flag_cols + ['PC']].itertuples(index=False, name=None)
}
# A 3-value `subsets` sequence is read by venn2 in this region order
# (flags of set A, set B): 10, 01, 11.
venn2_regions = [(1, 0), (0, 1), (1, 1)]
pc_list = tuple(pc_by_region.get(region, 0) for region in venn2_regions)
labels = tuple(str(col).replace('_', ' ') for col in flag_cols)

# Two sets, so two base colours (the overlap colour is mixed by the library).
v = venn2(subsets=pc_list, set_labels=labels, set_colors=['red', 'orange'])
# Title is set on the axes. The earlier `plt.title='Hurrah'` was an assignment that
# replaced pyplot's title function with a string, so no title was ever drawn.
plt.gca().set_title('2 way overlap between New SI & Classic SI')
plt.savefig(r'/home/jupyter/si_venns/2way Venn- SI, Classic.jpg')

plt.show()
print('As a sanity check, the percentages add up to ',np.sum(df.PC))

df

In [45]:
# 2 way overlap between New SI and New Acqns

In [8]:

# SQL for the 2-way overlap between New SI viewers and recent acquisitions.
#   overlap2 - full join of the distinct britbox_ids with prog_type 'New Spitting Image'
#              and the distinct itvIDs (aliased to britbox_id) from Spitting_Image_Acqns
#              whose when_acquired is 'Pre-launch-period' or 'After SI launch';
#              a missing side becomes a 0 flag via ifnull.
#   agg      - count of distinct britbox_ids (N) per flag combination.
# The final select adds PC = N / sum of N over all rows (a fraction, not a percentage)
# and orders the rows by Acquired_Since_PR, Viewed_New (columns 2, 1).
# NOTE(review): the join assumes itvID and britbox_id share one identifier space - confirm.
query='''with overlap2 as 
(select coalesce(a.britbox_id,b.britbox_id) as britbox_id,
ifnull(Viewed_New,0) as Viewed_New,
ifnull(Acquired_Since_PR,0) as Acquired_Since_PR
from
  (select distinct britbox_id, 1 as Viewed_New
  from `britbox_analytics.Spitting_Image_Viewers`
  where prog_type='New Spitting Image') a
  full join
  (select distinct itvID as britbox_id, 1 as Acquired_Since_PR
  from `britbox_analytics.Spitting_Image_Acqns`
  where when_acquired in ('Pre-launch-period','After SI launch')) b
  on a.britbox_id=b.britbox_id
  )
  

,agg as 
(select Viewed_New,Acquired_Since_PR
,count(distinct britbox_id) as N
from overlap2
group by 1,2)

select *, N/sum(N) over () as PC
from agg
order by 2,1'''
# Submit the SQL held in `query` to BigQuery, then materialise the result as a DataFrame.
query_job = bq.query(query)
df = query_job.to_dataframe()
df

In [9]:

# Draw the 2-way Venn diagram (New SI viewers vs recent acquisitions) from `df`.
#
# Only the first two columns are set flags; the third column is the count N, so it
# must not be used as a set label. Regions are looked up by flag combination rather
# than by row position, so a combination missing from the query result is plotted
# as 0 instead of shifting the remaining percentages into the wrong region.
flag_cols = list(df.columns[:2])
pc_by_region = {
    tuple(int(flag) for flag in flags): round(pc * 100, 1)
    for *flags, pc in df[flag_cols + ['PC']].itertuples(index=False, name=None)
}
# A 3-value `subsets` sequence is read by venn2 in this region order
# (flags of set A, set B): 10, 01, 11.
venn2_regions = [(1, 0), (0, 1), (1, 1)]
pc_list = tuple(pc_by_region.get(region, 0) for region in venn2_regions)
labels = tuple(str(col).replace('_', ' ') for col in flag_cols)

# Two sets, so two base colours (the overlap colour is mixed by the library).
v = venn2(subsets=pc_list, set_labels=labels, set_colors=['red', 'orange'])
# Title is set on the axes. The earlier `plt.title='Hurrah'` was an assignment that
# replaced pyplot's title function with a string, so no title was ever drawn.
plt.gca().set_title('2 way overlap between New SI and New Acqns')
# This chart gets its own file: the path used before ('2way Venn- SI, Classic.jpg')
# was copied from the New-vs-Classic section and overwrote that section's chart.
plt.savefig(r'/home/jupyter/si_venns/2way Venn- SI, New Acqns.jpg')

plt.show()
print('As a sanity check, the percentages add up to ',np.sum(df.PC))

df