In [2]:
# modules
import numpy as np
import matplotlib.pyplot as plt
from   scipy import optimize
import pandas as pd
from scipy.optimize import curve_fit
import scipy.stats as stats
import sys

# Install/import plotly packages- this package has lots of graphical properties
import plotly.graph_objects as go
import plotly.offline as pyo

# Load custom scripts in reusable_code folder
sys.path.append(r'/home/jupyter/reusable_code')

import google_api_functions as gaf

from google.cloud import bigquery
from datetime import date

import re
import networkx as nx
import itertools
import time
import math

In [3]:
# Authenticate through the project-local helper module (google_api_functions).
# The argument is presumably the folder holding the credential files - TODO confirm.
creds = gaf.Authenticate_Google(r"/home/jupyter/reusable_code/")

# BigQuery client bound to the analytics dev project, using the credentials above.
bq = bigquery.Client(credentials=creds, project='itv-bde-analytics-dev')

# Fetch the distinct cluster numbers from the programme-score table.
query = """
select distinct clusterNum from `itv-bde-analytics-dev.britbox_sandbox.SW_Viewing_Programme_Score_5`
"""
cluster_query_job = bq.query(query)
df = cluster_query_job.to_dataframe()

# Step 1: Define Players
A player is a member of the coalition whose relative value you are trying to establish

In [4]:
# Flatten the rows of the query result into one flat list of players.
# NOTE: `df` is rebound to a different result further down the notebook, so this
# cell must run before that happens (i.e. in top-to-bottom order).
playerList = list(itertools.chain.from_iterable(df.values.tolist()))
playerList

### Establish some key properties

In [5]:
def PermCombin(n,r,repetitionAllowed=False,orderMatters=False):
    """Count the ways of selecting ``r`` items from ``n`` distinct items.

    Order matters for permutations but not for combinations.

    Parameters
    ----------
    n : int
        Number of distinct items available.
    r : int
        Number of items selected.
    repetitionAllowed : bool, default False
        Whether the same item may be selected more than once.
    orderMatters : bool, default False
        True counts permutations, False counts combinations.

    Returns
    -------
    int
        The exact count. Integer (floor) division is used throughout because
        every quotient below is a whole number; true division would return a
        float, lose exactness above 2**53 and overflow for large factorials.

    Raises
    ------
    ValueError
        Propagated from ``math.factorial`` when an argument of a factorial is
        negative (e.g. ``r > n`` without repetition).
    """
    if orderMatters:
        if repetitionAllowed:
            # Permutations with repetition: n^r
            return n**r
        # Permutations without repetition: n! / (n-r)!
        return math.factorial(n)//math.factorial(n-r)

    if repetitionAllowed:
        # Combinations with repetition: (r+n-1)! / ((n-1)! r!)
        return math.factorial(r+n-1)//(math.factorial(n-1)*math.factorial(r))

    # Combinations without repetition: n! / ((n-r)! r!)
    return math.factorial(n)//(math.factorial(n-r)*math.factorial(r))

    
# Number of players in the game.
numPlayers = len(playerList)

# Number of distinct orderings of all players (no repetition): N!
numPossiblePermutations = PermCombin(
    numPlayers,
    numPlayers,
    orderMatters=True,
    repetitionAllowed=False,
)

# Number of orderings in which a given player sits in a given position: N!/N
numPermutationsperComboNum = numPossiblePermutations / numPlayers

# Worked example with players A, B, C:
#   orderings: ABC, ACB, BAC, BCA, CAB, CBA  -> 6 in total
#   each player is first twice, second twice and third twice -> 6/3 = 2

# Generate unique combinations
Shapley values rely on permutations, insofar as they consider the incremental value of adding a player to the existing coalition at a specific point, e.g. Nothing>>B is not the same as A>>AB

In most practical examples though, the complete value of the prior coalition is the same i.e. AB=BA.
Therefore the delta of e.g. AB>>ABC or BA>>BAC is the same, so we can run the combination (AB) rather than the permutations (AB and BA) to get the incremental value of C with fewer runs


In [6]:
# For every coalition size 0..N, list all combinations of players of that size.
combos = [
    list(itertools.combinations(playerList, size))
    for size in range(numPlayers + 1)
]

# Flatten to one entry per combination, dropping the empty coalition.
flattened_combos = [
    members
    for same_size_group in combos
    for members in same_size_group
    if len(members) > 0
]


# Run the "value" function
For each combination of players, we need to ascertain the "value" of that combination. This might be running a regression, another model, or something simpler.
We then need to store the results of each run in a table.

In [32]:
# Create (or reset) the results table with two columns: Combination and Value.
query="""
        create or replace table `britbox_sandbox.SW_Combinations_QualitySegments` (Combination string, Value float64)"""
# Wait for the DDL to finish so the inserts below cannot race the table creation.
bq.query(query).result()

# Map each combination's string key to the underlying tuple of players.
combodict={}
for combo in flattened_combos:

    # String ID of the combination: each member single-quoted, comma separated.
    # It doubles as the SQL `in (...)` list and as the stored Combination label.
    combokey=', '.join('\'{0}\''.format(w) for w in combo)

    # Keep the original members alongside the string key for later Python-side use.
    combodict[combokey]=combo

    # Value of the combination = distinct viewers of programmes in these clusters during 2020.
    query="""
        insert into `britbox_sandbox.SW_Combinations_QualitySegments`
        select "{0}" as combination, count(distinct britbox_id) as Value

        from `itv-bde-analytics-dev.britbox_analytics.Viewing_clean` a
        inner join `itv-bde-analytics-dev.britbox_sandbox.SW_Viewing_Programme_Score_5` b
        on a.ccid.programme_id=b.programme_id and a.title.programme=b.programme

        where a.event_partition>='2020-01-01' and a.event_partition <'2021-01-01'
        and cast(ClusterNum as string) in ({0})
        """.format(combokey)

    # Call .result() (the original only printed the bound method) so that each
    # insert has completed, and any job error is raised, before moving on.
    bq.query(query).result()
    print("Stored value for combination: {0}".format(combokey))

# Get the comparison combination
Shapley values are a weighted average of combinations with and without a player.

E.g. to get the value of C you do:
ABC vs AB
AC vs A
BC vs B
C vs nothing

In [33]:
# For every player, pair each combination containing that player ("with") with
# the same combination minus the player ("without").

# Index the known combinations by their unordered membership so the "without"
# key is a single lookup instead of a scan over every combination.
key_by_members={frozenset(members): key for key, members in combodict.items()}

shapleydict=[]

for player in playerList: # For each player
    for with_combokey, with_combo in combodict.items(): # For each combination of players
        if player not in with_combo:
            continue

        # Members of the same combination once the player is removed
        without_members=frozenset(x for x in with_combo if x!=player)

        # The player on their own has no "without" combination (the empty
        # coalition is not stored), which is recorded as an empty key.
        without_combokey=key_by_members.get(without_members,'')

        shapleydict.append({'player':player,'withkey':with_combokey,'withoutkey':without_combokey})


In [34]:
# One row per (player, with-combination, without-combination) record.
df_for_bq = pd.DataFrame(shapleydict)
df_for_bq

In [35]:
# Upload the list of variants with and without each player to BigQuery
dataset=bq.dataset('britbox_sandbox')
table_ref = dataset.table("SW_Combinations_Shapley_QualitySegments")

# Drop any previous version of the table. Only "table does not exist" is
# tolerated; any other failure (permissions, network, ...) is raised rather than
# silently ignored, so the load below never runs against a table that could not
# be removed.
bq.delete_table(table_ref, not_found_ok=True)

job = bq.load_table_from_dataframe(df_for_bq, table_ref)

job.result()  # Waits for table load to complete.
print("Loaded dataframe to {}".format(table_ref.path))

In [36]:
# Fixed pause intended to let previously submitted BigQuery work finish before
# the tables are read back. This is a crude guard: it does not confirm completion.
time.sleep(20)

# Fetch the coalition value with and without each player in a single joined
# query rather than one query per row; missing values are treated as 0.
query = """
        select a.*, ifnull(b.Value,0) as WithValue, ifnull(c.Value,0) as WithoutValue,
        ifnull(b.Value,0) - ifnull(c.Value,0) as IncrementalValue
        from `britbox_sandbox.SW_Combinations_Shapley_QualitySegments` a
        left join 
         `britbox_sandbox.SW_Combinations_QualitySegments` b
         on a.withkey=b.Combination
        
        left join 
         `britbox_sandbox.SW_Combinations_QualitySegments` c
         on a.withoutkey=c.Combination
        """

# Materialise the joined result as a DataFrame (this rebinds `df`).
shapley_join_job = bq.query(query)
df = shapley_join_job.to_dataframe()

In [37]:
# Display the joined with/without coalition values for inspection
df

# Get Weightings
Where we used combinations, not permutations, for efficiency earlier on (because we said AB=BA, so don't bother running both), it creates an uneven weighting.
For example to get the value of C, technically we should analyse all complete permutations and the incremental value that C brought to them each:
1) ABC
2) ACB
3) BAC
4) BCA
5) CAB
6) CBA

In reality we don't have 6 records, we have
Nothing >> C. This is permutations 5 and 6 above. In both cases, C is the first addition to the coalition.
A>> AC  (permutation 2)
B>> BC (permutation 4) . In both permutations 2 and 4, C is the second addition, but there are two distinct combinations.
AB>> ABC (permutations 1 and 3)

So in my dataset I have 4 from>>to combinations, spanning 6 underlying permutations. Therefore the combinations representing 2 "true" permutations need weights to reflect this.
Nothing >> C =2/6 =0.333
A>> AC = 1/6
B>> BC =1/6
AB>> ABC =2/6
Sum of weights = 1


To calculate this at scale we can say:
"OK, we know how many permutations there are in total (N!).
We know that a player can come in position 1, 2, ..., N, and will do so in each of those positions N!/N times (i.e. (N-1)! times). How many combinations are there with the player in that position?"

In example above:
N=3
Permutations = 3! = 6
Times in each position = 6/3 = 2  (or 2!)

For position 1, there is one combination (C) where C comes first. Therefore we do (2/1)/6
For position 2, there are two combinations (AC, BC) where C comes second, so we do (2/2)/6
For position 3, there is one combination  (ABC) where C comes third, so we do (2/1)/6




In [38]:

# Size of each "with" combination: members are comma separated, so commas + 1.
df['listlength'] = df['withkey'].str.count(',') + 1

# Per player, how many combinations exist at each size
# (SQL equivalent: count(withkey) over (partition by player, listlength)).
combos_of_same_size = df.groupby(['player', 'listlength'])['withkey'].transform('count')
df['numcombos'] = combos_of_same_size

# Share the orderings that put a player in a given position equally between
# the combinations of that size.
df['permutationsPerCombo'] = numPermutationsperComboNum / combos_of_same_size

# Weight = fraction of all orderings that this combination stands for.
df['Weighting'] = df['permutationsPerCombo'] / numPossiblePermutations

# Incremental value of the player in this combination, scaled by its weight.
df['WeightedIncremental'] = df['IncrementalValue'] * df['Weighting']

In [39]:
# Calculate relative contributions

In [40]:
# Per-player totals: the summed weighted increments are the absolute contribution,
# and the summed weights are a sanity check (they should add to 1 for each player).
# Columns are selected with a list: the tuple form `[...]['a','b']` was deprecated
# in pandas 1.0 and fails on pandas 2.x.
Contributions=df.groupby(['player'])[['WeightedIncremental','Weighting']].sum()

# Each player's share of the total contribution
Contributions['pct']=Contributions['WeightedIncremental']/Contributions['WeightedIncremental'].sum()

Contributions.sort_values(by='WeightedIncremental',ascending=False)

In [41]:
# Pie chart of each player's weighted contribution, largest first.
(
    Contributions
    .sort_values(by='WeightedIncremental', ascending=False)
    ['WeightedIncremental']
    .plot.pie(label='Contribution to Incremental Viewing in 2020', figsize=(10, 10))
)

In [42]:
# QA
# Get combination with all players


In [43]:
# QA: the players' contributions should add up to the value of the largest
# coalition, so this difference is expected to be (approximately) zero.
(
    Contributions['WeightedIncremental'].sum()
    - df['WithValue'].max()
)

In [18]:
# Distinct viewers per brand (brands outside the named list are grouped as 'Other').
# NOTE(review): this filter has no upper date bound, unlike the 2020-only window
# used for the combination values - confirm whether '< 2021-01-01' is intended.
# NOTE(review): this groups by `branded_as`, whereas the players above come from
# cluster numbers - confirm this cell still belongs to the current analysis.
query = """
select  
case when branded_as in ('BBC','ITV','C4','C5','BRITBOX') then branded_as
else 'Other' end as branded_as, count(distinct britbox_id) as Viewers

from `itv-bde-analytics-dev.britbox_analytics.Viewing_clean`
        where event_partition>='2020-01-01'
group by 1

"""
reach_query_job = bq.query(query)
non_uniqueReach = reach_query_job.to_dataframe()
non_uniqueReach

In [19]:
# Join the per-brand viewer counts to the per-player contributions and express
# each contribution as a share of that brand's viewers.
TotalContributions = (
    non_uniqueReach
    .merge(Contributions, left_on='branded_as', right_on='player')
    [['branded_as', 'Viewers', 'WeightedIncremental']]
    .assign(**{'% Incremental': lambda merged: merged['WeightedIncremental'] / merged['Viewers']})
)
TotalContributions

In [24]:

# NOTE(review): this cell originally began with a bare `.plot(...)` (a syntax
# error with no object to plot) and passed a column name to `xticks`.
# It is assumed here that the intent was a bar chart of TotalContributions
# with one bar group per `branded_as` - confirm.
TotalContributions.plot(kind='bar', x='branded_as')