In [17]:
# modules
import numpy as np
import matplotlib.pyplot as plt
from   scipy import optimize
import pandas as pd
from scipy.optimize import curve_fit
import scipy.stats as stats
import sys

# Import plotly (interactive plotting library with rich graphical options)
import plotly.graph_objects as go
import plotly.offline as pyo

# Load custom scripts in reusable_code folder
sys.path.append(r'/home/jupyter/reusable_code')

import google_api_functions as gaf

from google.cloud import bigquery
from datetime import date

import re
import networkx as nx

In [60]:
# Authenticate through the shared helper module; the argument is the reusable_code folder path.
# NOTE(review): presumably returns a Google credentials object - it is passed as
# `credentials=` to bigquery.Client further down; confirm against google_api_functions.
creds=gaf.Authenticate_Google(r'/home/jupyter/reusable_code/')
# Named colour palette (name -> hex code) used to style the plots in this notebook.
bb_palette = {
    # primary colours
    'Sunset Red': '#D2525D',
    'Dorset Blue': '#4BA0B8',
    'Union Blue': '#083560',
    # highlight variants of the primary colours
    'Sunset Red Highlight': '#FF8F99',
    'Dorset Blue Highlight': '#5CCAE5',
    'Union Blue Highlight': '#80E4FF',
    # accent colours
    'Raspberry': '#993F64',
    'Orange': '#E0835E',
    'Apricot': '#E8B35D',
    'Apple': '#72B58C',
    # dark and light neutrals
    'Dark Storm': '#1E2023',
    'Mild Storm': '#282B31',
    'Light Storm': '#3E434B',
    'Dark Cloud': '#CDD2D8',
    'Light Cloud': '#E8EDF2',
    'White': '#FFFFFF',
}


In [6]:
# Extracting data to determine LTV: set up the BigQuery client for the analytics project,
# authenticated with the credentials object created earlier in the notebook.
bq = bigquery.Client(project='itv-bde-analytics-prd',credentials=creds)

# `magics` was referenced without ever being imported, which raised NameError on a
# fresh kernel. Import it explicitly before configuring the %%bigquery magic context.
from google.cloud.bigquery import magics
magics.context.credentials = creds  #apply these credentials to the BQ magic syntax too


In [15]:
# Entitlement status transitions, aggregated in BigQuery.
#   * The CTE counts rows (N) per billing provider, previous status, new status and
#     a descriptive event label, and flags the two "unmapped journey" descriptions.
#   * The outer select keeps only non-flagged rows and adds PC_transitions: each row's
#     N as a share of all kept N for the same (billingProvider, from_status).
#   * Rows come back ordered by provider, from-status, then N descending.
query="""with entitlements_flows as
(select billingprovider, Last_status as From_status,status as to_status,
CONCAT(eventSubType.eventDescription , ' (',ifnull(TRIM(JSON_EXTRACT(raw,'$.eventType'),"\\""),concat('DERIVED:',eventtype)),')') as SVOD_event_received
,case when eventSubType.eventDescription in ('never seen this combo before','outside agreed journey set') then 1 else 0 end as exclude_flag
,count(*) as N
from `itv-bde-analytics-prd.britbox_analytics.entitlements` 
group by 1,2,3,4,5)
select *,
N/sum(N) over (partition by billingProvider,from_status) as PC_transitions
from entitlements_flows where exclude_flag=0
--select exclude_flag, sum(N)/(Select sum(N) from entitlements_flows) as N from entitlements_flows group by 1 --99% journeys mapped
order by 1,2,6 desc"""

# Submit the job, then download the full result set into a pandas DataFrame.
query_job = bq.query(query)
df = query_job.to_dataframe()

In [16]:
# Preview only the first rows rather than dumping the whole transition table into the output.
df.head(10)

In [114]:
# Restrict the transition table to a single billing provider ('BT').
sub_df=df[df['billingprovider']=='BT']

# Every status seen on either end of a transition becomes a graph node.
# dict.fromkeys de-duplicates while keeping first-seen order, so the node order (and
# anything that depends on it, such as a seeded layout) is identical on every run.
# list(set(...)) gave an order that changes between kernels because string hashing
# is randomised per process.
nodes=list(dict.fromkeys(sub_df['From_status'].to_list()+sub_df['to_status'].to_list()))

In [115]:
# One (from_status, to_status, event_label, transition_share) tuple per row of sub_df.
edge_columns=['From_status','to_status','SVOD_event_received','PC_transitions']
edge_list=list(sub_df[edge_columns].itertuples(index=False,name=None))


In [127]:
# Status transitions have a direction (From_status -> to_status), so the graph must be
# directed. An undirected nx.Graph merges A->B with B->A into a single edge, makes
# nx.is_directed_acyclic_graph always return False, and makes nx.is_aperiodic raise
# because it is only defined for directed graphs.
G=nx.DiGraph()

In [128]:
# Register every status as a node up front (nodes is the de-duplicated list of
# From_status / to_status values for the selected billing provider).
G.add_nodes_from(nodes)

In [129]:
# Several rows can share the same (from, to) pair - one per SVOD event - and calling
# G.add_edge once per row overwrote the attributes each time, so only the last row
# survived. Combine the rows first: shares are summed into one weight and the event
# labels are stacked, one per line. Building the totals in a fresh dict keeps the
# cell idempotent when it is re-run.
combined_edges={}
for source,target,event,share in edge_list:
    entry=combined_edges.setdefault((source,target),{'weight':0,'labels':[]})
    entry['weight']+=share
    entry['labels'].append(str(event))

for (source,target),entry in combined_edges.items():
    G.add_edge(source,target,weight=entry['weight'],label='\n'.join(entry['labels']))

In [131]:
#for i in nx.lexicographical_topological_sort(G):
#    print("{} has {} ancestors and {} descendants".format(i,len(nx.ancestors(G,i)),len(nx.descendants(G,i))))

In [119]:
# Check whether the transition graph is a DAG. Per the NetworkX docs this is True only
# when G is directed AND contains no cycles, so it is always False for an undirected graph.
nx.is_directed_acyclic_graph(G)

In [120]:
# Check whether the transition graph is aperiodic.
# NOTE(review): NetworkX defines is_aperiodic for directed graphs only and raises an
# error when G is undirected - confirm G is built as a directed graph.
nx.is_aperiodic(G)

In [121]:
# random_layout places nodes uniformly at random; without a seed every run produced a
# different picture. Fix the seed so the figure is reproducible after Restart & Run All.
LAYOUT_SEED=42
pos=nx.random_layout(G,seed=LAYOUT_SEED)

# Quick unstyled preview of the graph using those positions.
nx.draw(G,pos)

In [124]:
        
# Styled drawing of the status-transition graph.
fig, axsize = plt.subplots(figsize=(15,15))  # Set graph size for picture
# Switching the axis off also stops the axes background patch from being drawn, so the
# colour is applied to the figure as well; otherwise the background never shows.
fig.patch.set_facecolor(bb_palette['Light Cloud'])
axsize.set_facecolor(bb_palette['Light Cloud']) # Set graph background colour
axsize.axis('off')

# Read edges, widths and labels from the graph itself so the three stay aligned.
# Building widths from edge_list was wrong: its order and length need not match
# G.edges(), because repeated (from, to) pairs collapse into a single graph edge.
drawn_edges = list(G.edges(data=True))
edge_pairs = [(u, v) for u, v, _ in drawn_edges]
edge_widths = [attrs['weight']*5 for _, _, attrs in drawn_edges]  # width scales with transition share
edge_labels = {(u, v): attrs['label'] for u, v, attrs in drawn_edges}

# `label_pos` is an edge-label option, not a draw_networkx_labels parameter (ignored by
# old NetworkX, TypeError in current releases), so it is dropped here.
# Font family corrected from the misspelt 'monserrat'; matplotlib falls back to its
# default font if Montserrat is not installed.
nx.draw_networkx_labels(G,pos=pos,font_size=12,font_color=bb_palette['Dark Storm'],
                        font_family='Montserrat',ax=axsize)
nx.draw_networkx_nodes(G,pos=pos,node_color=bb_palette['Dorset Blue'],ax=axsize,node_shape="s")

nx.draw_networkx_edges(G,pos=pos,edgelist=edge_pairs,edge_color=bb_palette['Union Blue'],
                       width=edge_widths,ax=axsize)
# Trailing semicolon suppresses the dict-of-Text repr in the cell output.
nx.draw_networkx_edge_labels(G,pos,edge_labels=edge_labels,ax=axsize);