In [1]:
import pandas as pd
import sqlite3
# Show full cell contents when frames are displayed. ``None`` means "no limit";
# the older ``-1`` spelling was deprecated in pandas 1.0 and is rejected by
# pandas 2.x.
pd.set_option('display.max_colwidth', None)

# Download the raw catalogue export and open the local SQLite scratch database.
catalog_df=pd.read_csv(r"http://itv-catalog.massive-itv.com/catalog.csv")
conn = sqlite3.connect('SQL_connection1.db') #Create a connection object

# Put the pandas table into the connection object.
# ``if_exists='replace'`` drops any table left behind by a previous run, so no
# manual DROP is needed. A failed load now raises here instead of being
# swallowed, which previously let the following statements run against a stale
# or missing Catalogue_info table.
catalog_df.to_sql('Catalogue_info', con=conn, if_exists='replace')

from contextlib import closing  # local import so this cell runs on its own


def _rebuild_table(cursor, table_name, select_sql):
    """Drop ``table_name`` if it exists, then recreate it from ``select_sql``.

    Args:
        cursor: open sqlite3 cursor the statements are executed on.
        table_name: table to (re)create. Only literals from this cell are
            passed, so the name is interpolated directly into the statement.
        select_sql: the ``select ...`` statement whose result fills the table.
    """
    # ``if exists`` makes the drop a no-op on a fresh database without hiding
    # genuine errors the way a bare ``except: pass`` would.
    cursor.execute('drop table if exists {}'.format(table_name))
    cursor.execute('create table {} as {}'.format(table_name, select_sql))


# Staging tables, in dependency order. The last three share one 14-column
# layout (same column order) because ``catalogue_cleaned`` stacks them with
# ``select * ... union all``.
catalogue_table_steps = [
    # Episode rows only.
    ('eps', """
select
ParentId
,Id as EpisodeID
,EpisodeNumber
,MetaDuration
,MediaDuration
,ItemType
,CustomId
,MetaCreditsCast
,MetaCreditsCrew
from Catalogue_info
where ItemType='episode'
"""),
    # Season rows only.
    ('seasons', """
select
ParentId
,ID
,Title as Series_number_text
,SeasonNumber as Series_number
,ItemType
,CustomId
,MetaCreditsCast
,MetaCreditsCrew
from Catalogue_info
where ItemType='season'
"""),
    # Everything that is neither a season nor an episode; the series/episode
    # columns are null placeholders so the layout matches the tables below.
    ('shows', """
select
Title as programme_name
,Id as Programme_ID
,ItemType as Programme_Type
,null as Series_number_text
,null as Series_number
,null as Series_ID
,null as Episode_ID
,null as Episode_Number
,null as Meta_Duration
,null as Media_Duration
,Id
,CustomId
,MetaCreditsCast
,MetaCreditsCrew
from Catalogue_info
where ItemType not in ('season','episode')
"""),
    # Seasons with their parent programme attached. Left join: a season whose
    # parent is not in ``shows`` keeps null programme columns.
    ('seasons2', """
select
shows.programme_name
,shows.Programme_ID
,seasons.ItemType as Programme_Type
,seasons.Series_number_text
,seasons.Series_number
,seasons.Id as Series_ID
,null as Episode_ID
,null as Episode_Number
,null as Meta_Duration
,null as Media_Duration
,seasons.ID
,seasons.CustomId
,seasons.MetaCreditsCast
,seasons.MetaCreditsCrew
from seasons
left join shows
on seasons.parentid=shows.id
"""),
    # Episodes with their parent season (and through it, programme) attached.
    # Left join: an episode whose parent is not in ``seasons2`` keeps null
    # programme/series columns.
    ('episodes2', """
select
seas.programme_name
,seas.Programme_ID
,eps.ItemType as Programme_Type
,seas.Series_number_text
,seas.Series_number
,seas.Series_ID
,eps.EpisodeID as Episode_ID
,eps.EpisodeNumber as Episode_Number
,eps.MetaDuration as Meta_Duration
,eps.MediaDuration as Media_Duration
,eps.EpisodeID as ID
,eps.CustomId
,eps.MetaCreditsCast
,eps.MetaCreditsCrew
from eps
left join seasons2 seas
on eps.parentId=seas.id
"""),
    # All three levels stacked into one table.
    ('catalogue_cleaned', """
select * from shows
union all
select * from seasons2
union all
select * from episodes2
"""),
]

# ``closing`` guarantees the cursor is released even if a statement fails.
with closing(conn.cursor()) as query:
    for table_name, select_sql in catalogue_table_steps:
        _rebuild_table(query, table_name, select_sql)

# Pull the combined table back into pandas for the rest of the notebook.
tidy_table=pd.read_sql_query("""select * from catalogue_cleaned""", conn)


In [2]:
# Display the combined catalogue frame read back from the catalogue_cleaned table.
tidy_table

In [3]:
# One plain dict per catalogue row, keyed by column name.
show_list_of_dicts = tidy_table.to_dict(orient="records")

# Peek at the available field names via the first record.
show_list_of_dicts[0].keys()

In [4]:
import re  # local import so this cell runs on its own


def _parse_cast(cast_field):
    """Return the performer names found in a comma-separated cast string.

    Each comma-separated entry is cut at the first ``" as "`` and the leading
    part is kept, stripped of surrounding whitespace. Entries that end up
    empty (for example after a trailing comma) are skipped.
    """
    names = (re.split(' as ', entry)[0].strip() for entry in cast_field.split(','))
    return [name for name in names if name]


talent_nodes=[]
show_nodes=[]
edges=[]

for record in show_list_of_dicts:
    # Show- and season-level rows are skipped; every other Programme_Type is
    # treated as a credit-bearing item.
    if record['Programme_Type'] in ('show', 'season'):
        continue

    cast_field = record['MetaCreditsCast']
    programme_name = record['programme_name']
    # Skip rows without cast text. Also skip rows with no programme name:
    # the left joins that build the catalogue leave it NULL when a parent row
    # is missing, and calling .strip() on that value would raise.
    if not isinstance(cast_field, str) or not cast_field:
        continue
    if not isinstance(programme_name, str):
        continue

    show_name = programme_name.strip()
    show_nodes.append(show_name)
    for talent_name in _parse_cast(cast_field):
        talent_nodes.append(talent_name)
        # One edge per credit; repeats are kept on purpose so later cells can
        # count how many rows each performer appears in.
        edges.append((talent_name, show_name))

# Node lists only need each name once (order of first appearance is kept).
# The per-name print() calls were dropped because they flooded the cell output.
talent_nodes = list(dict.fromkeys(talent_nodes))
show_nodes = list(dict.fromkeys(show_nodes))


In [5]:
# Turn the (talent, show) edge tuples into a two-column frame.
talent_map_df = pd.DataFrame(
    data=edges,
    columns=['Talent', 'Show'],
)

In [6]:
conn = sqlite3.connect('SQL_connection1.db') #Create a connection object

# Put the pandas table into the connection object.
# ``if_exists='replace'`` swaps out any talent_map table left by an earlier
# run. Previously a bare ``except`` around a manual DROP could hide a real
# failure, after which to_sql would fail because the table already existed.
talent_map_df.to_sql('talent_map', con=conn, if_exists='replace')

# For each (show, talent) pair: the number of credit rows for that talent
# divided by the number of non-show/non-season catalogue rows for that show.
# programme_name is trimmed in the second subquery because the names written
# to talent_map were stripped in Python; without it, shows with padded titles
# never matched the join and silently vanished. SQLite's trim() removes
# spaces only, whereas Python's strip() removes all whitespace.
talent_edges=pd.read_sql_query("""
select a.show, a.talent,
cast(a.n_eps as real)/b.total_num_eps as Pc_episodes
from
    (select 
    show, talent, count(*) as N_eps
    from talent_map
    group by 1,2) a
inner join
    (select trim(programme_name) as show, 
    count(*) as total_num_eps
    from catalogue_cleaned 
    where Programme_Type not in ('show','season')
    group by 1) b
on a.show=b.show
""", conn)




In [7]:
# Re-bind ``edges`` to one (show, talent, Pc_episodes) tuple per result row.
edges = list(map(tuple, talent_edges.values))
edges


In [71]:
import networkx as nx
import matplotlib.pyplot as plt

In [72]:
# Build the graph: tag each node with the side it belongs to, then add the
# weighted edges.
G = nx.Graph()
for node_names, side in ((talent_nodes, 'talent'), (show_nodes, 'show')):
    G.add_nodes_from(node_names, bipartite=side)
G.add_weighted_edges_from(edges)

In [80]:
# Display every node together with its attribute dict.
G.nodes(data=True)

In [107]:
from itertools import takewhile  # local import so this cell runs on its own

#talent_path=nx.all_simple_paths(G, 'Dominique Moore', 'Dan Linney', cutoff=None)

# List the simple paths between the two performers, shortest first, up to
# ``max_hops`` edges long. nx.shortest_simple_paths() has no ``cutoff``
# parameter (passing one raises TypeError). It yields paths in order of
# increasing length, so stopping at the first path that exceeds the limit
# applies the same bound.
max_hops = 6
talent_path = takewhile(
    lambda path: len(path) - 1 <= max_hops,
    nx.shortest_simple_paths(G, 'Anna Friel', 'John Simm'),
)

for n,path in enumerate(talent_path):
    print("Path {}".format(n))
    print(path)

In [106]:
# Draw only the first shortest path found between the two performers.
talent_path = nx.all_shortest_paths(G, 'Dominique Moore', 'Dan Linney')
first_path = next(talent_path, None)
if first_path is not None:
    subgraph_value = G.subgraph(first_path)
    positioning = nx.random_layout(subgraph_value)
    # Same drawing order as before: labels, then nodes, then edges.
    for draw_layer in (
        nx.draw_networkx_labels,
        nx.draw_networkx_nodes,
        nx.draw_networkx_edges,
    ):
        draw_layer(subgraph_value, pos=positioning)

In [110]:
# Print every shortest path between the two performers, one per line.
shortest_routes = nx.all_shortest_paths(G, 'Anna Friel', 'John Simm')
for route in shortest_routes:
    print(route)

In [113]:
# Print every simple path between the two performers found with cutoff=6.
simple_routes = nx.all_simple_paths(G, 'Anna Friel', 'John Simm', cutoff=6)
for route in simple_routes:
    print(route)


In [159]:
import itertools  # local import so this cell runs on its own

# NOTE: ``bb_palette`` is defined in a cell further down this notebook; that
# cell has to be run before this one.
from_person='Anna Friel'
to_person='John Simm'
num_steps=4

fig, axsize = plt.subplots(figsize=(15,15))
axsize.set_facecolor(bb_palette['Light Cloud'])
axsize.axis('off')

# Every node that lies on some simple path between the two people found with
# cutoff=num_steps.
connecting_paths = nx.all_simple_paths(G, from_person, to_person, cutoff=num_steps)
flattened = list(set(itertools.chain.from_iterable(connecting_paths)))
subgraph_value=G.subgraph(flattened)

# Sets make the membership tests O(1); the source lists can be long.
show_lookup = set(show_nodes)
talent_lookup = set(talent_nodes)
show_nodes_subgraph=G.subgraph([i for i in flattened if i in show_lookup])
talent_nodes_subgraph=G.subgraph([i for i in flattened if i in talent_lookup])

# Only the spring layout was ever used: the bipartite and random layouts that
# used to be computed first were overwritten immediately. A fixed seed makes
# the saved figure reproducible.
positioning=nx.spring_layout(subgraph_value, seed=42)

# ``label_pos`` belongs to draw_networkx_edge_labels, not draw_networkx_labels,
# so it is no longer passed. 'monserrat' looked like a misspelling of the
# Montserrat font -- TODO confirm the font is installed, otherwise matplotlib
# falls back to its default family.
nx.draw_networkx_labels(subgraph_value,pos=positioning,font_size=12,
                        font_color=bb_palette['Dark Storm'],font_family='Montserrat',ax=axsize)
# Shows are drawn as squares, performers as stars.
nx.draw_networkx_nodes(subgraph_value,pos=positioning,nodelist=list(show_nodes_subgraph),
                       node_color=bb_palette['Dorset Blue'],ax=axsize,node_shape="s")
nx.draw_networkx_nodes(subgraph_value,pos=positioning,nodelist=list(talent_nodes_subgraph),
                       node_color=bb_palette['Sunset Red'],ax=axsize,node_shape="*")
nx.draw_networkx_edges(subgraph_value,pos=positioning,edge_color=bb_palette['Union Blue'],ax=axsize)

filename='{} to {} in {}.png'.format(from_person, to_person, num_steps)
fig.savefig(filename)

In [98]:
from pathlib import Path  # local import so this cell runs on its own

# Write full network graph.
# The target is built from the current user's home directory rather than a
# hard-coded /Users/<name>/ path, so the cell also works on other machines.
# For the original author it resolves to the same file as before.
graphml_path = Path.home() / "Desktop" / "TalentGraphMassive.xml"
nx.write_graphml(G, str(graphml_path))

In [None]:
# Draw the whole graph and save the figure to disk.
# The second line used to carry a pasted ">>> " REPL prompt, which is not
# valid Python outside IPython's prompt stripping.
nx.draw(G)
plt.savefig("path.png")


In [147]:
# Named hex colours used when styling the graph figures.
bb_palette = {
    'Sunset Red': '#D2525D',
    'Dorset Blue': '#4BA0B8',
    'Union Blue': '#083560',
    'Sunset Red Highlight': '#FF8F99',
    'Dorset Blue Highlight': '#5CCAE5',
    'Union Blue Highlight': '#80E4FF',
    'Raspberry': '#993F64',
    'Orange': '#E0835E',
    'Apricot': '#E8B35D',
    'Apple': '#72B58C',
    'Dark Storm': '#1E2023',
    'Mild Storm': '#282B31',
    'Light Storm': '#3E434B',
    'Dark Cloud': '#CDD2D8',
    'Light Cloud': '#E8EDF2',
    'White': '#FFFFFF',
}