In [35]:
import numpy as np
import operator

In [17]:
import csv
import networkx as nx
import matplotlib.pyplot as plt

graph = {}

G = None
G=nx.Graph()

with open('casts.csv', 'r') as csvfile:
    reader = csv.reader(csvfile, delimiter=';', quotechar='"')
    
    for row in reader:
        if row[1] is not "" and row[2] is not "" and row[1] != "s a" and row[2] != "s a": 
            movie = row[1]

            #because csv in sorted according name of film
            if movie not in graph:
                graph[movie] = set()

            graph[movie].add(row[2]) # add actor
            

In [18]:
for movie in graph:
    for actor1 in graph[movie]:
        for actor2 in graph[movie]:
            if (actor1 is not actor2):
                G.add_edge(actor1, actor2)

In [19]:
# plt.figure(figsize=(20,10))
# pos = nx.spring_layout(G)
# nx.draw(G, pos,
#         labels={v:str(v) for v in G},
#         cmap = plt.get_cmap("bwr"),
#         node_color=[G.degree(v) for v in G],
#         font_size=12
#        )
# plt.show()

In [20]:
print(nx.info(G))

Name: 
Type: Graph
Number of nodes: 16281
Number of edges: 152266
Average degree:  18.7047


In [21]:
nx.density(G)

0.0011489402865853943

In [22]:
nx.number_of_nodes(G)

16281

In [23]:
sorted(nx.degree_centrality(G).items(), key=lambda x:x[1],reverse=True)[:10]

[('Humphrey Bogart', 0.02635135135135135),
 ('James Stewart', 0.02291154791154791),
 ('Gary Cooper', 0.022665847665847665),
 ('John Gielgud', 0.022665847665847665),
 ('John Carradine', 0.02248157248157248),
 ('Peter Lorre', 0.022051597051597052),
 ('C.Aubrey Smith', 0.020638820638820637),
 ('Henry Fonda', 0.01984029484029484),
 ('Burt Lancaster', 0.019103194103194103),
 ('Basil Rathbone', 0.01898034398034398)]

In [24]:
communities = nx.clustering(G)

In [25]:
communities

{'Jane greer': 1.0,
 'Goetz Otto': 1.0,
 'Georgina Hale': 1.0,
 'Tony Vilar': 0.0,
 'Stefania Casini': 0.5238095238095238,
 'Cathy Motiarty': 1.0,
 'Deborah Rush': 1.0,
 'Jim Abbott': 1.0,
 'Judith Evelyn': 0.41,
 'A.B. Imenson': 1.0,
 'William E. Shay': 1.0,
 'Steve Geray': 0.22072072072072071,
 'Richard Shull': 1.0,
 'Canmpell Scott': 1.0,
 'Daniel Roebuck': 1.0,
 'Bill C': 1.0,
 'Lila Lee': 0.3954022988505747,
 'Lucia Zanussi': 1.0,
 'Frederick Piper': 0.72,
 'Jason Miller': 0.27205882352941174,
 'Dewey Martin': 1.0,
 'Ken Wahl': 0.1695906432748538,
 'Art Metrano': 0.3787878787878788,
 'Toni Plana': 1.0,
 'Fritz Leiber': 0.34061458718992965,
 'William Windom': 0.18285714285714286,
 'Sydney Arnold': 1.0,
 'Keith Baxter': 0.6060606060606061,
 'Gunther Kieslich': 0.0,
 'Richard Arlen': 0.2483130904183536,
 'Buddy Baer': 0.6257309941520468,
 'Sharon Stone': 0.1451360650609947,
 'Gene Roth': 1.0,
 'Mary Miles': 1.0,
 'Warren Cosell': 1.0,
 'Cindy Morgan': 1.0,
 'Jules Dassin': 0.46969696

In [26]:
grouped_communities = {}
for actor, key in communities.items():
    if key not in grouped_communities:
        grouped_communities[key] = set()

    grouped_communities[key].add(actor) # add actor

In [27]:
max_group = []
for key, group in grouped_communities.items():
    if len(group) > len(max_group):
        max_group = group

In [28]:
len(max_group)

9982

In [30]:
kevin = 'Kevin Bacon'
dist = {}
sum_dist = 0
for actor in G:
    if nx.has_path(G, actor, kevin) and actor is not kevin:
        path = nx.shortest_path(G, actor, 'Kevin Bacon')
        dist[actor] = path
        sum_dist += len(path)

print(sum_dist / len(dist))

4.1517881332972095


In [38]:
bacon_numbers = {}
# coputed demanding
for actor1 in G:
    counter = 0
    sum_dist = 0
    for actor2 in G:
        if nx.has_path(G, actor1, actor2) and actor1 is not actor2:
            path = nx.shortest_path(G, actor1, actor2)
            counter += 1
            sum_dist += len(path)
    if actor1 not in bacon_numbers:
        bacon_numbers[actor1] = (sum_dist / counter)

In [36]:
eigenvector = np.array(sorted(nx.eigenvector_centrality(G, max_iter=200).items(), key=operator.itemgetter(1), reverse=True))
eigenvector[:10]

array([['C.Aubrey Smith', '0.10528209757852323'],
       ['John Carradine', '0.09890108262349977'],
       ['James Stewart', '0.09305015308449284'],
       ['Peter Lorre', '0.09255693271404085'],
       ['John Gielgud', '0.09158287381618666'],
       ['Basil Rathbone', '0.08982489646183787'],
       ['Gary Cooper', '0.08939051353229847'],
       ['David Niven', '0.08746568401219347'],
       ['Andy Devine', '0.08729150852907477'],
       ['Humphrey Bogart', '0.08503272445149634']], 
      dtype='<U37')

In [37]:
betweennes = np.array(sorted(nx.betweenness_centrality(G, k=200).items(), key=operator.itemgetter(1), reverse=True))
betweennes[:10]

array([['Humphrey Bogart', '0.017610208558268385'],
       ['Gene Hackman', '0.01391599589094254'],
       ['Burt Lancaster', '0.01322871167054944'],
       ['Vincent Price', '0.011760001176388427'],
       ['John Carradine', '0.011408244471061216'],
       ['Keenan Wynn', '0.011363517694990641'],
       ['James Stewart', '0.01099101157989122'],
       ['Robert deNiro', '0.010317115956787484'],
       ['David Niven', '0.010137944793591806'],
       ['Donald Sutherland', '0.008847459344095839']], 
      dtype='<U37')

In [31]:
nx.write_gexf(G, "export.gexf")