In [1]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy
from statistics import mean
import json
import pickle

In [25]:
# Read the movies graph from its adjacency-list file, then relabel every node
# so that underscores in the label become spaces.
Movies_Graph = nx.read_adjlist("data/movies_graph.adjlist")
mapping = {label: str(label).replace("_", " ") for label in Movies_Graph}
Movies_Graph = nx.relabel_nodes(Movies_Graph, mapping)

# Same treatment for the writers graph. `mapping` is rebound here, so after
# this cell it holds the writers' old-label -> new-label table.
Writers_Graph = nx.read_adjlist("data/writers_graph.adjlist")
mapping = {label: str(label).replace("_", " ") for label in Writers_Graph}
Writers_Graph = nx.relabel_nodes(Writers_Graph, mapping)

In [49]:
# Number of entries to show in each ranking.
TOP_N = 10

# Node -> degree lookup tables, built once so sorting and printing do not
# query the graph again for every node.
writer_degree = dict(Writers_Graph.degree())
movie_degree = dict(Movies_Graph.degree())

# Nodes ordered from most to fewest connections. sorted() is stable, so nodes
# with equal degree keep the graph's own node order.
all_writers = list(Writers_Graph.nodes())
all_writers_sorted = sorted(all_writers, key=writer_degree.get, reverse=True)

all_movies = list(Movies_Graph.nodes())
all_movies_sorted = sorted(all_movies, key=movie_degree.get, reverse=True)

# Writers ordered by most connections.
# Slicing (instead of indexing range(10)) avoids an IndexError when a graph
# has fewer than TOP_N nodes; it simply prints as many as exist.
print(f"Top {TOP_N} Writers by number of connections via movies:")
for writer in all_writers_sorted[:TOP_N]:
    print(f"\t{writer}, connections: {writer_degree[writer]}")

print("")

# Movies ordered by most connections
print(f"Top {TOP_N} Movies by number of connections via writers:")
for movie in all_movies_sorted[:TOP_N]:
    print(f"\t{movie}, connections: {movie_degree[movie]}")

Top 10 Writers by number of connections via movies:
	Steven Spielberg, connections: 22
	Ridley Scott, connections: 20
	Ronald Shusett, connections: 19
	David Koepp, connections: 19
	Dan O'Bannon, connections: 17
	Michael Mann, connections: 16
	Brian De Palma, connections: 16
	James Cameron, connections: 15
	Martin Scorsese, connections: 15
	George Lucas, connections: 15

Top 10 Movies by number of connections via writers:
	Indiana Jones and the Kingdom of the Crystal Skull, connections: 22
	Mission: Impossible, connections: 20
	Raiders of the Lost Ark, connections: 20
	Jurassic Park, connections: 18
	The Lost World: Jurassic Park, connections: 18
	War of the Worlds, connections: 18
	Indiana Jones and the Last Crusade, connections: 17
	Aliens, connections: 16
	Minority Report, connections: 16
	Indiana Jones and the Temple of Doom, connections: 16


In [84]:
# Load the stored director summary object from disk into `director_data`.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only run this against a file you produced yourself or otherwise trust.
with open('data/director_summary.p', 'rb') as fp:
    director_data = pickle.load(fp)

In [85]:
def summarize_data(stats=None):
    """Print a short text report for one director's statistics record.

    Args:
        stats: Mapping with the keys 'director', 'number_of_movies',
            'number_of_characters', 'number_of_communities', 'clustering',
            'modularity', 'diameter' and 'density'. The last six are passed
            to statistics.mean, so each must be a non-empty collection of
            numbers. Despite the default of None, a mapping must be supplied:
            the body subscripts `stats` unconditionally.

    Returns:
        None. The report is written to stdout, one line per statistic.
    """
    # NOTE(review): the recorded runs show whole-number character averages
    # for every director; if those values are numpy integers, statistics.mean
    # may be truncating the result -- confirm the stored types.
    averaged_fields = (
        ("number of characters", "number_of_characters"),
        ("number of communities", "number_of_communities"),
        ("clustering", "clustering"),
        ("modularity", "modularity"),
        ("diameter", "diameter"),
        ("density", "density"),
    )

    print(f"Director: {stats['director']}")
    print(f"Movies total: {stats['number_of_movies']}")
    for label, key in averaged_fields:
        print(f"Average {label}: {mean(stats[key])}")

In [92]:
def summarize_director(director="Quentin Tarantino"):
    """Print the summary report for one director.

    Looks `director` up in the module-level `director_data` mapping and hands
    the resulting record to `summarize_data`. A name that is not a key of
    `director_data` raises KeyError.
    """
    director_stats = director_data[director]
    summarize_data(director_stats)

In [89]:
# sorted(director_data, key=lambda x: mean(list(director_data[x]["number_of_communities"])), reverse=True)

In [93]:
# High diameter vs. low diameter: print David Lynch's summary, a blank
# separator line, then Christopher Nolan's summary for side-by-side reading.
summarize_director("David Lynch")
print("")
summarize_director("Christopher Nolan")

Director: David Lynch
Movies total: 6
Average number of characters: 43
Average number of communities: 2.5
Average clustering: 0.6263333333333333
Average modularity: 0.4215
Average diameter: 4.333333333333333
Average density: 0.10566666666666667

Director: Christopher Nolan
Movies total: 2
Average number of characters: 21
Average number of communities: 2
Average clustering: 0.5965
Average modularity: 0.2595
Average diameter: 3.0
Average density: 0.17049999999999998


In [94]:
# High number of communities vs. low number of communities: print Quentin
# Tarantino's summary, a blank separator line, then Ridley Scott's summary.
summarize_director("Quentin Tarantino")
print("")
summarize_director("Ridley Scott")

Director: Quentin Tarantino
Movies total: 5
Average number of characters: 29
Average number of communities: 3.2
Average clustering: 0.7674
Average modularity: 0.3732
Average diameter: 3.8
Average density: 0.1714

Director: Ridley Scott
Movies total: 9
Average number of characters: 33
Average number of communities: 2.111111111111111
Average clustering: 0.6277777777777778
Average modularity: 0.3391111111111111
Average diameter: 3.888888888888889
Average density: 0.16444444444444445


In [95]:
# High number of characters vs. low number of characters: print George
# Lucas's summary, a blank separator line, then Christopher Nolan's summary.
summarize_director("George Lucas")
print("")
summarize_director("Christopher Nolan")

Director: George Lucas
Movies total: 2
Average number of characters: 47
Average number of communities: 2.5
Average clustering: 0.6745
Average modularity: 0.363
Average diameter: 3.5
Average density: 0.1265

Director: Christopher Nolan
Movies total: 2
Average number of characters: 21
Average number of communities: 2
Average clustering: 0.5965
Average modularity: 0.2595
Average diameter: 3.0
Average density: 0.17049999999999998


In [96]:
# High clustering vs. low clustering: print Quentin Tarantino's summary, a
# blank separator line, then David Cronenberg's summary.
summarize_director("Quentin Tarantino")
print("")
summarize_director("David Cronenberg")

Director: Quentin Tarantino
Movies total: 5
Average number of characters: 29
Average number of communities: 3.2
Average clustering: 0.7674
Average modularity: 0.3732
Average diameter: 3.8
Average density: 0.1714

Director: David Cronenberg
Movies total: 3
Average number of characters: 29
Average number of communities: 2.6666666666666665
Average clustering: 0.5666666666666667
Average modularity: 0.35033333333333333
Average diameter: 4.333333333333333
Average density: 0.15233333333333332
