# In [1]:
import sys

import os

import numpy as np

import itertools

import cPickle as pickle

import collections

import networkx as nx

import matplotlib.pyplot as plt

from scipy import stats

import time as t



def make_network(weight_dict, thresh=0):
    """
    Build an undirected NetworkX graph from a pair -> weight dictionary.

    Each key of ``weight_dict`` is a pair of node labels and each value is
    the weight of the link between them. A pair becomes an edge, with the
    weight stored in the ``weight`` edge attribute, only when its weight is
    strictly greater than ``thresh``. Pairs containing the label "Unknown"
    are left out entirely.

    Returns the resulting ``nx.Graph``.
    """
    graph = nx.Graph()
    for endpoints, share in weight_dict.iteritems():
        # Guard clause: drop unidentified characters and any link whose
        # weight does not exceed the threshold.
        if "Unknown" in endpoints or not share > thresh:
            continue
        first = endpoints[0]
        second = endpoints[1]
        graph.add_node(first)
        graph.add_node(second)
        graph.add_edge(first, second, weight=share)
    return graph



#normalize centrality so that the sum is unity

def normalize(centrality):
    """
    Rescale the values of ``centrality`` in place so that they sum to one.

    ``centrality`` is a dict of numeric values. When the values sum to zero
    (which includes the empty dict) nothing is changed. The same dict object
    is returned in every case.
    """
    total = sum(centrality.values())
    if total == 0:
        # Nothing to rescale; dividing would fail.
        return centrality
    scale = 1. / total
    for key in centrality:
        centrality[key] *= scale
    return centrality



def statistics(G):
    """
    Collect graph-level and node-level measures of the graph ``G``.

    Returns a dict with these keys:

    * 'num_of_nodes', 'num_of_edges', 'transitivity', 'connectivity'
      (average node connectivity) -- whole-graph values;
    * 'clustering', 'triangles' -- per-node dicts as returned by NetworkX;
    * 'degree', 'closeness', 'betweenness' -- per-node centralities passed
      through ``normalize`` (rescaled to sum to one);
    * 'assortativity', 'diameter', 'eigenvector' -- best effort: each key
      is simply absent when NetworkX raises while computing it.
    """
    prop = {}

    #simple graph properties
    prop['num_of_nodes'] = G.number_of_nodes()
    prop['num_of_edges'] = G.number_of_edges()
    prop['transitivity'] = nx.transitivity(G)
    prop['connectivity'] = nx.average_node_connectivity(G)

    #node measures and centralities
    prop['clustering'] = nx.clustering(G)
    prop['triangles'] = nx.triangles(G)
    prop['degree'] = normalize(nx.degree_centrality(G))
    # Closeness must come from closeness_centrality; computing it with
    # degree_centrality would just duplicate the 'degree' entry.
    prop['closeness'] = normalize(nx.closeness_centrality(G))
    prop['betweenness'] = normalize(nx.betweenness_centrality(G))

    #things don't always make sense:
    # Each optional measure is attempted on its own so that one failure does
    # not drop the others. ``except Exception`` keeps the best-effort
    # behaviour without swallowing KeyboardInterrupt/SystemExit.
    try:
        prop['assortativity'] = nx.degree_assortativity_coefficient(G)
    except Exception:
        pass
    try:
        prop['diameter'] = nx.diameter(G)
    except Exception:
        pass
    try:
        prop['eigenvector'] = normalize(nx.eigenvector_centrality(G))
    except Exception:
        pass
    return prop



DATAPATH = "/Users/Thomas/Desktop/SFI/project/movie/movie_screenshares/raw_data/"

#generate movies
# Every regular file in DATAPATH is unpickled and stored under its file name
# up to the first dot. Files that cannot be unpickled are skipped.
# NOTE: pickle.load can execute arbitrary code -- only point DATAPATH at
# files from a trusted source.
movies = {}
for fn in os.listdir(DATAPATH):
    path = os.path.join(DATAPATH, fn)
    if not os.path.isfile(path):
        # sub-directories cannot be opened as pickles
        continue
    # pickles are binary data, so open in binary mode
    with open(path, 'rb') as handle:
        try:
            movies[fn.split(".")[0]] = pickle.load(handle)
        except Exception as err:
            # best effort: report the unreadable file and move on
            sys.stderr.write("skipping %s: %s\n" % (fn, err))
            continue



#character gender id
# gender.txt: the first whitespace-delimited token of each line is a
# comma-separated record; field 0 is used as the key and field 2 as the value.
gender_dic = {}
with open('gender.txt','rb') as f:
    for line in f:
        tokens = line.split()
        if not tokens:
            # blank line: nothing to parse
            continue
        fields = tokens[0].split(',')
        gender_dic[fields[0]] = fields[2]
# -1 is the placeholder person id used for characters with no cast match.
# NOTE(review): 'Unkonwn' looks like a typo for 'Unknown'; left unchanged
# because the value is written into the saved statistics -- confirm before
# renaming.
gender_dic[-1] = 'Unkonwn'



#generate networks and statistics
# networks: movie -> graph built from that movie's "screen_share" weights
# net_stat: movie -> dict of per-movie information ('personid', 'gender',
#           'year'); further entries are added to it later in the script
networks = {}
net_stat = {}
for movie, movie_info in movies.iteritems():
    networks[movie] = make_network(movie_info["screen_share"])
    net_stat[movie] = {}

    #keep useful infos
    # list() so the nodes can be enumerated whatever container nodes() returns
    node_list = list(networks[movie].nodes())

    # personid[i] is the cast 'personId' of node i, or -1 when no cast entry
    # has a matching 'characterName'.
    personid = [-1] * len(node_list)
    for i, character in enumerate(node_list):
        for act in movie_info['people']['cast']:
            try:
                if character == act['characterName']:
                    personid[i] = act['personId']
                    break
            except Exception:
                # malformed cast entry: stop searching for this character
                # and keep the -1 placeholder
                break

    # Stored once per movie, after the search, so the key is present even
    # when the graph has no nodes.
    net_stat[movie]['personid'] = personid
    net_stat[movie]['gender'] = [gender_dic[key] for key in personid]
    net_stat[movie]['year'] = movie_info['metadata']['releaseYear']

    

#getting various network measures
# NOTE(review): SAVEPATH is not defined in the visible part of this file;
# it must be set (including its trailing path separator, since the file name
# is appended by plain concatenation) before this loop runs -- confirm.
for movie, movie_net in networks.iteritems():
    # statistics() returns the graph and node measures keyed by name
    # ('num_of_nodes', 'degree', 'diameter', ...); merge them into the
    # record already holding this movie's personid/gender/year.
    net_stat[movie].update(statistics(movie_net))

    # one pickle per movie: <SAVEPATH><movie>_stats.pkl
    with open(SAVEPATH + movie + '_stats' + '.pkl', 'wb') as out:
        pickle.dump(net_stat[movie], out)





# (residue of the nbformat error emitted when this notebook was converted)
# - use nbformat for read/write/validate public API
# - use nbformat.vX directly to composing notebooks of a particular version
#
#   """)
#
#
# NotJSONError: Notebook does not appear to be JSON: 'import sys\nimport os\nimport numpy as ...