In [5]:
import itertools
import os
import pickle
import tmdbsimple as tmdb

from requests.exceptions import HTTPError


# The TMDb API key is read from the environment so it never has to be written
# into the notebook itself.
tmdb.API_KEY = os.environ.get('TMDB_API_KEY')

# A missing key is reported up front instead of surfacing later as a stream of
# failed requests; execution continues, as it did before.
if not tmdb.API_KEY:
    print('WARNING: TMDB_API_KEY is not set; TMDb requests are expected to fail.')

def load_pickle(path, default):
    """Return the object pickled at ``path``, or ``default`` if no such file exists.

    NOTE(security): ``pickle.load`` can run arbitrary code while
    deserialising, so this must only be pointed at files that are trusted
    (here: the state files this notebook writes itself).
    """
    if not os.path.isfile(path):
        return default

    with open(path, 'rb') as handle:
        return pickle.load(handle)


# State carried over from earlier runs; each container starts out empty when
# its pickle file has not been written yet.
# Film ids waiting to be processed.
queued_films = load_pickle('./data/static/queued_films.pickle', set())
# Film ids whose cast has already been folded into `nodes` / `edges`.
seen_films = load_pickle('./data/static/seen_films.pickle', set())
# Film ids whose credits request failed and that are excluded from the queue.
rejected_films = load_pickle('./data/static/rejected_films.pickle', set())
# Person id -> {'name': ..., 'gender': ...}.
nodes = load_pickle('./data/static/nodes.pickle', {})
# frozenset of two person ids -> number of times the pair was counted.
edges = load_pickle('./data/static/edges.pickle', {})


# Work list for this run: queued films that have been neither processed nor
# rejected already.
film_ids = list(queued_films - seen_films - rejected_films)


def save_progress():
    """Write the in-memory crawl state back to its pickle files.

    Persists ``seen_films``, ``rejected_films``, ``nodes`` and ``edges`` under
    ``./data/static`` so that a later run can resume from this point.
    """
    for path, state in (
            ('./data/static/seen_films.pickle', seen_films),
            ('./data/static/rejected_films.pickle', rejected_films),
            ('./data/static/nodes.pickle', nodes),
            ('./data/static/edges.pickle', edges)):
        with open(path, 'wb') as handle:
            pickle.dump(state, handle)


# Films left in the queue because the request failed for a reason that is not
# specific to the film.
skipped = 0

for film_id in film_ids:
    try:
        credits = tmdb.Movies(film_id).credits()
    except ValueError:
        # Presumably an unparseable response body; treated as a bad film.
        rejected_films.add(film_id)
        continue
    except HTTPError as error:
        status = getattr(error.response, 'status_code', None)

        # Authentication (401), rate-limit (429) and server-side (5xx)
        # failures say nothing about the film itself, so the film stays
        # queued for a later run instead of being rejected for good.
        if status is not None and (status in (401, 429) or status >= 500):
            skipped += 1
        else:
            rejected_films.add(film_id)
        continue

    for person in credits['cast']:
        nodes[person['id']] = {
            'name': person['name'],
            'gender': person['gender']}

    # De-duplicate first: a person listed twice in the same cast would
    # otherwise make every pair they belong to count twice for this film.
    cast_ids = sorted({person['id'] for person in credits['cast']})

    for combination in itertools.combinations(cast_ids, 2):
        pair = frozenset(combination)
        edges[pair] = edges.get(pair, 0) + 1

    seen_films.add(film_id)

    # Periodic checkpoint so an interrupted run loses at most a few films.
    if len(seen_films) % 10 == 0:
        save_progress()
        print('{}...'.format(len(seen_films)))

# Final checkpoint: without it, everything processed (or rejected) since the
# last multiple of ten would never reach disk.
if film_ids:
    save_progress()

if skipped:
    print('{} film(s) skipped after non-film-specific API errors; '
          'they remain queued.'.format(skipped))

960...
970...
980...
990...
1000...
1010...
1020...
1030...
1040...
1050...
1060...
1070...
1080...
1090...
1100...
1110...
1120...
1130...
1140...
1150...
1160...
1170...
1180...
1190...
1200...
1210...
1220...
1230...
1240...
1250...


In [6]:
import csv
import pickle


# Load the node table before touching the CSV, so a missing or unreadable
# pickle does not leave a truncated nodes.csv behind.
# NOTE(security): pickle.load can execute arbitrary code; only use it on
# trusted files such as the ones this notebook produced.
with open('./data/static/nodes.pickle', 'rb') as infile:
    nodes = pickle.load(infile)

# newline='' is what the csv module requires of its file object (otherwise
# Windows output gets blank rows); UTF-8 keeps non-ASCII names independent of
# the platform's default encoding.
with open('./data/static/nodes.csv', 'w', newline='', encoding='utf-8') as outfile:
    writer = csv.writer(outfile)

    writer.writerow(['ID', 'Label', 'Gender'])

    # One row per person: id, display name, gender code as stored in `nodes`.
    for tmdb_id, person in nodes.items():
        writer.writerow([tmdb_id, person['name'], person['gender']])
    
    
# Load the edge table before touching the CSV, so a missing or unreadable
# pickle does not leave a truncated edges.csv behind.
# NOTE(security): pickle.load can execute arbitrary code; only use it on
# trusted files such as the ones this notebook produced.
with open('./data/static/edges.pickle', 'rb') as infile:
    edges = pickle.load(infile)

# newline='' is what the csv module requires of its file object (otherwise
# Windows output gets blank rows).
with open('./data/static/edges.csv', 'w', newline='', encoding='utf-8') as outfile:
    writer = csv.writer(outfile)

    writer.writerow(['Source', 'Target', 'Weight'])

    for pair, weight in edges.items():
        # Only keys holding exactly two endpoints describe an edge.
        if len(pair) != 2:
            continue

        # Unpack into dedicated names so the module-level `nodes` mapping is
        # not overwritten by a two-element list.
        source, target = pair
        writer.writerow([source, target, weight])