In [46]:
import re
import json

import pandas as pd
import numpy as np

from collections import deque

## Process dataset

In [47]:
base_folder = "../movies-dataset/"
movies_metadata_fn = "movies_metadata.csv"
credits_fn = "credits.csv"
links_fn = "links.csv"

## Process movies_metadata data structure/schema

In [48]:
metadata = pd.read_csv(base_folder + movies_metadata_fn)
metadata.head(3)

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0


## Cast id to int64 and drop any NAN values!

In [49]:
metadata.id = pd.to_numeric(metadata.id, downcast='signed', errors='coerce')

In [50]:
metadata = metadata[metadata['id'].notna()]

In [51]:
list(metadata.columns.values)

['adult',
 'belongs_to_collection',
 'budget',
 'genres',
 'homepage',
 'id',
 'imdb_id',
 'original_language',
 'original_title',
 'overview',
 'popularity',
 'poster_path',
 'production_companies',
 'production_countries',
 'release_date',
 'revenue',
 'runtime',
 'spoken_languages',
 'status',
 'tagline',
 'title',
 'video',
 'vote_average',
 'vote_count']

In [52]:
def CustomParser(data):
    obj = json.loads(data)
    return obj

We probably need id, title from this dataframe.

## Process credits data structure/schema

In [53]:
credits = pd.read_csv(base_folder + credits_fn)
# Cast id to int
credits.id = pd.to_numeric(credits.id, downcast='signed', errors='coerce')
credits.head()

Unnamed: 0,cast,crew,id
0,"[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...",862
1,"[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...",8844
2,"[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...",15602
3,"[{'cast_id': 1, 'character': ""Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...",31357
4,"[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de...",11862


In [54]:
# cast id to int64 for later join
metadata['id'] = metadata['id'].astype(np.int64)
credits['id'] = credits['id'].astype(np.int64)

In [55]:
metadata.dtypes

adult                     object
belongs_to_collection     object
budget                    object
genres                    object
homepage                  object
id                         int64
imdb_id                   object
original_language         object
original_title            object
overview                  object
popularity                object
poster_path               object
production_companies      object
production_countries      object
release_date              object
revenue                  float64
runtime                  float64
spoken_languages          object
status                    object
tagline                   object
title                     object
video                     object
vote_average             float64
vote_count               float64
dtype: object

In [56]:
credits.dtypes

cast    object
crew    object
id       int64
dtype: object

## Let's join the two dataset based on movie id

We start with one example movie `Toy Story` with id = 862 in metadata dataset.

In [57]:
merged = pd.merge(metadata, credits, on='id')

In [58]:
merged.head(3)

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,cast,crew
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,"[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de..."
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,"[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de..."
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,"[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de..."


In [59]:
toy_story_id = 862
merged.loc[merged['id'] == toy_story_id]

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,cast,crew
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,"[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de..."


## Examine crew/cast json data schme for toy story

In [60]:
cast = merged.loc[merged['id'] == toy_story_id].cast
crew = merged.loc[merged['id'] == toy_story_id].crew

In [61]:
cast

0    [{'cast_id': 14, 'character': 'Woody (voice)',...
Name: cast, dtype: object

## Find all movies Tom hanks has acted in

In [62]:
def has_played(actor_name, cast_data):
    for cast in cast_data:
        name = cast['name']
        actor_id = cast['id']
        cast_id = cast['cast_id']
        credit_id = cast['credit_id']        
        if actor_name.lower() == name.lower():
            print("name: {}, id: {}, cast_id: {}, credit_id: {}".format(name, actor_id, cast_id, credit_id))
            return True
    return False

## Setup data structure

In [63]:
# a map from movie id to a list of actor id's
movie_actor_adj_list = {}
# a map from actor id to a list of movie id's
actor_movie_adj_list = {}
# a map from movies id to their title
movies_map = {}
# a map from actors id to their name
actors_map = {}

In [96]:
def cleanup_json(dirty_json):
    b = re.sub("{'", '{"', dirty_json)
    c = re.sub("':", '":', b)
    d = re.sub(", '", ', "', c)
    e = re.sub(": '", ': "', d)
    f = re.sub("', ", '", ', e)
    h = re.sub("'", '', f)
    i = re.sub('}', '"}', h)
    j = re.sub(r': ([a-zA-Z\s]+), ', r': "\1", ', i)
    k = re.sub(r' None"', r' null', j)
    return k

## Parse the data and construct the bipartite graph

In [97]:
cnt, errors = 0, 0
failed_movies = {}
for index, row in merged.iterrows():
    cnt += 1
    movie_id, movie_title = row['id'], row['title']
    if movie_id not in movies_map:
        movies_map[movie_id] = movie_title
    dirty_json = row['cast']
    try:
#         regex_replace = [(r"([ \{,:\[])(u)?'([^']+)'", r'\1"\3"'), (r" None", r' null')]
#         for r, s in regex_replace:
#             dirty_json = re.sub(r, s, dirty_json)
        dirty_json = cleanup_json(dirty_json)
#         print(dirty_json)
        cast_data = json.loads(dirty_json)
#         if has_played('Tom Hanks', cast_data):
#             print("Movie id: {}, title: {}".format(movie_id, movie_title))
        for cast in cast_data:
            actor_name = cast['name']
            actor_id = cast['id']
            if actor_id not in actors_map:
                actors_map[actor_id] = actor_name
            # build movie-actor adj list
            if movie_id not in movie_actor_adj_list:
                movie_actor_adj_list[movie_id] = [actor_id]
            else:
                movie_actor_adj_list[movie_id].append(actor_id)
            # build actor-movie adj list
            if actor_id not in actor_movie_adj_list:
                actor_movie_adj_list[actor_id] = [movie_id]
            else:
                actor_movie_adj_list[actor_id].append(movie_id)
    except json.JSONDecodeError as err:
        print("JSONDecodeError: {}, Movie id: {}, title: {}".format(err, movie_id, movie_title))
        if movie_id == 629:
            print(row['cast'][1893-50:1893+50])
#             break
        failed_movies[movie_id] = True
        errors += 1
print("Parsed credist: {}, errors: {}".format(cnt, errors))

JSONDecodeError: Expecting ',' delimiter: line 1 column 3271 (char 3270), Movie id: 10858, title: Nixon
JSONDecodeError: Expecting ',' delimiter: line 1 column 1894 (char 1893), Movie id: 629, title: The Usual Suspects
.jpg'}, {'cast_id': 28, 'character': 'Sgt. Jeffrey "Jeff" Rabin', 'credit_id': '52fe4260c3a36847f801
JSONDecodeError: Expecting ',' delimiter: line 1 column 1879 (char 1878), Movie id: 11859, title: Fair Game
JSONDecodeError: Expecting ',' delimiter: line 1 column 53 (char 52), Movie id: 9101, title: Down Periscope
JSONDecodeError: Expecting ',' delimiter: line 1 column 3259 (char 3258), Movie id: 2293, title: Mallrats
JSONDecodeError: Expecting ',' delimiter: line 1 column 1345 (char 1344), Movie id: 9070, title: Mighty Morphin Power Rangers: The Movie
JSONDecodeError: Expecting ',' delimiter: line 1 column 13216 (char 13215), Movie id: 8467, title: Dumb and Dumber
JSONDecodeError: Expecting ',' delimiter: line 1 column 236 (char 235), Movie id: 14819, title: Heavyweigh

JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 3600 (char 3599), Movie id: 22318, title: Permanent Midnight
JSONDecodeError: Expecting ',' delimiter: line 1 column 1837 (char 1836), Movie id: 3121, title: Nashville
JSONDecodeError: Expecting ',' delimiter: line 1 column 238 (char 237), Movie id: 9821, title: The Mighty
JSONDecodeError: Expecting ',' delimiter: line 1 column 127 (char 126), Movie id: 32326, title: The Cruise
JSONDecodeError: Expecting ',' delimiter: line 1 column 473 (char 472), Movie id: 20701, title: Tales from the Darkside: The Movie
JSONDecodeError: Expecting ',' delimiter: line 1 column 1221 (char 1220), Movie id: 19426, title: Nights of Cabiria
JSONDecodeError: Expecting ',' delimiter: line 1 column 244 (char 243), Movie id: 9798, title: Enemy of the State
JSONDecodeError: Expecting ',' delimiter: line 1 column 2724 (char 2723), Movie id: 9749, title: Fletch
JSONDecodeError: Expecting ',' delimiter: line 1 column 249 (char 248), 

JSONDecodeError: Expecting ',' delimiter: line 1 column 247 (char 246), Movie id: 4988, title: Semi-Tough
JSONDecodeError: Expecting ',' delimiter: line 1 column 1342 (char 1341), Movie id: 15497, title: Twelve O'Clock High
JSONDecodeError: Expecting ',' delimiter: line 1 column 916 (char 915), Movie id: 10364, title: Catch-22
JSONDecodeError: Expecting ',' delimiter: line 1 column 651 (char 650), Movie id: 10868, title: The Accused
JSONDecodeError: Expecting ',' delimiter: line 1 column 238 (char 237), Movie id: 7520, title: Cocktail
JSONDecodeError: Expecting ',' delimiter: line 1 column 2100 (char 2099), Movie id: 5680, title: Elvira, Mistress of the Dark
JSONDecodeError: Expecting ',' delimiter: line 1 column 42 (char 41), Movie id: 11467, title: America's Sweethearts
JSONDecodeError: Expecting ',' delimiter: line 1 column 124 (char 123), Movie id: 11959, title: UHF
JSONDecodeError: Expecting ',' delimiter: line 1 column 40 (char 39), Movie id: 9880, title: The Princess Diaries
JSO

JSONDecodeError: Expecting ',' delimiter: line 1 column 3174 (char 3173), Movie id: 10677, title: Dirty Dancing: Havana Nights
JSONDecodeError: Expecting ',' delimiter: line 1 column 2831 (char 2830), Movie id: 1487, title: Hellboy
JSONDecodeError: Expecting ',' delimiter: line 1 column 240 (char 239), Movie id: 14442, title: Ella Enchanted
JSONDecodeError: Expecting ',' delimiter: line 1 column 237 (char 236), Movie id: 9509, title: Man on Fire
JSONDecodeError: Expecting ',' delimiter: line 1 column 692 (char 691), Movie id: 10347, title: Dobermann
JSONDecodeError: Expecting ',' delimiter: line 1 column 52 (char 51), Movie id: 30959, title: Kwaidan
JSONDecodeError: Expecting ',' delimiter: line 1 column 42 (char 41), Movie id: 17538, title: Rooster Cogburn
JSONDecodeError: Expecting ',' delimiter: line 1 column 222 (char 221), Movie id: 31112, title: Bon Voyage, Charlie Brown (and Don't Come Back!)
JSONDecodeError: Expecting ',' delimiter: line 1 column 638 (char 637), Movie id: 11602

JSONDecodeError: Expecting ',' delimiter: line 1 column 1247 (char 1246), Movie id: 18614, title: Interrogation
JSONDecodeError: Expecting ',' delimiter: line 1 column 38 (char 37), Movie id: 63395, title: A Run for Your Money
JSONDecodeError: Expecting ',' delimiter: line 1 column 624 (char 623), Movie id: 36380, title: Boys Don't Cry
JSONDecodeError: Expecting ',' delimiter: line 1 column 39 (char 38), Movie id: 39788, title: Vinci
JSONDecodeError: Expecting ',' delimiter: line 1 column 873 (char 872), Movie id: 45759, title: Helter Skelter
JSONDecodeError: Expecting ',' delimiter: line 1 column 39 (char 38), Movie id: 11183, title: French Fried Vacation
JSONDecodeError: Expecting ',' delimiter: line 1 column 1453 (char 1452), Movie id: 27629, title: Here Comes the Groom
JSONDecodeError: Expecting ',' delimiter: line 1 column 1663 (char 1662), Movie id: 7304, title: Running Scared
JSONDecodeError: Expecting ',' delimiter: line 1 column 830 (char 829), Movie id: 7483, title: Free Zone

JSONDecodeError: Expecting ',' delimiter: line 1 column 37 (char 36), Movie id: 7517, title: Live Free or Die
JSONDecodeError: Expecting ',' delimiter: line 1 column 48 (char 47), Movie id: 16869, title: Inglourious Basterds
JSONDecodeError: Expecting ',' delimiter: line 1 column 39 (char 38), Movie id: 38872, title: Kiler-ów 2-óch
JSONDecodeError: Expecting ',' delimiter: line 1 column 671 (char 670), Movie id: 38869, title: Wyjście awaryjne
JSONDecodeError: Expecting ',' delimiter: line 1 column 1415 (char 1414), Movie id: 42062, title: Moontide
JSONDecodeError: Expecting ',' delimiter: line 1 column 444 (char 443), Movie id: 155426, title: Anioł w Krakowie
JSONDecodeError: Expecting ',' delimiter: line 1 column 968 (char 967), Movie id: 58018, title: U Pana Boga za piecem
JSONDecodeError: Expecting ',' delimiter: line 1 column 231 (char 230), Movie id: 27799, title: Hana and Alice
JSONDecodeError: Invalid \escape: line 1 column 1511 (char 1510), Movie id: 14728, title: The Librarian

JSONDecodeError: Expecting ',' delimiter: line 1 column 241 (char 240), Movie id: 60062, title: Judy Moody and the Not Bummer Summer
JSONDecodeError: Expecting ',' delimiter: line 1 column 562 (char 561), Movie id: 108213, title: King of Jazz
JSONDecodeError: Expecting ',' delimiter: line 1 column 1517 (char 1516), Movie id: 62382, title: The Mattei Affair
JSONDecodeError: Expecting ',' delimiter: line 1 column 1924 (char 1923), Movie id: 49013, title: Cars 2
JSONDecodeError: Expecting ',' delimiter: line 1 column 39 (char 38), Movie id: 48627, title: Room for One More
JSONDecodeError: Expecting ',' delimiter: line 1 column 469 (char 468), Movie id: 68029, title: Double Dhamaal
JSONDecodeError: Expecting ',' delimiter: line 1 column 954 (char 953), Movie id: 134168, title: Trio
JSONDecodeError: Expecting ',' delimiter: line 1 column 1025 (char 1024), Movie id: 45801, title: Joan of Paris
JSONDecodeError: Expecting ',' delimiter: line 1 column 1020 (char 1019), Movie id: 33927, title: D

JSONDecodeError: Expecting ',' delimiter: line 1 column 1250 (char 1249), Movie id: 190754, title: On the Job
JSONDecodeError: Expecting ',' delimiter: line 1 column 3735 (char 3734), Movie id: 200713, title: 9 Month Stretch
JSONDecodeError: Expecting ',' delimiter: line 1 column 246 (char 245), Movie id: 163392, title: The War Between Men and Women
JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2012 (char 2011), Movie id: 121636, title: The Rat Race
JSONDecodeError: Expecting ',' delimiter: line 1 column 42 (char 41), Movie id: 8985, title: Visions of Europe
JSONDecodeError: Expecting ',' delimiter: line 1 column 4348 (char 4347), Movie id: 109443, title: Anchorman 2: The Legend Continues
JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2034 (char 2033), Movie id: 58799, title: Christmas Eve
JSONDecodeError: Expecting ',' delimiter: line 1 column 53 (char 52), Movie id: 37129, title: Only Old Men Are Going to Battle
J

JSONDecodeError: Expecting ',' delimiter: line 1 column 36 (char 35), Movie id: 23853, title: Wish You Were Dead
JSONDecodeError: Expecting ',' delimiter: line 1 column 1258 (char 1257), Movie id: 99345, title: The Gallant Hours
JSONDecodeError: Expecting ',' delimiter: line 1 column 457 (char 456), Movie id: 17128, title: Hanzo the Razor: The Snare
JSONDecodeError: Expecting ',' delimiter: line 1 column 49 (char 48), Movie id: 139922, title: She's Working Her Way Through College
JSONDecodeError: Expecting ',' delimiter: line 1 column 42 (char 41), Movie id: 74550, title: The Smiling Ghost
JSONDecodeError: Expecting ',' delimiter: line 1 column 36 (char 35), Movie id: 217719, title: Snake & Mongoose
JSONDecodeError: Expecting ',' delimiter: line 1 column 39 (char 38), Movie id: 72160, title: Yellowstone Kelly
JSONDecodeError: Expecting ',' delimiter: line 1 column 856 (char 855), Movie id: 44023, title: Make Mine Mink
JSONDecodeError: Expecting ',' delimiter: line 1 column 38 (char 37)

JSONDecodeError: Expecting ',' delimiter: line 1 column 2098 (char 2097), Movie id: 302960, title: Scooby-Doo! Moon Monster Madness
JSONDecodeError: Expecting ',' delimiter: line 1 column 4108 (char 4107), Movie id: 49725, title: Tigers in Lipstick
JSONDecodeError: Expecting ',' delimiter: line 1 column 37 (char 36), Movie id: 204912, title: Thunder in Paradise
JSONDecodeError: Expecting ',' delimiter: line 1 column 441 (char 440), Movie id: 305747, title: Hoovey
JSONDecodeError: Expecting ',' delimiter: line 1 column 3134 (char 3133), Movie id: 331161, title: See You In Valhalla
JSONDecodeError: Expecting ',' delimiter: line 1 column 42 (char 41), Movie id: 214146, title: UncuT: Member Only
JSONDecodeError: Expecting ',' delimiter: line 1 column 228 (char 227), Movie id: 240154, title: The Life Coach
JSONDecodeError: Expecting ',' delimiter: line 1 column 424 (char 423), Movie id: 16355, title: Sonic The Hedgehog: The Movie
JSONDecodeError: Expecting ',' delimiter: line 1 column 47 (c

JSONDecodeError: Expecting ',' delimiter: line 1 column 44 (char 43), Movie id: 299687, title: The 5th Wave
JSONDecodeError: Expecting ',' delimiter: line 1 column 4280 (char 4279), Movie id: 39979, title: The Big Racket
JSONDecodeError: Expecting ',' delimiter: line 1 column 582 (char 581), Movie id: 317953, title: Spud 3: Learning to Fly
JSONDecodeError: Expecting ',' delimiter: line 1 column 435 (char 434), Movie id: 16422, title: The Caller
JSONDecodeError: Expecting ',' delimiter: line 1 column 40 (char 39), Movie id: 248706, title: Mrs. Lambert Remembers Love
JSONDecodeError: Expecting ',' delimiter: line 1 column 232 (char 231), Movie id: 203066, title: Motorcycle Gang
JSONDecodeError: Expecting ',' delimiter: line 1 column 40 (char 39), Movie id: 16308, title: Mobile Suit Gundam F91
JSONDecodeError: Expecting ',' delimiter: line 1 column 45 (char 44), Movie id: 95164, title: About Love
JSONDecodeError: Expecting ',' delimiter: line 1 column 3195 (char 3194), Movie id: 315319, t

JSONDecodeError: Expecting ',' delimiter: line 1 column 1063 (char 1062), Movie id: 86254, title: Crash Landing
JSONDecodeError: Expecting ',' delimiter: line 1 column 3416 (char 3415), Movie id: 156335, title: The Vanishing Legion
JSONDecodeError: Expecting ',' delimiter: line 1 column 240 (char 239), Movie id: 132765, title: Stone & Ed
JSONDecodeError: Expecting ',' delimiter: line 1 column 42 (char 41), Movie id: 248781, title: ABCs of Death 2.5
JSONDecodeError: Expecting ',' delimiter: line 1 column 1246 (char 1245), Movie id: 95610, title: Bridget Jones's Baby
JSONDecodeError: Expecting ',' delimiter: line 1 column 1908 (char 1907), Movie id: 393441, title: Dishoom
JSONDecodeError: Expecting ',' delimiter: line 1 column 1033 (char 1032), Movie id: 124202, title: I Carabbinieri
JSONDecodeError: Expecting ',' delimiter: line 1 column 68 (char 67), Movie id: 38289, title: Eccezzziunale... veramente
JSONDecodeError: Expecting ',' delimiter: line 1 column 1533 (char 1532), Movie id: 31

JSONDecodeError: Expecting ',' delimiter: line 1 column 57 (char 56), Movie id: 354534, title: The Invoking 2
JSONDecodeError: Expecting ',' delimiter: line 1 column 230 (char 229), Movie id: 92983, title: Businessmen
JSONDecodeError: Expecting ',' delimiter: line 1 column 483 (char 482), Movie id: 22643, title: Mickey's House of Villains
JSONDecodeError: Expecting ',' delimiter: line 1 column 637 (char 636), Movie id: 435041, title: Obsession
JSONDecodeError: Expecting ',' delimiter: line 1 column 1500 (char 1499), Movie id: 54309, title: Notte prima degli esami - Oggi
JSONDecodeError: Expecting ',' delimiter: line 1 column 41 (char 40), Movie id: 38397, title: La leggenda di Al, John e Jack
JSONDecodeError: Expecting ',' delimiter: line 1 column 459 (char 458), Movie id: 26177, title: Annie
JSONDecodeError: Expecting ',' delimiter: line 1 column 245 (char 244), Movie id: 42642, title: The Adventures of Prince Florisel
JSONDecodeError: Expecting ',' delimiter: line 1 column 51 (char 5

In [21]:
movie_actor_adj_list[862]

[31,
 12898,
 7167,
 12899,
 12900,
 7907,
 8873,
 1116442,
 12901,
 12133,
 8655,
 12903,
 37221]

## Create the inverse maps for movies/actors

In [98]:
inv_actors_map = {v: k for k, v in actors_map.items()}
inv_movies_map = {v: k for k, v in movies_map.items()}

In [99]:
kevin_id = inv_actors_map['Kevin Bacon']
print(kevin_id)

4724


## Run BFS to compute the Bacon number

In [100]:
DEBUG = False

q = deque()
q.append(kevin_id)
bacon_degrees = {kevin_id: 0}
visited = {}
degree = 1

while q:
    u = q.popleft()
    if DEBUG:
        print("u: {}".format(u))
    if u not in visited:
        visited[u] = True
        if DEBUG:
            print("degree(u): {}".format(bacon_degrees[u]))
        if bacon_degrees[u] % 2 == 0:
            # u is an actor type node
            neighbors = actor_movie_adj_list[u]
            if DEBUG:
                print("actor type, neighbors: {}".format(neighbors))
        else:
            # u is a movie type node
            neighbors = movie_actor_adj_list[u]
            if DEBUG:
                print("movie type, neighbors: {}".format(neighbors))
        for v in neighbors:
            if v not in visited:
                q.append(v)
                if v not in bacon_degrees:
                    bacon_degrees[v] = bacon_degrees[u] + 1

In [101]:
bacon_degrees[kevin_id]

0

In [103]:
actor_id = inv_actors_map['Tom Hanks']
bacon_degrees[actor_id]/2

1.0

In [104]:
actor_id = inv_actors_map['Tom Cruise']
bacon_degrees[actor_id]/2

1.0

In [105]:
movie_id = inv_movies_map['Apollo 13']
failed_movies[movie_id]

KeyError: 568

In [106]:
tom_id = inv_actors_map['Tom Cruise']
tom_cruise_movies = actor_movie_adj_list[tom_id]

kevin_id = inv_actors_map['Kevin Bacon']
kevin_bacon_movies = actor_movie_adj_list[kevin_id]

In [107]:
set(tom_cruise_movies).intersection(set(kevin_bacon_movies))

{881}

In [108]:
movies_map[881]

'A Few Good Men'

In [109]:
actor_id = inv_actors_map['Ronald Reagan']
# print(chris_id)
bacon_degrees[actor_id]/2

2.0