In [129]:
import pandas as pd 

directory = 'large'

movies = pd.read_csv(f'./{directory}/movies.csv')
peoples = pd.read_csv(f'./{directory}/people.csv')
stars = pd.read_csv(f'./{directory}/stars.csv')

movies.head()

Unnamed: 0,id,title,year
0,15724,Dama de noche,1993
1,23331,Pesn o geroyakh,1983
2,31458,El huésped del sevillano,1970
3,35423,Kate & Leopold,2001
4,36606,"Another Time, Another Place",1983


In [42]:
peoples.head()

Unnamed: 0,id,name,birth
0,102,Kevin Bacon,1958
1,129,Tom Cruise,1962
2,144,Cary Elwes,1962
3,158,Tom Hanks,1956
4,1597,Mandy Patinkin,1952


In [43]:
stars.head()

Unnamed: 0,person_id,movie_id
0,102,104257
1,102,112384
2,129,104257
3,129,95953
4,144,93779


In [130]:
person_to_id = {} #persons name to their id
personid_to_movie_person = {} # person id to their name and their movie
movieid_to_movie_person = {} # id of movie to name of movie

for elem in peoples.iloc[:].values.tolist():
    person_to_id.update({elem[1]: elem[0]})
    person_id = elem[0]
    info = {}
    info['name'] = elem[1]
    info['birth'] = elem[-1]
    info['movies'] = set()
    personid_to_movie_person.update({person_id: info})

for elem in movies.iloc[:].values.tolist():
    title_year = {}
    title_year['title'] = elem[1]
    title_year['year'] = elem[-1]
    title_year['star'] = set()
    keys = elem[0]
    movieid_to_movie_person.update({keys: title_year})

for elem in stars.iloc[:].values.tolist():
    person_id, movie_id = elem[0], elem[1]
    name_and_movie = {}
    try:
        movieid_to_movie_person[movie_id]['star'].add(person_id)
        personid_to_movie_person[person_id]['movies'].add(movie_id)
    except KeyError:
        continue

In [45]:
movieid_to_movie_person

{112384: {'title': 'Apollo 13', 'year': 1995, 'star': {102, 158, 200, 641}},
 104257: {'title': 'A Few Good Men',
  'year': 1992,
  'star': {102, 129, 193, 197}},
 109830: {'title': 'Forrest Gump', 'year': 1994, 'star': {158, 398, 641, 705}},
 93779: {'title': 'The Princess Bride',
  'year': 1987,
  'star': {144, 705, 1597, 1697}},
 95953: {'title': 'Rain Man', 'year': 1988, 'star': {129, 163, 420, 596520}}}

In [46]:
personid_to_movie_person
    

{102: {'name': 'Kevin Bacon', 'birth': 1958, 'movies': {104257, 112384}},
 129: {'name': 'Tom Cruise', 'birth': 1962, 'movies': {95953, 104257}},
 144: {'name': 'Cary Elwes', 'birth': 1962, 'movies': {93779}},
 158: {'name': 'Tom Hanks', 'birth': 1956, 'movies': {109830, 112384}},
 1597: {'name': 'Mandy Patinkin', 'birth': 1952, 'movies': {93779}},
 163: {'name': 'Dustin Hoffman', 'birth': 1937, 'movies': {95953}},
 1697: {'name': 'Chris Sarandon', 'birth': 1942, 'movies': {93779}},
 193: {'name': 'Demi Moore', 'birth': 1962, 'movies': {104257}},
 197: {'name': 'Jack Nicholson', 'birth': 1937, 'movies': {104257}},
 200: {'name': 'Bill Paxton', 'birth': 1955, 'movies': {112384}},
 398: {'name': 'Sally Field', 'birth': 1946, 'movies': {109830}},
 420: {'name': 'Valeria Golino', 'birth': 1965, 'movies': {95953}},
 596520: {'name': 'Gerald R. Molen', 'birth': 1935, 'movies': {95953}},
 641: {'name': 'Gary Sinise', 'birth': 1955, 'movies': {109830, 112384}},
 705: {'name': 'Robin Wright', '

In [47]:
person_to_id

{'Kevin Bacon': 102,
 'Tom Cruise': 129,
 'Cary Elwes': 144,
 'Tom Hanks': 158,
 'Mandy Patinkin': 1597,
 'Dustin Hoffman': 163,
 'Chris Sarandon': 1697,
 'Demi Moore': 193,
 'Jack Nicholson': 197,
 'Bill Paxton': 200,
 'Sally Field': 398,
 'Valeria Golino': 420,
 'Gerald R. Molen': 596520,
 'Gary Sinise': 641,
 'Robin Wright': 705,
 'Emma Watson': 914612}

In [101]:
def get_person_id(person_name):
    try:
        return person_to_id[person_name]
    except KeyError:
        raise ValueError(f'{person_name} not exist in dataset')

def get_name_from_id(person_id):
    try:
        return personid_to_movie_person[person_id]['name']
    except KeyError:
        raise ValueError(f'{person_id} not exist in dataset')

def get_input():
    while True:
        first_person = input('Enter first person:').lower().title()
        if first_person == '':
            break
        try:
            id_first_person = get_person_id(first_person)
            print(f'Found {first_person}!')
            break
        except ValueError as e:
            print(e)
            continue
    while True:
        second_person = input('Enter second person:').lower().title()
        if second_person == '':
            break
        try:
            id_second_person = get_person_id(second_person)
            print(f'Found {second_person}!')
            break
        except ValueError as e:
            print(e)
            continue
    return id_first_person, id_second_person

In [127]:
def neightbors_person(person_id):
    # Return id and movie of person who star with person_id
    neightbors = set()
    try:
        movie_ids = personid_to_movie_person[person_id]['movies']
        for movie_id in movie_ids:
            for person_id_neighbor in movieid_to_movie_person[movie_id]['star']:
                # if person_id == person_id_neighbor:
                #     continue
                neightbors.add((movie_id, person_id_neighbor))
    except KeyError:
        pass
    return neightbors # set of tuple

In [128]:
# Start -> Hanks: 158, Goal -> Cruise: 129
print(sorted(neightbors_person(158), key=lambda x: x[1]))

[(112384, 102), (112384, 158), (109830, 158), (112384, 200), (109830, 398), (112384, 641), (109830, 641), (109830, 705)]


In [103]:
from util import Node, QueueFrontier, StackFrontier

In [None]:
frontier = QueueFrontier() # BFS
num_explored = 0
explored = set()
start_id, goal_id = get_input()

start = Node(state=start_id, action=None, parent=None)
frontier.add(start)

solutions = []

while True:
    if frontier.empty():
        print('Frontier is empty. No solution')
        break
    
    node = frontier.remove()
    num_explored += 1
    
    if node.state == goal_id:
        while node.parent is not None:
            movie_id = node.action
            person_id = node.state
            solutions.append((movie_id, person_id))
            node = node.parent
        solutions.reverse()
        break
    
    explored.add(node.state)
    
    for action, state in neightbors_person(node.state):
        if state not in explored:
            child = Node(state=state, parent=node, action=action)
            frontier.add(child)

In [94]:
num_explored

10

In [116]:
solutions

[(112384, 102), (104257, 129)]

In [80]:
def get_name_movie(movie_id):
    try:
        return movieid_to_movie_person[movie_id]['title']
    except KeyError:
        print(f'No title of {movie_id} id found!')
    return 'Not Found'

In [121]:
print(len(solutions),'degrees of seperation')
for i in range(0, len(solutions)):
    if i == 0:
        name1 = get_name_from_id(start_id)
    else:
        person1 = solutions[i - 1][1]
        name1 = get_name_from_id(person1)
    person2 = solutions[i][1]
    name2 = get_name_from_id(person2)
    film = solutions[i][0]
    film_name = get_name_movie(film)
    
    print(f'{i+1}: {name1} and {name2} starred in movie "{film_name}"')

2 degrees of seperation
1: Tom Cruise and Kevin Bacon starred in movie "A Few Good Men"
2: Kevin Bacon and Tom Hanks starred in movie "Apollo 13"
