# Degrees

### Write a program that determines how many “degrees of separation” apart two actors are.

In [1]:
import sys
import csv

from util import Node, StackFrontier, QueueFrontier

In [2]:
# Maps lower-cased names to a set of corresponding person_IDs
# (a set, because several people may share the same name)
names = {}

# Maps person_IDs to a dictionary of: name, birth, movies (a set of movie_IDs)
people = {}

# Maps movie_IDs to a dictionary of: title, year, stars (a set of person_IDs)
movies = {}

In [3]:
### Write a program that determines how many “degrees of separation” apart two actors are.

def load_data(directory):
    """
    Load data from CSV files into memory from the given directory.

    Reads `people.csv`, `movies.csv` and `stars.csv` from `directory` and
    fills the module-level `names`, `people` and `movies` dictionaries in
    place. All values read from the files are kept as strings.

    Rows of `stars.csv` that mention a person or a movie that was not loaded
    are skipped entirely, so every movie ID stored for a person is a key of
    `movies` and every person ID stored for a movie is a key of `people`.

    Raises OSError if a file cannot be opened and KeyError if a file lacks
    one of the expected columns.
    """

    # Load people (newline="" lets the csv module handle line endings itself)
    with open(f"{directory}/people.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            people[row["id"]] = {
                "name": row["name"],
                "birth": row["birth"],
                "movies": set(),
            }
            # Index by lower-cased name; several people may share one name
            names.setdefault(row["name"].lower(), set()).add(row["id"])

    # Load movies
    with open(f"{directory}/movies.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            movies[row["id"]] = {
                "title": row["title"],
                "year": row["year"],
                "stars": set(),
            }

    # Load stars
    with open(f"{directory}/stars.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            person_id = row["person_id"]
            movie_id = row["movie_id"]
            person = people.get(person_id)
            movie = movies.get(movie_id)
            # Resolve both sides before touching either one: linking a person
            # to a movie that is not in `movies` would leave a dangling ID
            # that later lookups by movie ID cannot resolve.
            if person is None or movie is None:
                continue
            person["movies"].add(movie_id)
            movie["stars"].add(person_id)


In [4]:
def main():
    """
    Interactive entry point.

    Loads the data set (directory taken from the optional command-line
    argument, "large" by default), asks for two names, and prints the
    shortest chain of shared movies between the two people.
    """
    # At most one optional argument is accepted
    argc = len(sys.argv)
    if argc > 2:
        sys.exit("Usage: python degrees.py [directory]")
    directory = "large" if argc != 2 else sys.argv[1]

    # Fill the in-memory tables
    print("Loading data...")
    load_data(directory)
    print("Data loaded.")

    # Ask for the two endpoints, giving up as soon as one is unknown
    endpoints = []
    for _ in range(2):
        found = person_id_for_name(input("Name: "))
        if found is None:
            sys.exit("Person not found.")
        endpoints.append(found)
    source, target = endpoints

    path = shortest_path(source, target)
    if path is None:
        print("Not connected.")
        return

    degrees = len(path)
    print(f"{degrees} degrees of separation.")

    # Prepend the starting person so consecutive entries form the links
    hops = [(None, source)] + path
    for step, (before, after) in enumerate(zip(hops, hops[1:]), start=1):
        person1 = people[before[1]]["name"]
        person2 = people[after[1]]["name"]
        movie = movies[after[0]]["title"]
        print(f"{step}: {person1} and {person2} starred in {movie}")


In [5]:
def shortest_path(source, target):
    """
    Breadth-first search from `source` to `target`.

    Returns the shortest list of (movie_id, person_id) pairs
    that connect the source to the target, in order from the
    source's first co-star to the target.

    If no path possible, returns None.
    """
    # The search starts with only the source person queued
    frontier = QueueFrontier()
    frontier.add(Node(state=source, parent=None, action=None))

    # States that have already been expanded
    visited = set()

    while not frontier.empty():
        current = frontier.remove()

        # Goal reached: walk the parent links back to the start
        if current.state == target:
            steps = []
            while current.parent is not None:
                steps.append((current.action, current.state))
                current = current.parent
            # Steps were collected target-first; flip them
            steps.reverse()
            return steps

        visited.add(current.state)

        # Queue every co-star that is neither queued nor expanded yet
        for movie_id, co_star in neighbors_for_person(current.state):
            if frontier.contains_state(co_star) or co_star in visited:
                continue
            frontier.add(Node(state=co_star, parent=current, action=movie_id))

    # Frontier exhausted without meeting the target
    return None


In [6]:
def person_id_for_name(name):
    """
    Returns the IMDB id for a person's name,
    resolving ambiguities as needed.

    The lookup is case-insensitive. Returns None when the name is unknown,
    or when several people share the name and the ID typed by the user is
    not one of the listed candidates.
    """
    matches = list(names.get(name.lower(), set()))

    # Unknown name
    if not matches:
        return None

    # Unambiguous name
    if len(matches) == 1:
        return matches[0]

    # Several candidates: list them and let the user pick one by ID
    print(f"Which '{name}'?")
    for candidate in matches:
        record = people[candidate]
        print(f"ID: {candidate}, Name: {record['name']}, Birth: {record['birth']}")

    try:
        chosen = input("Intended Person ID: ")
        if chosen in matches:
            return chosen
    except ValueError:
        pass
    return None

In [7]:
def neighbors_for_person(person_id):
    """
    Returns (movie_id, person_id) pairs for people
    who starred with a given person.

    The result is a set with one pair per star of each of the person's
    movies; the given person appears in it too, once per movie.
    """
    co_stars = set()
    for film_id in people[person_id]["movies"]:
        # Pair this movie with every one of its stars
        co_stars.update(
            (film_id, star_id) for star_id in movies[film_id]["stars"]
        )
    return co_stars

In [8]:
# Run the interactive program only when this file is executed as a script,
# not when it is imported as a module
if __name__ == "__main__":
    main()

SystemExit: Usage: python degrees.py [directory]

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
