In [6]:
import pandas as pd

# Reading all the datasets
df1 = pd.read_csv('movies.csv')
df2 = pd.read_csv('people.csv')
df3 = pd.read_csv('stars.csv')

# Renaming the columns
df1.rename(columns = {'id':'movie_id'}, inplace = True)
df2.rename(columns = {'id':'person_id'}, inplace = True)

# Merging the datasets
df = df3.merge(df1, on='movie_id', how='left').merge(df2, on='person_id', how='left')

#Dropping unnecessary columns
df = df.drop(['birth', 'person_id', 'movie_id'], axis = 1)

#Sorting the columns
df = df.sort_index(axis=1)

#Saving the dataset
df.to_csv('small.csv', index=False)

In [11]:
df.head()

Unnamed: 0,name,title,year
0,Kevin Bacon,A Few Good Men,1992
1,Kevin Bacon,Apollo 13,1995
2,Tom Cruise,A Few Good Men,1992
3,Tom Cruise,Rain Man,1988
4,Cary Elwes,The Princess Bride,1987


In [12]:
import csv
import sys

from queue import Queue

# Function to load data from the CSV file
def load_data(filename):
    data = {}
    with open(filename, "r") as file:
        reader = csv.reader(file)
        next(reader)  # Skip header row if present
        for row in reader:
            if row[0] not in data:
                data[row[0]] = set()
            data[row[0]].add(row[1])
            if row[1] not in data:
                data[row[1]] = set()
            data[row[1]].add(row[0])
    return data

# Function to find the degree of separation between two actors using BFS
def bfs(start, end, data):
    visited = set()
    queue = Queue()
    queue.put((start, 0))  # (actor, depth)

    while not queue.empty():
        actor, degree = queue.get()
        if actor == end:
            return degree
        visited.add(actor)
        for neighbor in data[actor]:
            if neighbor not in visited:
                queue.put((neighbor, degree + 1))  # Enqueue neighbors at next depth

    return None

# Main function
def main():
    filename = "small.csv"  # Replace with your actual CSV file path
    start = "Dustin Hoffman"
    end = "Chris Sarandon"

    data = load_data(filename)
    degree = bfs(start, end, data)

    if degree is None:
        print("No connection found")
    else:
        print(f"{start} and {end}: Degree of Separation = {degree}")

if __name__ == "__main__":
    main()


Dustin Hoffman and Chris Sarandon: Degree of Separation = 10


In [15]:
import csv
import sys

from collections import deque  # Use deque for flexibility

# Function to load data from the CSV file
def load_data(filename):
    data = {}
    with open(filename, "r") as file:
        reader = csv.reader(file)
        next(reader)  # Skip header row if present
        for row in reader:
            if row[0] not in data:
                data[row[0]] = set()
            data[row[0]].add(row[1])
            if row[1] not in data:
                data[row[1]] = set()
            data[row[1]].add(row[0])
    return data

# Function to find the degree of separation between two actors using DFS
def dfs(start, end, data):
    visited = set()
    stack = deque([(start, 0)])  # Use deque for both BFS and DFS

    while stack:
        actor, degree = stack.pop()
        if actor == end:
            return degree
        visited.add(actor)
        for neighbor in data[actor]:
            if neighbor not in visited:
                stack.append((neighbor, degree + 1))  # Push (neighbor, depth) onto stack

    return None

# Main function
def main():
    filename = "small.csv"  # Replace with your actual CSV file path
    start = "Dustin Hoffman"
    end = "Chris Sarandon"

    data = load_data(filename)
    degree = dfs(start, end, data)

    if degree is None:
        print("No connection found")
    else:
        print(f"{start} and {end}: Degree of Separation = {degree}")

if __name__ == "__main__":
    main()


Dustin Hoffman and Chris Sarandon: Degree of Separation = 10
