In [4]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import pairwise_distances
import numpy as np

# Load the dataset
data = pd.read_csv('movies.csv')

# Preprocessing genres
data['genres'] = data['genres'].apply(lambda x: x.split('|'))

# Binarizing the genre data
mlb = MultiLabelBinarizer()
genre_data = mlb.fit_transform(data['genres'])

# Function to calculate similarity for a specific movie against all others
def calculate_similarity(movie_index, genre_data, metric='euclidean'):
    # Calculate distances from a single movie to all others
    if metric == 'euclidean':
        distances = np.sqrt(np.sum((genre_data - genre_data[movie_index])**2, axis=1))
    elif metric == 'jaccard':
        intersection = np.logical_and(genre_data, genre_data[movie_index]).sum(axis=1)
        union = np.logical_or(genre_data, genre_data[movie_index]).sum(axis=1)
        distances = 1 - intersection / union
    return distances

# Function to find top N similar movies
def get_top_similar_movies(title, genre_data, top_n=10, metric='euclidean'):
    if title not in data['title'].values:
        return "Movie not found in dataset."

    movie_index = data.index[data['title'] == title].tolist()[0]
    distances = calculate_similarity(movie_index, genre_data, metric=metric)

    # Get indices of movies sorted by distance (ascending order for Euclidean)
    if metric == 'euclidean':
        sorted_indices = np.argsort(distances)
    else:
        sorted_indices = np.argsort(-distances)  # descending for Jaccard similarity

    top_indices = sorted_indices[1:top_n+1]  # Exclude the movie itself
    top_movies = data['title'].iloc[top_indices]
    top_scores = distances[top_indices]

    return list(zip(top_movies, top_scores))

# Define a function to display results in a DataFrame
def display_similar_movies_df(movie_title, genre_data, metric='euclidean', top_n=10):
    similar_movies = get_top_similar_movies(movie_title, genre_data, top_n=top_n, metric=metric)
    df = pd.DataFrame(similar_movies, columns=['Title', f'Similarity ({metric.capitalize()})'])
    return df

# Generate dataframes for each movie and metric
toy_story_df_euclidean = display_similar_movies_df("Toy Story (1995)", genre_data, 'euclidean')
toy_story_df_jaccard = display_similar_movies_df("Toy Story (1995)", genre_data, 'jaccard')

jumanji_df_euclidean = display_similar_movies_df("Jumanji (1995)", genre_data, 'euclidean')
jumanji_df_jaccard = display_similar_movies_df("Jumanji (1995)", genre_data, 'jaccard')

grumpier_old_men_df_euclidean = display_similar_movies_df("Grumpier Old Men (1995)", genre_data, 'euclidean')
grumpier_old_men_df_jaccard = display_similar_movies_df("Grumpier Old Men (1995)", genre_data, 'jaccard')

# Display tables
print("\nToy Story (1995) - Top 10 Similar Movies Based on Euclidean Distance:")
display(toy_story_df_euclidean)
print("\nToy Story (1995) - Top 10 Similar Movies Based on Jaccard Similarity:")
display(toy_story_df_jaccard)

print("\nJumanji (1995) - Top 10 Similar Movies Based on Euclidean Distance:")
display(jumanji_df_euclidean)
print("\nJumanji (1995) - Top 10 Similar Movies Based on Jaccard Similarity:")
display(jumanji_df_jaccard)

print("\nGrumpier Old Men (1995) - Top 10 Similar Movies Based on Euclidean Distance:")
display(grumpier_old_men_df_euclidean)
print("\nGrumpier Old Men (1995) - Top 10 Similar Movies Based on Jaccard Similarity:")
display(grumpier_old_men_df_jaccard)


Toy Story (1995) - Top 10 Similar Movies Based on Euclidean Distance:


Unnamed: 0,Title,Similarity (Euclidean)
0,"Tale of Despereaux, The (2008)",0.0
1,Riverdance: The Animated Adventure (2021),0.0
2,UglyDolls (2019),0.0
3,Soul (2020),0.0
4,Olaf's Frozen Adventure (2017),0.0
5,"Monsters, Inc. (2001)",0.0
6,Asterix and the Vikings (Astérix et les Viking...,0.0
7,Luca (2021),0.0
8,Tad the Lost Explorer and the Curse of the Mum...,0.0
9,Toy Story Toons: Small Fry (2011),0.0



Toy Story (1995) - Top 10 Similar Movies Based on Jaccard Similarity:


Unnamed: 0,Title,Similarity (Jaccard)
0,In the Presence of a Clown (1997),1.0
1,The Captains' Summit (2009),1.0
2,Die letzte Rache (1982),1.0
3,Confessions of Loving Couples (1967),1.0
4,Best Laid Plans (2012),1.0
5,Light Keeps Me Company (2000),1.0
6,Long Gone (2003),1.0
7,And We Go Green (2019),1.0
8,The Last Days of Frankie the Fly (1996),1.0
9,Mods (2002),1.0



Jumanji (1995) - Top 10 Similar Movies Based on Euclidean Distance:


Unnamed: 0,Title,Similarity (Euclidean)
0,Magic in the Water (1995),0.0
1,Little Ghost (1997),0.0
2,Five Children and It (2004),0.0
3,Paws (1997),0.0
4,Darby O'Gill and the Little People (1959),0.0
5,Bridge to Terabithia (2007),0.0
6,Foxter and Max (2019),0.0
7,"Golden Compass, The (2007)",0.0
8,Only a Miracle (2019),0.0
9,Anastasia: Once Upon a Time (2020),0.0



Jumanji (1995) - Top 10 Similar Movies Based on Jaccard Similarity:


Unnamed: 0,Title,Similarity (Jaccard)
0,The Mutants (1998),1.0
1,Trance (2006),1.0
2,The Fifth Empire (2004),1.0
3,Magic Mirror (2006),1.0
4,Goldstein (1965),1.0
5,The Beyond (2018),1.0
6,O Capacete Dourado (2007),1.0
7,An Insignificant Man (2016),1.0
8,Emmanuel Macron : les coulisses d'une victoire...,1.0
9,Awakening the Zodiac (2017),1.0



Grumpier Old Men (1995) - Top 10 Similar Movies Based on Euclidean Distance:


Unnamed: 0,Title,Similarity (Euclidean)
0,"Goodbye Girl, The (2004)",0.0
1,Playing for Keeps (2012),0.0
2,Nervous Ticks (1992),0.0
3,Save the Date (2012),0.0
4,The Princess Switch (2018),0.0
5,Dear Ex (2018),0.0
6,The Wrong Missy (2020),0.0
7,Without Borders (2015),0.0
8,"Heartbreak Kid, The (1972)",0.0
9,It Had to Be You (2016),0.0



Grumpier Old Men (1995) - Top 10 Similar Movies Based on Jaccard Similarity:


Unnamed: 0,Title,Similarity (Jaccard)
0,Yeh Ballet (2020),1.0
1,The Wolves (1996),1.0
2,Riding the Edge (1989),1.0
3,Family Without a Dinner Table (1985),1.0
4,Game For Vultures (1980),1.0
5,The Sentimental Engine Slayer (2010),1.0
6,Razzia (1955),1.0
7,Space Force (1978),1.0
8,Slasher Party (2019),1.0
9,Father and Son (1981),1.0
