In [18]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, euclidean_distances
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import precision_score, recall_score, f1_score

# Load the dataset
movies = pd.read_csv("MovieDatasetOriginal.csv")

In [19]:
tfidf_vectorizer = TfidfVectorizer(analyzer='word', stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(movies['overview'])

# Compute Cosine Similarity Matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [20]:
# Initialize Nearest Neighbors model
nn_model = NearestNeighbors(n_neighbors=10, metric='cosine')
nn_model.fit(tfidf_matrix)

In [21]:
def get_cosine_similarity_recommendations(title, no_of_recommendations):
    index = movies[movies['original_title'] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[index]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:no_of_recommendations+1]
    movie_indices = [i[0] for i in sim_scores]
    return movies.iloc[movie_indices][['original_title', 'release_year', 'director']]

def get_knn_recommendations(title, no_of_recommendations):
    title_vector = tfidf_vectorizer.transform([title])
    distances, indices = nn_model.kneighbors(title_vector)
    movie_indices = indices.flatten()[1:no_of_recommendations+1]
    return movies.iloc[movie_indices][['original_title', 'release_year', 'director']]

def get_euclidean_distance_recommendations(title, no_of_recommendations):
    index = movies[movies['original_title'] == title].index[0]
    dist_scores = list(enumerate(euclidean_distances(tfidf_matrix[index], tfidf_matrix)[0]))
    dist_scores = sorted(dist_scores, key=lambda x: x[1])[:no_of_recommendations]
    movie_indices = [i[0] for i in dist_scores]
    return movies.iloc[movie_indices][['original_title', 'release_year', 'director']]

In [22]:
# Choose a movie title
input_title = "The Dark Knight"

cosine_sim_recommendations = get_cosine_similarity_recommendations(input_title, 5)
print("Recommendations using Cosine Similarity:")
print(cosine_sim_recommendations)

# Get recommendations using KNN
knn_recommendations = get_knn_recommendations(input_title, 5)
print("\nRecommendations using KNN:")
print(knn_recommendations)

# Get recommendations using Euclidean Distance
euclidean_dist_recommendations = get_euclidean_distance_recommendations(input_title, 5)
print("\nRecommendations using Euclidean Distance:")
print(euclidean_dist_recommendations)

Recommendations using Cosine Similarity:
                         original_title  release_year  \
722               The Dark Knight Rises          2012   
914                       Batman Begins          2005   
554  Sherlock Holmes: A Game of Shadows          2011   
857                     Despicable Me 2          2013   
80                 Secret in Their Eyes          2015   

                       director  
722           Christopher Nolan  
914           Christopher Nolan  
554                 Guy Ritchie  
857  Pierre Coffin|Chris Renaud  
80                    Billy Ray  

Recommendations using KNN:
             original_title  release_year           director
722   The Dark Knight Rises          2012  Christopher Nolan
143      As Above, So Below          2014  John Erick Dowdle
908             The Canyons          2013      Paul Schrader
1139     Death at a Funeral          2007           Frank Oz
105                    Lucy          2014         Luc Besson

Recommendations u

In [26]:
# Relevant items related to "The Dark Knight" (ground truth)
# relevant_items = ["The Dark Knight Rises", "Inception", "Interstellar", "Batman Begins", "The Dark Knight Trilogy"]
relevant_items = ["Avengers: Age of Ultron", "Guardians of the Galaxy", "Iron Man 2", "Captain America: The Winter Soldier", "Thor"]

# Function to calculate Precision, Recall, and F1-score
def evaluate_recommendations(recommended_items, relevant_items, K=5):
    recommended_set = set(recommended_items)
    relevant_set = set(relevant_items)
    true_positives = recommended_set.intersection(relevant_set)
    
    # Calculate Precision, Recall, and F1-score
    precision = len(true_positives) / K if K > 0 else 0.0
    recall = len(true_positives) / len(relevant_set) if len(relevant_set) > 0 else 0.0
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
    
    return precision, recall, f1

# Choose a movie title
# input_title = "The Dark Knight"
input_title = "Captain America: The First Avenger"

# Get recommendations using Cosine Similarity
cosine_sim_recommendations = get_cosine_similarity_recommendations(input_title, 5)
cosine_sim_titles = cosine_sim_recommendations['original_title'].tolist()
precision_cosine, recall_cosine, f1_cosine = evaluate_recommendations(cosine_sim_titles, relevant_items, K=5)

# Get recommendations using KNN
knn_recommendations = get_knn_recommendations(input_title, 5)
knn_titles = knn_recommendations['original_title'].tolist()
precision_knn, recall_knn, f1_knn = evaluate_recommendations(knn_titles, relevant_items, K=5)

# Get recommendations using Euclidean Distance
euclidean_dist_recommendations = get_euclidean_distance_recommendations(input_title, 5)
euclidean_dist_titles = euclidean_dist_recommendations['original_title'].tolist()
precision_euclidean, recall_euclidean, f1_euclidean = evaluate_recommendations(euclidean_dist_titles, relevant_items, K=5)

# Display evaluation results with three decimal places
print("Evaluation Results:")
print(f"Cosine Similarity Approach - Precision@5: {precision_cosine:.3f}")
print(f"Cosine Similarity Approach - Recall@5: {recall_cosine:.3f}")
print(f"Cosine Similarity Approach - F1-score@5: {f1_cosine:.3f}")

print(f"\nKNN Approach - Precision@5: {precision_knn:.3f}")
print(f"KNN Approach - Recall@5: {recall_knn:.3f}")
print(f"KNN Approach - F1-score@5: {f1_knn:.3f}")

print(f"\nEuclidean Distance Approach - Precision@5: {precision_euclidean:.3f}")
print(f"Euclidean Distance Approach - Recall@5: {recall_euclidean:.3f}")
print(f"Euclidean Distance Approach - F1-score@5: {f1_euclidean:.3f}")

Evaluation Results:
Cosine Similarity Approach - Precision@5: 0.200
Cosine Similarity Approach - Recall@5: 0.200
Cosine Similarity Approach - F1-score@5: 0.200

KNN Approach - Precision@5: 0.000
KNN Approach - Recall@5: 0.000
KNN Approach - F1-score@5: 0.000

Euclidean Distance Approach - Precision@5: 0.200
Euclidean Distance Approach - Recall@5: 0.200
Euclidean Distance Approach - F1-score@5: 0.200
