In [12]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
movies = pd.read_csv('movies.csv')

# TF-IDF Vectorizer for genres
tfidf = TfidfVectorizer(stop_words='english')
movies['genres'] = movies['genres'].fillna('')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Dimensionality reduction using TruncatedSVD
n_components = min(20, tfidf_matrix.shape[1])  # Use 20 or less depending on the number of features
svd = TruncatedSVD(n_components=n_components)
tfidf_matrix_reduced = svd.fit_transform(tfidf_matrix)

# Compute cosine similarity on the reduced matrix
cosine_sim = cosine_similarity(tfidf_matrix_reduced, tfidf_matrix_reduced)

# Function to get recommendations
def get_recommendations(title, cosine_sim=cosine_sim):
    idx = movies[movies['title'] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    movie_indices = [i[0] for i in sim_scores]
    return movies['title'].iloc[movie_indices]

# Example usage
print(get_recommendations('Toy Story (1995)'))


2203                                           Antz (1998)
3021                                    Toy Story 2 (1999)
3653        Adventures of Rocky and Bullwinkle, The (2000)
3912                      Emperor's New Groove, The (2000)
4780                                 Monsters, Inc. (2001)
9949     DuckTales: The Movie - Treasure of the Lost La...
10773                                     Wild, The (2006)
11604                               Shrek the Third (2007)
12969                       Tale of Despereaux, The (2008)
17431    Asterix and the Vikings (Astérix et les Viking...
Name: title, dtype: object


In [14]:
import pandas as pd
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split

# Load the dataset
ratings = pd.read_csv('ratings.csv')

# Prepare the data for Surprise
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Train-test split
trainset, testset = train_test_split(data, test_size=0.2)

# Use SVD algorithm for collaborative filtering
algo = SVD()
algo.fit(trainset)

# Function to get collaborative filtering recommendations
def get_collaborative_recommendations(user_id, algo=algo, n=10):
    user_ratings = ratings[ratings['userId'] == user_id]
    movie_ids = user_ratings['movieId'].unique()
    all_movie_ids = ratings['movieId'].unique()
    movie_ids_to_predict = [mid for mid in all_movie_ids if mid not in movie_ids]
    
    predictions = [algo.predict(user_id, mid) for mid in movie_ids_to_predict]
    predictions = sorted(predictions, key=lambda x: x.est, reverse=True)
    top_n_predictions = predictions[:n]
    
    recommended_movie_ids = [pred.iid for pred in top_n_predictions]
    return movies[movies['movieId'].isin(recommended_movie_ids)]['title']

# Example usage
print(get_collaborative_recommendations(1))

# Evaluate the model
predictions = algo.test(testset)
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)

print(f"Collaborative Filtering RMSE: {rmse}")
print(f"Collaborative Filtering MAE: {mae}")


57                       Postman, The (Postino, Il) (1994)
2766                                American Beauty (1999)
11514    Inglorious Bastards (Quel maledetto treno blin...
13915     Dimensions of Dialogue (Moznosti dialogu) (1982)
16927                      Bill Cunningham New York (2011)
19611             Death on the Staircase (Soupçons) (2004)
26741          Louis C.K.: Live at The Comedy Store (2015)
26779                Story of Film: An Odyssey, The (2011)
32968                               The Blue Planet (2001)
45593                              Band of Brothers (2001)
Name: title, dtype: object
RMSE: 0.7779
MAE:  0.5868
Collaborative Filtering RMSE: 0.7778524829807567
Collaborative Filtering MAE: 0.5867883398114546


In [15]:
def hybrid_recommendations(title, user_id, n=10):
    content_recommendations = get_recommendations(title)
    collaborative_recommendations = get_collaborative_recommendations(user_id)
    
    # Combine recommendations
    combined_recommendations = list(content_recommendations) + list(collaborative_recommendations)
    
    # Remove duplicates while maintaining order
    seen = set()
    final_recommendations = []
    for movie in combined_recommendations:
        if movie not in seen:
            final_recommendations.append(movie)
            seen.add(movie)
    
    return final_recommendations[:n]

# Example usage
print(hybrid_recommendations('Toy Story (1995)', 1))


['Antz (1998)', 'Toy Story 2 (1999)', 'Adventures of Rocky and Bullwinkle, The (2000)', "Emperor's New Groove, The (2000)", 'Monsters, Inc. (2001)', 'DuckTales: The Movie - Treasure of the Lost Lamp (1990)', 'Wild, The (2006)', 'Shrek the Third (2007)', 'Tale of Despereaux, The (2008)', 'Asterix and the Vikings (Astérix et les Vikings) (2006)']


In [18]:
from sklearn.metrics import precision_score, recall_score

# Assuming you have ground truth data for user recommendations
# ground_truth_recommendations = {user_id: [list of relevant movie_ids]}
# predicted_recommendations = {user_id: [list of predicted movie_ids]}

def evaluate_recommendations(ground_truth, predictions):
    all_precisions = []
    all_recalls = []

    for user_id in ground_truth:
        true_positives = len(set(ground_truth[user_id]) & set(predictions[user_id]))
        precision = true_positives / len(predictions[user_id]) if predictions[user_id] else 0
        recall = true_positives / len(ground_truth[user_id]) if ground_truth[user_id] else 0

        all_precisions.append(precision)
        all_recalls.append(recall)

    avg_precision = sum(all_precisions) / len(all_precisions)
    avg_recall = sum(all_recalls) / len(all_recalls)

    return avg_precision, avg_recall

# Example usage
ground_truth_recommendations = {
    1: [10, 20, 30, 40],
    2: [50, 60, 70, 80],
}

predicted_recommendations = {
    1: [10, 25, 30, 45],
    2: [55, 60, 75, 85],
}

precision, recall = evaluate_recommendations(ground_truth_recommendations, predicted_recommendations)
print(f"Precision: {precision}")
print(f"Recall: {recall}")


Precision: 0.375
Recall: 0.375
