In [7]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate

In [8]:
# User data
users = {
    'user_id': [1, 2, 3],
    'user_name': ['Alice', 'Bob', 'Charlie']
}
users_df = pd.DataFrame(users)

# Movies data
movies = {
    'item_id': [1, 2, 3],
    'title': ['Action Movie', 'Romantic Comedy', 'Thriller'],
    'description': ['Action packed movie with heroes', 'A romantic comedy movie', 'A thrilling drama with twists']
}
movies_df = pd.DataFrame(movies)

# Ratings data
ratings = {
    'user_id': [1, 1, 1, 2, 2, 3, 3, 3],
    'item_id': [1, 2, 3, 2, 3, 1, 2, 3],
    'rating': [5, 3, 4, 4, 5, 2, 3, 5]
}
ratings_df = pd.DataFrame(ratings)

print("Users:")
print(users_df)
print("\nMovies:")
print(movies_df)
print("\nRatings:")
print(ratings_df)

Users:
   user_id user_name
0        1     Alice
1        2       Bob
2        3   Charlie

Movies:
   item_id            title                      description
0        1     Action Movie  Action packed movie with heroes
1        2  Romantic Comedy          A romantic comedy movie
2        3         Thriller    A thrilling drama with twists

Ratings:
   user_id  item_id  rating
0        1        1       5
1        1        2       3
2        1        3       4
3        2        2       4
4        2        3       5
5        3        1       2
6        3        2       3
7        3        3       5


In [9]:
# Using Surprise library to load data
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_df[['user_id', 'item_id', 'rating']], reader)

# Train the SVD model
trainset = data.build_full_trainset()
svd = SVD()
svd.fit(trainset)

# Function to predict ratings
def predict_rating(user_name, movie_title):
    user_id = users_df[users_df['user_name'] == user_name].iloc[0]['user_id']
    item_id = movies_df[movies_df['title'] == movie_title].iloc[0]['item_id']
    predicted_rating = svd.predict(user_id, item_id).est
    return predicted_rating

# Example prediction
user_name = 'Alice'
movie_title = 'Romantic Comedy'
predicted_rating = predict_rating(user_name, movie_title)
print(f'Predicted rating for {user_name} on "{movie_title}" is {predicted_rating}')

Predicted rating for Alice on "Romantic Comedy" is 3.553216064097584


In [10]:
# Compute TF-IDF matrix
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies_df['description'])

# Compute cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to get content-based recommendations
def get_recommendations(title, cosine_sim=cosine_sim):
    idx = movies_df[movies_df['title'] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:4]
    movie_indices = [i[0] for i in sim_scores]
    return movies_df['title'].iloc[movie_indices]

# Example usage
print(f'Recommendations for "Action Movie":')
print(get_recommendations('Action Movie'))

Recommendations for "Action Movie":
1    Romantic Comedy
2           Thriller
Name: title, dtype: object


In [11]:
def hybrid_recommend(user_name, title):
    # Collaborative filtering recommendation
    user_id = users_df[users_df['user_name'] == user_name].iloc[0]['user_id']
    user_recommendations = [item_id for item_id in movies_df['item_id'] if svd.predict(user_id, item_id).est > 4]
    user_recommendation_titles = movies_df[movies_df['item_id'].isin(user_recommendations)]['title'].tolist()
    
    # Content-based recommendation
    content_recommendations = get_recommendations(title)
    content_recommendation_titles = content_recommendations.tolist()
    
    # Combine recommendations
    combined_recommendations = list(set(user_recommendation_titles) | set(content_recommendation_titles))
    return combined_recommendations

# Example usage
user_name = 'Alice'
title = 'Action Movie'
print(f'Hybrid recommendations for {user_name} based on "{title}":')
print(hybrid_recommend(user_name, title))

Hybrid recommendations for Alice based on "Action Movie":
['Thriller', 'Action Movie', 'Romantic Comedy']


In [12]:
# Cross-validation for SVD model
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.7146  0.2139  1.2089  1.2980  0.7709  1.0413  0.5109  
MAE (testset)     1.6059  0.2096  1.1746  1.2980  0.7709  1.0118  0.4823  
Fit time          0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    


{'test_rmse': array([1.7146198 , 0.21385327, 1.20888625, 1.29802981, 0.77092077]),
 'test_mae': array([1.60592921, 0.20960039, 1.17456431, 1.29802981, 0.77092077]),
 'fit_time': (0.00015616416931152344,
  8.392333984375e-05,
  6.914138793945312e-05,
  7.009506225585938e-05,
  6.413459777832031e-05),
 'test_time': (2.002716064453125e-05,
  1.3113021850585938e-05,
  5.9604644775390625e-06,
  5.9604644775390625e-06,
  2.6226043701171875e-06)}