In [1]:
# ----------code by Sk Rajesh---------

import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds

# Load datasets
links = pd.read_csv('links.csv')
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')
tags = pd.read_csv('tags.csv')

# Prepare user-item rating matrix
user_movie_ratings = ratings.pivot_table(index='userId', columns='movieId', values='rating')

# Fill NaN values with 0
user_movie_ratings = user_movie_ratings.fillna(0)

# Matrix Factorization using SVD
def matrix_factorization(ratings_matrix, num_factors=20):
    U, sigma, Vt = svds(ratings_matrix, k=num_factors)
    sigma = np.diag(sigma)
    all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt)
    return all_user_predicted_ratings

# Perform matrix factorization
predicted_ratings = matrix_factorization(user_movie_ratings.values)

# Convert predictions to DataFrame with correct index and columns
user_movie_predictions = pd.DataFrame(predicted_ratings, 
                                       index=user_movie_ratings.index, 
                                       columns=user_movie_ratings.columns)

# Content-Based Filtering using Genre Overlap
def compute_genre_overlap(genres_list):
    genre_matrix = np.zeros((len(genres_list), len(genres_list)))
    
    for i in range(len(genres_list)):
        for j in range(len(genres_list)):
            if i != j:
                overlap = len(set(genres_list[i].split('|')).intersection(set(genres_list[j].split('|'))))
                genre_matrix[i, j] = overlap
    
    return genre_matrix

# Build genre similarity matrix
genres_list = movies['genres'].tolist()
genre_matrix = compute_genre_overlap(genres_list)

# Create a DataFrame for genre similarity matrix
genre_sim_df = pd.DataFrame(genre_matrix, index=movies['movieId'], columns=movies['movieId'])

def hybrid_recommendations(user_id, movie_id, top_n=10):
    # Collaborative filtering recommendations
    if user_id not in user_movie_ratings.index:
        raise ValueError(f"User ID {user_id} not found in the dataset.")
    
    # Predict ratings for all movies
    user_ratings = user_movie_predictions.loc[user_id]
    predicted_ratings = user_ratings.sort_values(ascending=False)
    
    # Get top-N predicted ratings, excluding already rated movies
    collab_based_recs = predicted_ratings.index[predicted_ratings.index != movie_id][:top_n]

    # Content-based recommendations
    similar_movies = genre_sim_df[movie_id].sort_values(ascending=False).index
    content_based_recs = similar_movies[similar_movies != movie_id][:top_n]
    
    # Combine recommendations
    combined_recs = list(set(collab_based_recs) | set(content_based_recs))
    combined_recs_titles = movies[movies['movieId'].isin(combined_recs)]['title']
    
    return combined_recs_titles

# Example usage
user_id = 1  # Replace with an actual user ID
movie_id = 1  # Replace with an actual movie ID
recommendations = hybrid_recommendations(user_id, movie_id)
print("Recommended movies:")
print(recommendations)


Recommended movies:
224             Star Wars: Episode IV - A New Hope (1977)
507                     Terminator 2: Judgment Day (1991)
559                                      Space Jam (1996)
898     Star Wars: Episode V - The Empire Strikes Back...
899                            Princess Bride, The (1987)
900     Raiders of the Lost Ark (Indiana Jones and the...
902                                         Aliens (1986)
911     Star Wars: Episode VI - Return of the Jedi (1983)
939                                Terminator, The (1984)
1503                           Saving Private Ryan (1998)
1939                                   Matrix, The (1999)
2250                      Who Framed Roger Rabbit? (1988)
3194                                         Shrek (2001)
3568                                Monsters, Inc. (2001)
5819                                        Robots (2005)
6626                                     Enchanted (2007)
7760    Asterix and the Vikings (Ast√©rix et les Vik

In [4]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd

# Example DataFrame for ratings, replace with actual data
ratings = pd.DataFrame({
    'userId': [1, 1, 2, 2],
    'movieId': [1, 2, 1, 2],
    'rating': [3.5, 4.0, 2.5, 3.0]
})

# Define these functions according to your actual implementations
def hybrid_recommendations(user_id, movie_id):
    # Replace with your actual recommendation logic
    return [1, 2]  # Example list of recommended movie IDs

def get_prediction_for_movie(movie_id):
    # Replace with your actual prediction logic
    return 3.0  # Example predicted rating

def get_predictions(user_id, movie_id):
    recommendations = hybrid_recommendations(user_id, movie_id)
    
    # Get the true ratings for the user and filter for recommended movies
    true_ratings_df = ratings[(ratings['userId'] == user_id) & (ratings['movieId'].isin(recommendations))]
    true_ratings = true_ratings_df['rating'].values
    
    # Ensure predictions match the recommendations
    predicted_ratings = np.array([get_prediction_for_movie(m) for m in recommendations])
    
    # Check if lengths match
    if len(true_ratings) == 0 or len(predicted_ratings) == 0:
        raise ValueError("No true ratings or predictions available for the given user and recommendations.")
    
    if len(true_ratings) != len(predicted_ratings):
        raise ValueError("Mismatch between the number of true ratings and predicted ratings.")
    
    mae = mean_absolute_error(true_ratings, predicted_ratings)
    rmse = np.sqrt(mean_squared_error(true_ratings, predicted_ratings))
    return mae, rmse

# Example usage
user_id = 1
movie_id = 1
mae, rmse = get_predictions(user_id, movie_id)
print(f"MAE: {mae}, RMSE: {rmse}")


MAE: 0.75, RMSE: 0.7905694150420949
