In [1]:
#--------Code by Basudeb Roy-------
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Load the ratings and movies datasets
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

# Create the user-item matrix
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating')

# Fill NaN values with 0
user_item_matrix_filled = user_item_matrix.fillna(0)

# Compute collaborative filtering (item-based) similarity using cosine similarity
collab_item_similarity = cosine_similarity(user_item_matrix_filled.T)
collab_item_similarity_df = pd.DataFrame(collab_item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)

# Content-based filtering using movie genres (or other attributes)
tfidf = TfidfVectorizer(stop_words='english')

# Replace NaN in 'genres' with an empty string
movies['genres'] = movies['genres'].fillna('')

# Calculate the TF-IDF matrix
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Compute cosine similarity for content-based filtering
content_item_similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)
content_item_similarity_df = pd.DataFrame(content_item_similarity, index=movies['movieId'], columns=movies['movieId'])

# Function to get hybrid recommendations
def get_hybrid_recommendations(movie_id, user_item_matrix, collab_item_similarity_df, content_item_similarity_df, movies_df, top_n=10, alpha=0.5):
    if movie_id not in collab_item_similarity_df.columns or movie_id not in content_item_similarity_df.columns:
        return "Movie not found in the dataset."
    
    # Collaborative filtering similarity scores
    collab_scores = collab_item_similarity_df[movie_id]
    
    # Content-based filtering similarity scores
    content_scores = content_item_similarity_df[movie_id]
    
    # Combine scores (you can adjust the weight `alpha` as needed)
    hybrid_scores = (alpha * collab_scores) + ((1 - alpha) * content_scores)
    
    # Drop the input movie to avoid recommending it
    hybrid_scores = hybrid_scores.drop(movie_id)
    
    # Sort movies by hybrid score
    similar_movies_ids = hybrid_scores.sort_values(ascending=False).index[:top_n]
    
    # Map movie IDs to movie names
    similar_movies = movies_df[movies_df['movieId'].isin(similar_movies_ids)]['title'].values
    
    return similar_movies

# Example movieId
example_movie_id = 1  # Adjust this to a valid movieId from your dataset

# Test the hybrid recommendation function
recommended_movies = get_hybrid_recommendations(example_movie_id, user_item_matrix_filled, collab_item_similarity_df, content_item_similarity_df, movies)
print("Recommended Movies:")
for movie in recommended_movies:
    print(movie)


Recommended Movies:
Antz (1998)
Bug's Life, A (1998)
Toy Story 2 (1999)
Emperor's New Groove, The (2000)
Shrek (2001)
Monsters, Inc. (2001)
Ice Age (2002)
Finding Nemo (2003)
Incredibles, The (2004)
Inside Out (2015)


In [3]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd

# Example placeholder functions (implement these according to your needs)
def get_hybrid_recommendations(movie_id, user_item_matrix, collab_item_similarity_df, content_item_similarity_df, movies_df):
    # This function should return a list of recommended movie IDs
    # For example:
    return [2, 3, 4, 5]  # Placeholder: replace with actual recommendation logic

def get_prediction_for_movie(movie_id):
    # This function should return the predicted rating for a given movie_id
    # For example:
    return 3.5  # Placeholder: replace with actual prediction logic

# Ensure ratings DataFrame is defined
ratings = pd.DataFrame({
    'movieId': [1, 2, 3, 4, 5],
    'rating': [4, 3, 5, 2, 4]
})

# Define your input data
user_item_matrix = None  # Replace with actual matrix
collab_item_similarity_df = None  # Replace with actual DataFrame
content_item_similarity_df = None  # Replace with actual DataFrame
movies_df = None  # Replace with actual DataFrame

# Generate predictions
def get_predictions(movie_id, user_item_matrix, collab_item_similarity_df, content_item_similarity_df, movies_df):
    recommendations = get_hybrid_recommendations(movie_id, user_item_matrix, collab_item_similarity_df, content_item_similarity_df, movies_df)
    
    # Collect true ratings for the recommended movies
    true_ratings = ratings[ratings['movieId'].isin(recommendations)]['rating']
    
    # Generate predicted ratings
    predicted_ratings = np.array([get_prediction_for_movie(m) for m in recommendations])
    
    # Handle cases where there are no true ratings for the recommended movies
    if true_ratings.empty or len(true_ratings) != len(predicted_ratings):
        raise ValueError("Mismatch between true ratings and predicted ratings or no true ratings available.")
    
    # Compute MAE and RMSE
    mae = mean_absolute_error(true_ratings, predicted_ratings)
    rmse = np.sqrt(mean_squared_error(true_ratings, predicted_ratings))
    
    return mae, rmse

# Example usage
movie_id = 1
mae, rmse = get_predictions(movie_id, user_item_matrix, collab_item_similarity_df, content_item_similarity_df, movies_df)
print(f"MAE: {mae}, RMSE: {rmse}")


MAE: 1.0, RMSE: 1.118033988749895
