# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ParameterSampler

# Loading Data

In [2]:
# Define function to load data
def load_data(movies_path, ratings_path):
    """Read the movies and ratings CSV sources into DataFrames.

    Parameters
    ----------
    movies_path, ratings_path
        Anything ``pd.read_csv`` accepts (local path, URL, file-like object).

    Returns
    -------
    tuple
        ``(movies, ratings)``, read in that order.
    """
    movies, ratings = (pd.read_csv(source) for source in (movies_path, ratings_path))
    return movies, ratings

# Remote CSV sources for the movie catalogue and the user ratings.
# NOTE(review): the two URLs point at different branches ('main' vs 'master')
# of the same repository -- confirm that both are intended.
movies_path = 'https://raw.githubusercontent.com/Bansal0527/Movie-Recomendation-System/main/Dataset/movies.csv'
ratings_path = 'https://raw.githubusercontent.com/Bansal0527/Movie-Recomendation-System/master/Dataset/ratings.csv'

# Download both files into DataFrames.
movies, ratings = load_data(movies_path=movies_path, ratings_path=ratings_path)

# Data Preprocessing

In [3]:
# Random search over the minimum-vote thresholds used to filter the ratings.
# Each sampled pair is scored with the "sparsity" expression below and the
# pair with the lowest score is kept.
param_space = {
    'min_user_votes': range(10, 101),
    'min_movie_votes': range(30, 101)
}
n_iter = 100
best_score = float('inf')
best_params = None
param_sampler = ParameterSampler(param_space, n_iter=n_iter, random_state=42)

# Everything below is independent of the sampled thresholds, so it is computed
# once instead of on every iteration.
user_counts = ratings['userId'].value_counts()
movie_counts = ratings['movieId'].value_counts()
# Number of votes cast by the user / received by the movie of each rating row.
user_votes_per_row = ratings['userId'].map(user_counts)
movie_votes_per_row = ratings['movieId'].map(movie_counts)
# NOTE(review): this denominator is (number of rating rows) x (number of
# catalogue movies), not (users x movies) of the filtered matrix, so the score
# only decreases as more ratings are retained. Confirm this is the intended
# objective before relying on the selected thresholds.
total_cells = len(ratings) * len(movies)

for params in param_sampler:
    keep_row = (
        (user_votes_per_row >= params['min_user_votes'])
        & (movie_votes_per_row >= params['min_movie_votes'])
    )
    filtered_ratings = ratings[keep_row]
    sparsity = 1 - len(filtered_ratings) / total_cells
    if sparsity < best_score:
        best_score = sparsity
        best_params = params

print("Best parameters:", best_params)
print("Best sparsity score:", best_score)

def preprocess_data(ratings, min_user_votes = 15, min_movie_votes = 31):
    """Filter the ratings by vote count and pivot them into a movie x user matrix.

    Parameters
    ----------
    ratings : DataFrame with 'userId', 'movieId' and 'rating' columns.
    min_user_votes : minimum number of rating rows a user needs to be kept.
    min_movie_votes : minimum number of rating rows a movie needs to be kept.

    Returns
    -------
    DataFrame indexed by 'movieId' with one column per 'userId'; cells without
    a rating are filled with 0.

    Both vote counts are taken from the unfiltered input, so a movie is kept or
    dropped based on its original number of ratings, not the number remaining
    after users have been filtered out.
    """
    votes_per_user = ratings['userId'].value_counts()
    votes_per_movie = ratings['movieId'].value_counts()

    active_users = votes_per_user[votes_per_user >= min_user_votes].index
    popular_movies = votes_per_movie[votes_per_movie >= min_movie_votes].index

    # Keep only rows whose user AND movie both pass their threshold.
    keep_row = ratings['userId'].isin(active_users) & ratings['movieId'].isin(popular_movies)
    kept_ratings = ratings[keep_row]

    rating_matrix = kept_ratings.pivot(index='movieId', columns='userId', values='rating')
    return rating_matrix.fillna(0)

# Build the movie x user rating matrix with the default vote thresholds.
final_dataset = preprocess_data(ratings)

# Replace the '|' separators in the genres column with spaces.
# regex=False makes '|' a literal character: as a regular expression it is the
# alternation operator, and the default value of `regex` is not the same in
# every pandas version.
movies['genres'] = movies['genres'].str.replace('|', ' ', regex=False)

Best parameters: {'min_user_votes': 15, 'min_movie_votes': 31}
Best sparsity score: 0.9999416111038673


# Training Model

In [4]:
# Hold out 20% of the rows of final_dataset (optional; the split is seeded so
# it is reproducible).
train_data, test_data = train_test_split(
    final_dataset,
    test_size=0.2,
    random_state=42,
)

# Create the Random Forest regressor (100 trees, fixed seed). It is only
# instantiated here, not fitted.
random_forest = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

# Predicting Movies

In [5]:
# train_data, test_data and the unfitted random_forest are created in the
# "Training Model" cell above (same arguments, same random_state). They are
# reused here instead of being rebuilt by a copy of that cell.

# Function to suggest movies for a given user ID
def suggest_movies_for_user(user_id, movies, model, ratings_matrix=None):
    """Rank the movies a user has not rated by the model's predicted rating.

    Parameters
    ----------
    user_id
        Column label of the user in the rating matrix.
    movies : DataFrame
        Must contain 'movieId', 'title' and 'genres' columns.
    model
        Object with a ``predict`` method. It is called on the rating matrix
        with the user's column removed and must return one value per row.
    ratings_matrix : DataFrame, optional
        Matrix indexed by movieId with one column per user, where 0 marks
        "not rated". Defaults to the module-level ``final_dataset``.

    Returns
    -------
    DataFrame
        Columns 'movieId', 'title', 'genres', 'predicted_rating', restricted
        to movies the user has not rated and sorted by predicted rating,
        highest first.

    Raises
    ------
    KeyError
        If ``user_id`` is not a column of the rating matrix.
    """
    if ratings_matrix is None:
        # Backward-compatible default: use the notebook-level matrix.
        ratings_matrix = final_dataset

    if user_id not in ratings_matrix.columns:
        raise KeyError(f"user_id {user_id!r} is not a column of the rating matrix")

    user_ratings = ratings_matrix[user_id]
    unrated_movies = user_ratings[user_ratings == 0].index  # 0 marks "not rated"

    # NOTE(review): predictions are made for every row of the matrix, including
    # any rows the model was fitted on -- confirm this is intended.
    predictions = model.predict(ratings_matrix.drop(columns=[user_id]))

    # Pair every movie ID with its predicted rating
    suggestions = pd.DataFrame({'movieId': ratings_matrix.index, 'predicted_rating': predictions})

    # Keep only the movies the user has not rated
    suggestions = suggestions[suggestions['movieId'].isin(unrated_movies)]

    # Attach titles and genres, then put the best predictions first
    suggestions = suggestions.merge(movies, on='movieId')
    suggestions = suggestions.sort_values(by='predicted_rating', ascending=False)

    return suggestions[['movieId', 'title', 'genres', 'predicted_rating']]

# Example: Suggest movies for user with ID 1
user_id = 1

# The user's column is the regression target, so it has to exist in the matrix.
# Fail with a clear message instead of passing None to drop(), which would
# raise an unrelated "[None] not found in axis" KeyError.
if user_id not in final_dataset.columns:
    raise KeyError(f"user_id {user_id} is not a column of final_dataset")
user_id_to_drop = user_id

# One sample per row of final_dataset: the features are every other user's
# ratings, the target is this user's ratings.
random_forest.fit(final_dataset.drop(columns=[user_id_to_drop]), final_dataset[user_id])
recommended_movies = suggest_movies_for_user(user_id, movies, random_forest)
print("Recommended movies for user with ID", user_id, ":")
print(recommended_movies.head(10))

Recommended movies for user with ID 1 :
     movieId                                      title  \
13        32  Twelve Monkeys (a.k.a. 12 Monkeys) (1995)   
329     2762                    Sixth Sense, The (1999)   
268     1968                 Breakfast Club, The (1985)   
220     1387                                Jaws (1975)   
337     2918            Ferris Bueller's Day Off (1986)   
83       380                           True Lies (1994)   
152      924               2001: A Space Odyssey (1968)   
159     1036                            Die Hard (1988)   
113      589          Terminator 2: Judgment Day (1991)   
228     1527                  Fifth Element, The (1997)   

                                       genres  predicted_rating  
13                    Mystery Sci-Fi Thriller              1.45  
329                      Drama Horror Mystery              1.41  
268                              Comedy Drama              1.38  
220                             Action Horror 