# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ParameterSampler

# Loading Data

In [2]:
def load_data(movies_path, ratings_path):
    movies = pd.read_csv(movies_path)
    ratings = pd.read_csv(ratings_path)
    return movies, ratings

movies_path = 'https://raw.githubusercontent.com/Bansal0527/Movie-Recomendation-System/main/Dataset/movies.csv'
ratings_path = 'https://raw.githubusercontent.com/Bansal0527/Movie-Recomendation-System/master/Dataset/ratings.csv'
movies, ratings = load_data(movies_path, ratings_path)

# Data Preprocessing

In [3]:
param_space = {
    'min_user_votes': range(10, 101),
    'min_movie_votes': range(30, 101)
}
n_iter = 100
best_score = float('inf')
best_params = None
param_sampler = ParameterSampler(param_space, n_iter=n_iter, random_state=42)
for params in param_sampler:
    filtered_ratings = ratings.copy()
    user_counts = filtered_ratings['userId'].value_counts()
    movie_counts = filtered_ratings['movieId'].value_counts()
    filtered_ratings = filtered_ratings[filtered_ratings['userId'].isin(user_counts[user_counts >= params['min_user_votes']].index)]
    filtered_ratings = filtered_ratings[filtered_ratings['movieId'].isin(movie_counts[movie_counts >= params['min_movie_votes']].index)]
    sparsity = 1 - len(filtered_ratings) / (len(ratings) * len(movies))
    if sparsity < best_score:
        best_score = sparsity
        best_params = params

print("Best parameters:", best_params)
print("Best sparsity score:", best_score)

Best parameters: {'min_user_votes': 15, 'min_movie_votes': 31}
Best sparsity score: 0.9999416111038673


In [4]:
def preprocess_data(ratings, min_user_votes = 15, min_movie_votes = 31):
    user_counts = ratings['userId'].value_counts()
    movie_counts = ratings['movieId'].value_counts()

    ratings = ratings[ratings['userId'].isin(user_counts[user_counts >= min_user_votes].index)]
    ratings = ratings[ratings['movieId'].isin(movie_counts[movie_counts >= min_movie_votes].index)]

    final_dataset = ratings.pivot(index='movieId', columns='userId', values='rating').fillna(0)
    return final_dataset

final_dataset = preprocess_data(ratings)
movies['genres'] = movies['genres'].str.replace('|', ' ')

# Training Model

In [5]:
random_forest = RandomForestRegressor(n_estimators=100, random_state=42)

# Predicting Movies

In [11]:
def suggest_movies_for_user(user_id, movies, model):
    user_ratings = final_dataset[user_id]
    unrated_movies = user_ratings[user_ratings == 0].index.tolist() 
    predictions = model.predict(final_dataset.drop(columns=[user_id])).tolist()
    suggestions = pd.DataFrame({'movieId': final_dataset.index, 'predicted_rating': predictions})
    suggestions = suggestions[suggestions['movieId'].isin(unrated_movies)]
    suggestions = suggestions.merge(movies[['movieId', 'title', 'genres']], on='movieId')
    return suggestions[['title', 'genres']]


user_id = 1
user_id_to_drop = user_id if user_id in final_dataset.columns else None
random_forest.fit(final_dataset.drop(columns=[user_id_to_drop]), final_dataset[user_id])
recommended_movies = suggest_movies_for_user(user_id, movies, random_forest)
print("Recommended movies for user with ID", user_id, ":")
print(recommended_movies.head(10))

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


Recommended movies for user with ID 127 :
                                title  \
0                    Toy Story (1995)   
1                      Jumanji (1995)   
2             Grumpier Old Men (1995)   
3  Father of the Bride Part II (1995)   
4                         Heat (1995)   
5                      Sabrina (1995)   
6                    GoldenEye (1995)   
7      American President, The (1995)   
8                       Casino (1995)   
9        Sense and Sensibility (1995)   

                                        genres  
0  Adventure Animation Children Comedy Fantasy  
1                   Adventure Children Fantasy  
2                               Comedy Romance  
3                                       Comedy  
4                        Action Crime Thriller  
5                               Comedy Romance  
6                    Action Adventure Thriller  
7                         Comedy Drama Romance  
8                                  Crime Drama  
9               

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
