# Importing the Libraries

In [23]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import ParameterSampler
from scipy.sparse.linalg import svds
from scipy.sparse import csr_matrix

# Loading the Dataset

In [24]:
def load_data(movies_path, ratings_path):
    movies = pd.read_csv(movies_path)
    ratings = pd.read_csv(ratings_path)
    return movies, ratings

movies_path = 'https://raw.githubusercontent.com/Bansal0527/Movie-Recomendation-System/master/Dataset/movies.csv'
ratings_path = 'https://raw.githubusercontent.com/Bansal0527/Movie-Recomendation-System/master/Dataset/ratings.csv'
movies, ratings = load_data(movies_path, ratings_path)

# Preprocessing the Dataset

In [25]:
param_space = {
    'min_user_votes': range(10, 101),
    'min_movie_votes': range(30, 101)
}
n_iter = 100
best_score = float('inf')
best_params = None
param_sampler = ParameterSampler(param_space, n_iter=n_iter, random_state=42)
for params in param_sampler:
    filtered_ratings = ratings.copy()
    user_counts = filtered_ratings['userId'].value_counts()
    movie_counts = filtered_ratings['movieId'].value_counts()
    filtered_ratings = filtered_ratings[filtered_ratings['userId'].isin(user_counts[user_counts >= params['min_user_votes']].index)]
    filtered_ratings = filtered_ratings[filtered_ratings['movieId'].isin(movie_counts[movie_counts >= params['min_movie_votes']].index)]
    sparsity = 1 - len(filtered_ratings) / (len(ratings) * len(movies))
    if sparsity < best_score:
        best_score = sparsity
        best_params = params

print("Best parameters:", best_params)
print("Best sparsity score:", best_score)

Best parameters: {'min_user_votes': 15, 'min_movie_votes': 31}
Best sparsity score: 0.9999416111038673


In [26]:
def preprocess_data(ratings, min_user_votes = 15, min_movie_votes = 31):
    user_counts = ratings['userId'].value_counts()
    movie_counts = ratings['movieId'].value_counts()

    ratings = ratings[ratings['userId'].isin(user_counts[user_counts >= min_user_votes].index)]
    ratings = ratings[ratings['movieId'].isin(movie_counts[movie_counts >= min_movie_votes].index)]

    final_dataset = ratings.pivot(index='movieId', columns='userId', values='rating').fillna(0)
    return final_dataset

final_dataset = preprocess_data(ratings)
movies['genres'] = movies['genres'].str.replace('|', ' ')

# Training the Singular Value Decomposition (SVD) Model

In [27]:
def collaborative_filtering_svd(final_dataset, k=50):
    sparse_final_dataset = csr_matrix(final_dataset.values)
    U, sigma, Vt = svds(sparse_final_dataset, k=k)
    sigma = np.diag(sigma)
    predicted_ratings = np.dot(np.dot(U, sigma), Vt)
    return predicted_ratings

# Collaborative filtering SVD
predicted_ratings = collaborative_filtering_svd(final_dataset)

# Movie Recommendation System

Movie recommendation on the basis of the model trained

In [39]:
def recommend_movies(predictions, movies_df, userID, num_recommendations=10):
    user_row_number = userID - 1  # UserID starts at 1, so we need to convert to zero-based index

    # Get and sort the user's predictions
    sorted_user_predictions = predictions[user_row_number].argsort()[::-1]

    # Filter out movies that the user has already rated
    user_rated_movies = ratings[ratings['userId'] == userID]['movieId'].values

    # Initialize recommendations list
    recommendations = []

    # Iterate through sorted indices
    for movie_index in sorted_user_predictions:
        movie_id = final_dataset.index[movie_index]
        # Check if the movie is not rated by the user
        if movie_id not in user_rated_movies:
            movie_info = movies_df.loc[movies_df['movieId'] == movie_id]
            recommendations.append(movie_info)

            # Break loop if recommendations list has reached the desired length
            if len(recommendations) == num_recommendations:
                break

    # Concatenate movie information into a single DataFrame
    recommendations_df = pd.concat(recommendations)

    return recommendations_df[['movieId', 'title', 'genres']]

# Example usage:
userID = 1  # Set the user ID you want to recommend movies for
recommendations = recommend_movies(predicted_ratings, movies, userID)
print(recommendations)

      movieId                                       title  \
1009     1320              Alien³ (a.k.a. Alien 3) (1992)   
2553     3418                      Thelma & Louise (1991)   
984      1285                             Heathers (1989)   
2515     3363                    American Graffiti (1973)   
929      1228                          Raging Bull (1980)   
1757     2355                        Bug's Life, A (1998)   
1544     2080                   Lady and the Tramp (1955)   
176       208                           Waterworld (1995)   
1058     1375  Star Trek III: The Search for Spock (1984)   
3172     4270                   Mummy Returns, The (2001)   

                                   genres  
1009        Action Horror Sci-Fi Thriller  
2553                Adventure Crime Drama  
984                                Comedy  
2515                         Comedy Drama  
929                                 Drama  
1757  Adventure Animation Children Comedy  
1544    Animation Ch