<a href="https://colab.research.google.com/github/k-dinakaran/movie-recommendation-system-using-collaborative-filtering/blob/main/movie_recommendation_system_using_collaborative_filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install implicit

Collecting implicit
  Downloading implicit-0.7.2-cp311-cp311-manylinux2014_x86_64.whl.metadata (6.1 kB)
Downloading implicit-0.7.2-cp311-cp311-manylinux2014_x86_64.whl (8.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: implicit
Successfully installed implicit-0.7.2


In [2]:
import os
import kagglehub
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from implicit.als import AlternatingLeastSquares

# Download dataset
path = kagglehub.dataset_download("grouplens/movielens-20m-dataset")
print("Path to dataset files:", path)

# Define dataset path
path = "/root/.cache/kagglehub/datasets/grouplens/movielens-20m-dataset/versions/1"
files = os.listdir(path)
print("Files in the dataset directory:", files)

# Load Ratings Data
ratings_file = os.path.join(path, "rating.csv")
ratings = pd.read_csv(ratings_file, usecols=["userId", "movieId", "rating"], dtype={"userId": np.int32, "movieId": np.int32, "rating": np.float32})
print("Ratings dataset sample:")
print(ratings.head())

# Load Movies Data
movies_file = os.path.join(path, "movie.csv")
movies = pd.read_csv(movies_file, usecols=["movieId", "title"], dtype={"movieId": np.int32, "title": str})
print("Movies dataset sample:")
print(movies.head())

# Convert to Sparse Matrix
user_item_matrix = csr_matrix((ratings['rating'], (ratings['userId'], ratings['movieId'])))

# Train ALS Model
als_model = AlternatingLeastSquares(factors=50, regularization=0.1, iterations=20)
als_model.fit(user_item_matrix)

# Function to recommend movies
def recommend_movies(user_id, n=5):
    user_id -= 1  # Adjust for zero-based indexing
    if user_id not in range(user_item_matrix.shape[0]):
        print("User ID not found!")
        return []

    recommendations = als_model.recommend(user_id, user_item_matrix[user_id], N=n)
    recommended_movies = [(movies[movies['movieId'] == movie_id]['title'].values[0], score) for movie_id, score, *_ in recommendations if not movies[movies['movieId'] == movie_id].empty]
    return recommended_movies

Downloading from https://www.kaggle.com/api/v1/datasets/download/grouplens/movielens-20m-dataset?dataset_version_number=1...


100%|██████████| 195M/195M [00:09<00:00, 21.0MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/grouplens/movielens-20m-dataset/versions/1
Files in the dataset directory: ['tag.csv', 'genome_tags.csv', 'rating.csv', 'link.csv', 'movie.csv', 'genome_scores.csv']
Ratings dataset sample:
   userId  movieId  rating
0       1        2     3.5
1       1       29     3.5
2       1       32     3.5
3       1       47     3.5
4       1       50     3.5
Movies dataset sample:
   movieId                               title
0        1                    Toy Story (1995)
1        2                      Jumanji (1995)
2        3             Grumpier Old Men (1995)
3        4            Waiting to Exhale (1995)
4        5  Father of the Bride Part II (1995)


  check_blas_config()


  0%|          | 0/20 [00:00<?, ?it/s]

In [6]:
# User input
user_id = int(input("Enter a User ID: "))
n_recommendations = int(input("Enter number of recommendations: "))

# Generate recommendations
recommended_movies = recommend_movies(user_id, n_recommendations)
print(f"Recommended Movies for User {user_id}:")
for movie, rating in recommended_movies:
    print(f"{movie} (Predicted Score: {rating:.2f})")

Enter a User ID: 5
Enter number of recommendations: 3
Recommended Movies for User 5:
Mrs. Doubtfire (1993) (Predicted Score: 597.00)
