<a href="https://colab.research.google.com/github/saikrishna7600/collabarative-filtering/blob/main/New_collaborative_filtering_(2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports and Warnings

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install implicit
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
import implicit
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


Collecting implicit
  Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl.metadata (6.1 kB)
Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl (8.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m50.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: implicit
Successfully installed implicit-0.7.2


Load Data

In [3]:
# Load the data
ratings = pd.read_csv('https://raw.githubusercontent.com/saikrishna7600/collabarative-filtering/refs/heads/main/ratings.csv')
movies = pd.read_csv('https://raw.githubusercontent.com/saikrishna7600/collabarative-filtering/refs/heads/main/movies.csv')
# Display the first few rows of the ratings dataframe
ratings.head()


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


Create Matrix Function

In [4]:
def create_X(df):
    N = df['userId'].nunique()
    M = df['movieId'].nunique()

    user_mapper = dict(zip(np.unique(df["userId"]), list(range(N))))
    movie_mapper = dict(zip(np.unique(df["movieId"]), list(range(M))))

    user_inv_mapper = dict(zip(list(range(N)), np.unique(df["userId"])))
    movie_inv_mapper = dict(zip(list(range(M)), np.unique(df["movieId"])))

    user_index = [user_mapper[i] for i in df['userId']]
    movie_index = [movie_mapper[i] for i in df['movieId']]

    X = csr_matrix((df["rating"], (movie_index, user_index)), shape=(M, N))
    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper

X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_X(ratings)


Install and Import thefuzz

In [5]:
!pip install thefuzz
from thefuzz import fuzz
from thefuzz import process
import pandas as pd
def movie_finder(title):
    all_titles = movies['title'].tolist()
    closest_match = process.extractOne(title, all_titles)
    return closest_match[0]

movie_title_mapper = dict(zip(movies['title'], movies['movieId']))
movie_title_inv_mapper = dict(zip(movies['movieId'], movies['title']))

def get_movie_index(title):
    fuzzy_title = movie_finder(title)
    movie_id = movie_title_mapper[fuzzy_title]
    movie_idx = movie_mapper[movie_id]
    return movie_idx

def get_movie_title(movie_idx):
    movie_id = movie_inv_mapper.get(movie_idx, None)  # Use .get to handle missing keys
    if movie_id is None:
        return "Unknown Movie"
    title = movie_title_inv_mapper.get(movie_id, "Unknown Movie")  # Use .get to handle missing keys
    return title

get_movie_index('Legally Blonde')
get_movie_title(3282)


Collecting thefuzz
  Downloading thefuzz-0.22.1-py3-none-any.whl.metadata (3.9 kB)
Collecting rapidfuzz<4.0.0,>=3.0.0 (from thefuzz)
  Downloading rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading thefuzz-0.22.1-py3-none-any.whl (8.2 kB)
Downloading rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m39.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, thefuzz
Successfully installed rapidfuzz-3.11.0 thefuzz-0.22.1


'Legally Blonde (2001)'

Restrict BLAS Thread Usage

In [6]:
import threadpoolctl

# Restrict BLAS thread usage
threadpoolctl.threadpool_limits(1, "blas")


<threadpoolctl.threadpool_limits at 0x79c88cc37910>

Train the ALS Model

In [7]:
# Train the ALS model with more iterations and factors if necessary
model = implicit.als.AlternatingLeastSquares(factors=100, iterations=50)
model.fit(X)


  0%|          | 0/50 [00:00<?, ?it/s]

Find Related Movies

In [8]:
movie_of_interest = 'Strange Days'
movie_index = get_movie_index(movie_of_interest)
related = model.similar_items(movie_index)
print(list(zip(related[0], related[1])))


[(167, 1.0), (28, 0.45436358), (396, 0.4085112), (35, 0.39566344), (308, 0.3453503), (420, 0.3341473), (187, 0.33331585), (151, 0.33330858), (15, 0.3178512), (501, 0.31554785)]


In [9]:
print(f"Because you watched {movie_finder(movie_of_interest)}...")
for r in zip(related[0], related[1]):
    recommended_title = get_movie_title(int(r[0]))  # Convert r[0] to int
    if recommended_title != movie_finder(movie_of_interest):
        print(recommended_title)


Because you watched Strange Days (1995)...
City of Lost Children, The (Cité des enfants perdus, La) (1995)
Free Willy (1993)
Clueless (1995)
Client, The (1994)
Killing Zoe (1994)
Cure, The (1995)
Mad Love (1995)
Casino (1995)
Celluloid Closet, The (1995)


User Ratings

In [10]:
user_id = 96
user_ratings = ratings[ratings['userId'] == user_id].merge(movies[['movieId', 'title']], on='movieId')
user_ratings = user_ratings.sort_values('rating', ascending=False)
print(f"Number of movies rated by user {user_id}: {user_ratings['movieId'].nunique()}")



Number of movies rated by user 96: 78


In [11]:
# Sorting and selecting top 5 movies
user_ratings = ratings[ratings['userId'] == user_id].merge(movies[['movieId', 'title']], on='movieId')
user_ratings = user_ratings.sort_values('rating', ascending=False)
top_5 = user_ratings.head(5)
top_5



Unnamed: 0,userId,movieId,rating,timestamp,title
0,96,1,5.0,964772990,Toy Story (1995)
51,96,2058,5.0,964772862,"Negotiator, The (1998)"
33,96,1210,5.0,964772712,Star Wars: Episode VI - Return of the Jedi (1983)
32,96,1200,5.0,964772731,Aliens (1986)
31,96,1198,5.0,964772622,Raiders of the Lost Ark (Indiana Jones and the...


In [12]:
bottom_5 = user_ratings[user_ratings['rating'] < 3].tail()
bottom_5


Unnamed: 0,userId,movieId,rating,timestamp,title
10,96,306,1.0,964773021,Three Colors: Red (Trois couleurs: Rouge) (1994)
5,96,193,1.0,964774290,Showgirls (1995)
66,96,3033,1.0,964773251,Spaceballs (1987)
23,96,788,1.0,964773714,"Nutty Professor, The (1996)"
11,96,307,1.0,964773216,Three Colors: Blue (Trois couleurs: Bleu) (1993)


Recommendations for a User

In [13]:
# Generate a transpose of the X matrix
X_t = X.T.tocsr()

# Check if the user_id is in user_mapper
if user_id in user_mapper:
    user_idx = user_mapper[user_id]

    # Get recommendations for the specified user index
    recommendations = model.recommend(user_idx, X_t, N=10, filter_already_liked_items=False)

    for r in recommendations:
        movie_idx = int(r[0])  # Ensure movie_idx is an integer
        if movie_idx in movie_inv_mapper:
            recommended_title = get_movie_title(movie_idx)
            print(recommended_title)
        else:
            print(f"Movie index {movie_idx} not found in movie_inv_mapper")
else:
    print(f"User ID {user_id} not found in user_mapper")


Coneheads (1993)
Jumanji (1995)
