<a href="https://colab.research.google.com/github/meetmehedi/Movie_Recommendation_System/blob/main/Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
!pip install lightfm scikit-learn pandas scipy

Collecting lightfm
  Downloading lightfm-1.17.tar.gz (316 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lightfm
[33m  DEPRECATION: Building 'lightfm' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'lightfm'. Discussion can be found at https://github.com/pypa/pip/issues/6334[0m[33m
[0m  Building wheel for lightfm (setup.py) ... [?25l[?25hdone
  Created wheel for lightfm: filename=lightfm-1.17-cp311-cp311-linux_x86_64.whl size=831126 sha256=2b6f4e1b7055446e288c4b715fff4b6acce642ba0eaad452fb276dd171b61657
  Stored in directory: /root/.cache/pip/wheels/b9/0d/8a/0729d2e6e3ca2a898ba55201f905da7db3f838a33df5b3fcdd
Successfully built lightfm
In

In [18]:
import pandas as pd

# Load ratings
ratings = pd.read_csv('/content/drive/MyDrive/ml-32m/ratings.csv')  # movieId, userId, rating, timestamp
movies = pd.read_csv('/content/drive/MyDrive/ml-32m/movies.csv')    # movieId, title, genres

In [19]:
from lightfm.data import Dataset

dataset = Dataset()
dataset.fit(
    users=ratings['userId'].unique(),
    items=ratings['movieId'].unique()
)

(interactions, weights) = dataset.build_interactions(
    [(row['userId'], row['movieId']) for _, row in ratings.iterrows()]
)

In [20]:
from lightfm import LightFM

model = LightFM(loss='warp')  # 'warp' = ranking-based loss (better than regression for recommendations)
model.fit(interactions, epochs=10, num_threads=2)

<lightfm.lightfm.LightFM at 0x7865ff28f910>

In [21]:
import numpy as np

def recommend_movies(model, dataset, user_id, movies_df, n=5):
    n_users, n_items = interactions.shape
    user_x = dataset.mapping()[0][user_id]

    scores = model.predict(user_x, np.arange(n_items))
    top_items = np.argsort(-scores)[:n]

    movie_mapping = {v: k for k, v in dataset.mapping()[2].items()}
    movie_ids = [movie_mapping[i] for i in top_items]

    return movies_df[movies_df['movieId'].isin(movie_ids)][['title', 'genres']]

# Example
recommend_movies(model, dataset, user_id=1, movies_df=movies)

Unnamed: 0,title,genres
292,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
522,Schindler's List (1993),Drama|War
585,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller
600,Fargo (1996),Comedy|Crime|Drama|Thriller
840,"Godfather, The (1972)",Crime|Drama


In [29]:
from lightfm.data import Dataset
# movie_genres = movies[['movieId', 'genres']].copy()
# movie_genres['genres'] = movie_genres['genres'].str.split('|')

# Get all unique genres
all_genres = set()
for genres_list in movies['genres'].str.split('|').dropna():
    for genre in genres_list:
        all_genres.add(genre)

# Fit the dataset with user IDs, movie IDs, and all unique genres as item features
dataset = Dataset()
dataset.fit(
    users=ratings['userId'].unique(),
    items=ratings['movieId'].unique(),
    item_features=list(all_genres) # Fit with all unique genres as item features
)

(interactions, weights) = dataset.build_interactions(
    [(row['userId'], row['movieId']) for _, row in ratings.iterrows()]
)

# Filter movies to include only those present in ratings
movies_in_ratings = movies[movies['movieId'].isin(ratings['movieId'].unique())].copy()

# Build item features based on the genres for each movie
item_features = dataset.build_item_features(
    [(row['movieId'], row['genres'].split('|')) for _, row in movies_in_ratings.iterrows()]
)

# Re-initialize and fit the model with item features
from lightfm import LightFM

model = LightFM(loss='warp')
model.fit(interactions, item_features=item_features, epochs=10, num_threads=2)

<lightfm.lightfm.LightFM at 0x7865f8b13550>

In [30]:
# 📦 Step 1: Install necessary libraries
# Run this cell only if LightFM is not installed
# !pip install lightfm

# 📚 Step 2: Import libraries
import pickle
import numpy as np
from lightfm import LightFM
from scipy.sparse import csr_matrix

# 🔄 Step 3: Load the pickled LightFM model
model_path = '/content/lightfm_model.pkl'

with open(model_path, 'rb') as f:
    model = pickle.load(f)

print("✅ Model loaded successfully.")

# 📊 Step 4: Simulate an interaction matrix (replace with your actual data)
# Let's assume we have 5 users and 10 items for demo purposes
n_users = 5
n_items = 10

# Create a dummy interaction matrix (random for demonstration)
interactions = csr_matrix(np.random.randint(2, size=(n_users, n_items)))

print("📊 Dummy interaction matrix created.")

# 🔍 Step 5: Predict score for a specific user-item pair
user_id = 0
item_id = 3

score = model.predict(user_id, [item_id])
print(f"Predicted score for user {user_id} and item {item_id}: {score[0]}")

# 🎯 Step 6: Function to recommend top N items for a user
def recommend_items(model, user_id, interactions, n=5):
    n_items = interactions.shape[1]
    scores = model.predict(user_id, np.arange(n_items))
    top_items = np.argsort(-scores)[:n]
    return top_items, scores[top_items]

# 🧪 Step 7: Example recommendation
top_items, top_scores = recommend_items(model, user_id=0, interactions=interactions, n=5)

print(f"\n🎁 Top 5 recommendations for user 0:")
for i, (item, score) in enumerate(zip(top_items, top_scores)):
    print(f"{i+1}. Item {item} — Score: {score:.4f}")

✅ Model loaded successfully.
📊 Dummy interaction matrix created.
Predicted score for user 0 and item 3: 0.9442452788352966

🎁 Top 5 recommendations for user 0:
1. Item 9 — Score: 2.5728
2. Item 6 — Score: 2.4556
3. Item 0 — Score: 2.4431
4. Item 8 — Score: 2.2805
5. Item 1 — Score: 2.2716
