In [1]:

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
# Step 1: Install Dependencies (Surprise Library)
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━[0m [32m122.9/154.4 kB[0m [31m4.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2469538 sha256=159a1716a96376881b0eef3b96d50d9ec97b1b5efb186b65f42c6c6c18909c2f
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/

In [8]:
# Step 2: Imports
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [4]:
# Step 1: Uninstall both (clean slate)
!pip uninstall -y numpy scikit-surprise

# Step 2: Reinstall numpy first (compatible)
!pip install numpy==1.24.4

# Step 3: Then reinstall scikit-surprise
!pip install scikit-surprise


Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Successfully uninstalled numpy-2.0.2
Found existing installation: scikit-surprise 1.1.4
Uninstalling scikit-surprise-1.1.4:
  Successfully uninstalled scikit-surprise-1.1.4
Collecting numpy==1.24.4
  Downloading numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Downloading numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m126.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.24.4 which is incompatible.
xarray-einstats 0.9.1 requires numpy>=1.25, but you have numpy 1.24.4 which is incompatible.
pymc 5.2

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [1]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split


In [5]:
# Step 3: Load Data (Adjust path as needed if using Google Drive)
import pandas as pd

# **IMPORTANT**: Replace '/content/drive/My Drive/ml-1m/' with the actual path to your 'ml-1m' folder in Google Drive
drive_path = '/content/drive/My Drive/ml-1m/'

movies = pd.read_csv(drive_path + 'movies.dat', sep='::', engine='python',
                     names=['movieId', 'title', 'genres'], encoding='latin-1')
ratings = pd.read_csv(drive_path + 'ratings.dat', sep='::', engine='python',
                      names=['userId', 'movieId', 'rating', 'timestamp'], encoding='latin-1')

In [6]:
# Step 4: Train SVD Collaborative Filtering Model
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2)
model = SVD()
model.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7e757b890f10>

In [9]:
# Step 5: Build Content-Based Filtering with TF-IDF on Genres
movies['genres_clean'] = movies['genres'].str.replace('|', ' ', regex=False)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres_clean'])
genre_sim = linear_kernel(tfidf_matrix, tfidf_matrix)  # cosine similarity

In [10]:
# Step 6: Hybrid Recommendation Function
def hybrid_recommend(user_id, top_n=5, alpha=0.6):
    movie_ids = movies['movieId'].tolist()
    svd_scores = []

    for movie_id in movie_ids:
        try:
            score = model.predict(user_id, movie_id).est
            svd_scores.append(score)
        except:
            svd_scores.append(0)

    svd_scores = np.array(svd_scores)

    user_rated = ratings[ratings.userId == user_id].sort_values(by='rating', ascending=False)
    if user_rated.empty:
        content_scores = np.zeros(len(movies))
    else:
        top_movie_index = movies[movies.movieId == user_rated.iloc[0]['movieId']].index[0]
        content_scores = genre_sim[top_movie_index]

    final_score = alpha * svd_scores + (1 - alpha) * content_scores
    top_indices = final_score.argsort()[-top_n:][::-1]

    return movies.iloc[top_indices][['title', 'genres']]

In [11]:
# Step 7: Test the Hybrid Recommender
user_to_test = 100
print(f"\n🔮 Top Movie Recommendations for User {user_to_test} (Hybrid SVD + Genres):")
print(hybrid_recommend(user_to_test, top_n=7))


🔮 Top Movie Recommendations for User 100 (Hybrid SVD + Genres):
                                                  title  \
892                                  Rear Window (1954)   
1194                              Third Man, The (1949)   
2139                          Lady Vanishes, The (1938)   
941                        It's a Wonderful Life (1946)   
3732                         Anatomy of a Murder (1959)   
938                                Thin Man, The (1934)   
1950  Seven Samurai (The Magnificent Seven) (Shichin...   

                               genres  
892                  Mystery|Thriller  
1194                 Mystery|Thriller  
2139  Comedy|Mystery|Romance|Thriller  
941                             Drama  
3732                    Drama|Mystery  
938                           Mystery  
1950                     Action|Drama  


In [12]:
# Step 8: Explainable Output

def explain_recommendation(user_id, top_n=5):
    recommendations = hybrid_recommend(user_id, top_n)
    user_history = ratings[ratings.userId == user_id].sort_values(by='rating', ascending=False)
    top_movie_id = user_history.iloc[0]['movieId']
    top_movie_title = movies[movies.movieId == top_movie_id]['title'].values[0]
    reason = f"Because you liked '{top_movie_title}', which shares similar genres."
    print("\n📢 Explanation for Recommendations:")
    print(reason)
    print("\nTop Recommendations:")
    print(recommendations)


In [13]:
# Test with explanation
explain_recommendation(100, top_n=5)


📢 Explanation for Recommendations:
Because you liked 'Lone Star (1996)', which shares similar genres.

Top Recommendations:
                             title                           genres
892             Rear Window (1954)                 Mystery|Thriller
1194         Third Man, The (1949)                 Mystery|Thriller
2139     Lady Vanishes, The (1938)  Comedy|Mystery|Romance|Thriller
941   It's a Wonderful Life (1946)                            Drama
3732    Anatomy of a Murder (1959)                    Drama|Mystery


In [14]:
# Step 9: Evaluation - RMSE on test set
from surprise import accuracy
predictions = model.test(testset)
rmse = accuracy.rmse(predictions)
print("\nModel Evaluation:\nRoot Mean Squared Error (RMSE):", rmse)

RMSE: 0.8736

Model Evaluation:
Root Mean Squared Error (RMSE): 0.8735813970721767


In [15]:
# Step 10: Optional - Save Model & Load Later (Using Pickle)
import pickle

# Save model
with open('svd_model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [16]:
# Step 11: Optional - Recommend Based on Mood
mood_genre_map = {
    'happy': ['Comedy', 'Romance', "Animation"],
    'sad': ['Drama', 'Film-Noir'],
    'excited': ['Action', 'Thriller'],
    'relaxed': ['Documentary', 'Music']
}


In [17]:
def recommend_by_mood(mood, top_n=5):
    genres = mood_genre_map.get(mood.lower(), [])
    matches = movies[movies['genres'].apply(lambda g: any(genre in g for genre in genres))]
    return matches[['title', 'genres']].sample(n=min(top_n, len(matches)))

print("\n🎭 Mood-Based Recommendations (Mood: Excited):")
print(recommend_by_mood("excited", top_n=5))


🎭 Mood-Based Recommendations (Mood: Excited):
                                        title                   genres
983                  Last Man Standing (1996)     Action|Drama|Western
2251                         Apt Pupil (1998)           Drama|Thriller
671   Some Folks Call It a Sling Blade (1993)           Drama|Thriller
2150                           Murder! (1930)         Mystery|Thriller
3204                          Scream 3 (2000)  Horror|Mystery|Thriller
