# 🎬 Movie Recommender using Surprise Library

In [5]:
!pip install scikit-surprise

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4.tar.gz (154 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25ldone
[?25h  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp312-cp312-macosx_11_0_arm64.whl size=472505 sha256=e8521c2f8f685e97effefdddc619ab3020708fa8a1db68b447c62560ef5f4723
  Stored in directory: /Users/tahami-ahmad/Library/Caches/pip/wheels/75/fa/bc/739bc2cb1fbaab6061854e6cfbb81a0ae52c92a502a7fa454b
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m]

## 📥 Load MovieLens Dataset

In [2]:

from surprise import Dataset
from surprise import Reader
from surprise.model_selection import train_test_split

# Load MovieLens 100k dataset
data = Dataset.load_builtin('ml-100k')
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)


Dataset ml-100k could not be found. Do you want to download it? [Y/n] 

 y


Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /Users/tahami-ahmad/.surprise_data/ml-100k


## 📊 Train SVD Model and Evaluate

In [5]:

from surprise import SVD
from surprise import accuracy

model = SVD()
model.fit(trainset)
predictions = model.test(testset)
accuracy.rmse(predictions)


RMSE: 0.9386


0.9385623601086499

## 🔮 Predict Sample Rating

In [8]:

pred = model.predict(uid=196, iid=302)
print(f"Predicted rating: {pred.est:.2f}")


Predicted rating: 3.53


## 🏆 Top-N Recommendations

In [11]:

from collections import defaultdict

def get_top_n(predictions, n=5):
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n

top_n = get_top_n(predictions, n=5)
first_user = list(top_n.keys())[0]
print(f"Top 5 recommendations for user {first_user}:")
for item, score in top_n[first_user]:
    print(f"Item {item}, predicted rating: {score:.2f}")


Top 5 recommendations for user 907:
Item 79, predicted rating: 5.00
Item 172, predicted rating: 5.00
Item 185, predicted rating: 5.00
Item 98, predicted rating: 5.00
Item 144, predicted rating: 4.96


## 🤝 Try KNNBasic Algorithm

In [14]:

from surprise import KNNBasic

knn = KNNBasic()
knn.fit(trainset)
knn_preds = knn.test(testset)
accuracy.rmse(knn_preds)


Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9802


0.980150596704479

## 📉 Try BaselineOnly Algorithm

In [17]:

from surprise import BaselineOnly

baseline = BaselineOnly()
baseline.fit(trainset)
baseline_preds = baseline.test(testset)
accuracy.rmse(baseline_preds)


Estimating biases using als...
RMSE: 0.9442


0.9441798799352237

## 🎯 Precision and Recall at K

In [20]:

from surprise.model_selection import cross_validate
from surprise import accuracy

print("Cross-Validation for SVD")
cross_validate(SVD(), data, measures=['RMSE', 'MAE'], cv=5, verbose=True)


Cross-Validation for SVD
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9364  0.9403  0.9435  0.9335  0.9352  0.9378  0.0036  
MAE (testset)     0.7391  0.7414  0.7426  0.7362  0.7333  0.7385  0.0034  
Fit time          0.21    0.21    0.21    0.22    0.24    0.22    0.01    
Test time         0.02    0.05    0.02    0.05    0.03    0.04    0.01    


{'test_rmse': array([0.93636056, 0.9402807 , 0.94348033, 0.93353363, 0.93519846]),
 'test_mae': array([0.73912696, 0.7413554 , 0.74257198, 0.73620154, 0.73325276]),
 'fit_time': (0.21038603782653809,
  0.20552396774291992,
  0.206373929977417,
  0.21709799766540527,
  0.24342036247253418),
 'test_time': (0.024439096450805664,
  0.051154136657714844,
  0.02377796173095703,
  0.05498695373535156,
  0.026182889938354492)}