In [3]:
import heapq

import pandas as pd
from surprise import Dataset, Reader, KNNBasic, accuracy
from surprise.model_selection import train_test_split


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\Stevi\OneDrive\Documents\Projects\.venv\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\Stevi\OneDrive\Documents\Projects\.venv\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "c:\Users\Stevi\OneDrive\Documents\Projects\.venv\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start

ImportError: numpy.core.multiarray failed to import (auto-generated because you didn't call 'numpy.import_array()' after cimporting numpy; use '<void>numpy._import_array' to disable if you are certain you don't need it).

In [None]:
# Step 1: Load dataset
# Read the ratings CSV from a path relative to the current working directory.
ratings = pd.read_csv("ml-latest-small/ratings.csv")

# Wrap the frame for Surprise: the Reader declares the 0.5-5.0 rating scale,
# and the three columns are passed in user, item, rating order.
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(
    ratings.loc[:, ["userId", "movieId", "rating"]],
    reader,
)

In [None]:
# -------------------------------
# Step 2: Train-test split
# -------------------------------
# Hold out 20% of the ratings for evaluation; the integer random_state seeds
# the shuffle so the same split is produced on every run.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [None]:
# -------------------------------
# Step 3: User-Based CF algorithm
# -------------------------------
# Similarity settings handed to KNNBasic:
#   name       -> similarity metric: 'cosine', 'pearson', 'msd'
#   user_based -> True = User-based CF, False = Item-based CF
sim_options = dict(name="cosine", user_based=True)
algo = KNNBasic(sim_options=sim_options)

# Fit on the training split. Left as a bare trailing expression so the cell
# still displays whatever fit() returns, exactly as before.
algo.fit(trainset)

In [None]:
# -------------------------------
# Step 4: Predict and Evaluate
# -------------------------------
# Score every held-out rating with the fitted model.
predictions = algo.test(testset)

# Report both error metrics; verbose=True (the library default) prints each
# value, and the trailing MAE expression is also shown as the cell result.
accuracy.rmse(predictions, verbose=True)
accuracy.mae(predictions, verbose=True)

In [None]:
# -------------------------------
# Step 5: Recommend for a user
# -------------------------------
def get_top_n(predictions, n=5):
    """Return the top-N highest-estimated items for each user.

    Args:
        predictions: Iterable of 5-field records that unpack as
            ``(uid, iid, true_r, est, details)``. Only ``uid``, ``iid`` and
            ``est`` are used.
        n: Maximum number of items to keep per user. Must be non-negative.

    Returns:
        dict mapping each ``uid`` to a list of at most ``n`` ``(iid, est)``
        tuples ordered by ``est`` descending. Users appear in first-seen
        order; items with equal estimates keep their input order.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        # A negative count has no meaning for a top-N; fail loudly rather
        # than hand back a misleading (truncated or empty) ranking.
        raise ValueError(f"n must be non-negative, got {n}")

    # Group (item, estimate) pairs by user; the true rating and the details
    # field play no part in the ranking.
    ratings_by_user = {}
    for uid, iid, _true_r, est, _details in predictions:
        ratings_by_user.setdefault(uid, []).append((iid, est))

    # nlargest is documented as equivalent to sorted(..., reverse=True)[:n]
    # but does not need to fully sort each list when only n items are kept.
    return {
        uid: heapq.nlargest(n, user_ratings, key=lambda pair: pair[1])
        for uid, user_ratings in ratings_by_user.items()
    }

# Keep the five highest-estimated items per user.
# NOTE(review): `predictions` is the output of algo.test(testset), so this
# ranks only movies from each user's held-out ratings, not unseen movies.
# For recommendations of unrated movies, consider predicting on
# trainset.build_anti_testset() instead -- confirm intent.
top_n = get_top_n(predictions, n=5)

In [None]:
# Preview the stored recommendations for the first three users in top_n
# (dict order, i.e. the order in which users were inserted).
for uid in list(top_n)[:3]:
    print(f"\nTop-5 Recommendations for User {uid}:")
    # Each entry is an (item id, estimated rating) pair.
    for iid, est in top_n[uid]:
        print(f"  MovieID {iid} | Predicted Rating: {est:.2f}")