# Import dan Load Dataset

In [1]:
import pandas as pd
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
import joblib
import os


# Load CSV

In [2]:
ratings_df = pd.read_csv("data/ratings.csv")
movies_df = pd.read_csv("data/movies.csv")

# Siapkan Data untuk Training

In [3]:
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)


# Latih Model & Simpan

In [7]:
model = SVD()
model.fit(trainset)

os.makedirs("models", exist_ok=True)
joblib.dump(model, "models/svd_model.joblib")


# Prediksi dan Akurasi Test

In [8]:
from surprise import accuracy
 
predictions = model.test(testset)
print("RMSE:", accuracy.rmse(predictions))
print("MAE:", accuracy.mae(predictions))

RMSE: 0.7776
RMSE: 0.7775682060005937
MAE:  0.5867
MAE: 0.5867155281895922


# Import & Load Model (Jika sudah pernah train)

In [None]:
import pandas as pd
import joblib

# Load model
model = joblib.load("models/svd_model.joblib")


# Fungsi Rekomendasi

In [None]:
def recommend_movies_for_user(user_id, ratings_df, movies_df, model, n=10):
    all_movie_ids = movies_df['movieId'].unique()
    watched_movie_ids = ratings_df[ratings_df['userId'] == user_id]['movieId'].tolist()
    unseen_movie_ids = [m for m in all_movie_ids if m not in watched_movie_ids]
    
    predictions = [model.predict(user_id, movie_id) for movie_id in unseen_movie_ids]
    top_predictions = sorted(predictions, key=lambda x: x.est, reverse=True)[:n]
    
    top_movie_ids = [pred.iid for pred in top_predictions]
    return movies_df[movies_df['movieId'].isin(top_movie_ids)][['movieId', 'title']]


# Coba Rekomendasi

In [None]:
recommendations = recommend_movies_for_user(33, ratings_df, movies_df, model, n=10)
print(recommendations)


       movieId                                              title
12841    64241                Lonely Wife, The (Charulata) (1964)
13028    66019  Great Ecstasy of Woodcarver Steiner, The (Groß...
13562    70186  Heimat - A Chronicle of Germany (Heimat - Eine...
17880    93404                    Queen: Days of Our Lives (2011)
20609   106503                      Enemies of Reason, The (2007)
26660   126927                      The Heart of the World (2000)
26741   127098        Louis C.K.: Live at The Comedy Store (2015)
45256   169954                                 Prohibition (2011)
46097   171749                  Death Note: Desu nôto (2006–2007)
49577   179173                           Rabbit of Seville (1950)
