# Import dan Load Dataset

In [1]:
import pandas as pd
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
import joblib
import os


# Load CSV

In [4]:
# Read the two input tables from the local "data" directory.
# ratings_df supplies the userId / movieId / rating columns used for training;
# movies_df supplies the movieId / title columns used when listing results.
ratings_df = pd.read_csv(filepath_or_buffer="data/ratings.csv")
movies_df = pd.read_csv(filepath_or_buffer="data/movies.csv")

# Siapkan Data untuk Training

In [5]:
# Ratings are declared to Surprise as lying on a 0.5 to 5.0 scale.
reader = Reader(rating_scale=(0.5, 5.0))

# Hand Surprise only the three columns it needs, in (user, item, rating) order.
data = Dataset.load_from_df(
    df=ratings_df.loc[:, ['userId', 'movieId', 'rating']],
    reader=reader,
)

# Hold out 20% of the ratings; the fixed seed keeps the split repeatable.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)


# Latih Model & Simpan

In [None]:
# Seed the model's random initialisation so that re-running this cell yields
# the same fitted model; the seed matches the one used for the train/test
# split.
model = SVD(random_state=42)
model.fit(trainset)

# Persist the fitted model, creating the output directory if it is missing.
os.makedirs("models", exist_ok=True)
joblib.dump(model, "models/svd_model.joblib")


# Fungsi Rekomendasi

In [None]:
def recommend_movies_for_user(user_id, ratings_df, movies_df, model, n=10):
    """Return the top-``n`` unseen movies for a user, best prediction first.

    Every movie id in ``movies_df`` for which the user has no row in
    ``ratings_df`` is scored with ``model.predict(user_id, movie_id)``, and
    the ``n`` predictions with the highest ``est`` are kept.

    Args:
        user_id: Value compared against ``ratings_df['userId']`` and passed
            as the first argument to ``model.predict``.
        ratings_df: DataFrame with at least ``userId`` and ``movieId`` columns.
        movies_df: DataFrame with at least ``movieId`` and ``title`` columns.
        model: Object whose ``predict(user_id, movie_id)`` returns a result
            exposing ``iid`` and ``est`` attributes.
        n: Maximum number of recommendations. Values of zero or below yield
            an empty result.

    Returns:
        A DataFrame with the columns ``movieId`` and ``title``. Rows keep
        their index labels from ``movies_df`` and are ordered from the
        highest to the lowest predicted estimate.
    """
    # A set gives constant-time membership tests while filtering the catalogue.
    watched_movie_ids = set(
        ratings_df.loc[ratings_df['userId'] == user_id, 'movieId']
    )
    unseen_movie_ids = [
        movie_id
        for movie_id in movies_df['movieId'].unique()
        if movie_id not in watched_movie_ids
    ]

    predictions = [
        model.predict(user_id, movie_id) for movie_id in unseen_movie_ids
    ]
    # Clamp n so a negative value cannot slice from the end of the ranking.
    top_predictions = sorted(
        predictions, key=lambda pred: pred.est, reverse=True
    )[:max(n, 0)]
    top_movie_ids = [pred.iid for pred in top_predictions]

    selected = movies_df[movies_df['movieId'].isin(top_movie_ids)][
        ['movieId', 'title']
    ]
    if selected.empty:
        return selected

    # isin() yields rows in catalogue order; restore the ranking order so the
    # first row is the strongest recommendation.
    rank_by_id = {movie_id: rank for rank, movie_id in enumerate(top_movie_ids)}
    ranks = selected['movieId'].map(rank_by_id).to_numpy()
    return selected.iloc[ranks.argsort(kind='stable')]


# Coba Rekomendasi

In [None]:
# Smoke-test the recommender: request ten suggestions for user 1 and show them.
recommendations = recommend_movies_for_user(
    user_id=1,
    ratings_df=ratings_df,
    movies_df=movies_df,
    model=model,
    n=10,
)
print(recommendations)
