In [2]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

In [4]:
# Load data rating
ratings = pd.read_csv("ml-100k/u.data", sep="\t", names=["user_id", "movie_id", "rating", "timestamp"])

# Load data film
movies = pd.read_csv("ml-100k/u.item", sep="|", encoding="latin-1", header=None)
movies = movies[[0, 1]]  # Ambil hanya movie_id dan title
movies.columns = ["movie_id", "title"]

# Gabungkan data rating dengan data film
df = pd.merge(ratings, movies, on="movie_id")
df.drop(columns=["timestamp"], inplace=True)  # Hapus kolom timestamp

# Persiapkan data untuk Surprise
reader = Reader(rating_scale=(1, 5))  # Skala rating 1-5
data = Dataset.load_from_df(df[["user_id", "movie_id", "rating"]], reader)

# Split data untuk training & testing
trainset, testset = train_test_split(data, test_size=0.2)

# Buat model SVD
model = SVD()
model.fit(trainset)

def recommend_movies(user_id, model, df, top_n=10):
    # Dapatkan semua movie_id yang tersedia
    movie_ids = df["movie_id"].unique()

    # Cari film yang belum ditonton oleh user
    watched_movies = df[df["user_id"] == user_id]["movie_id"].values
    unseen_movies = [mid for mid in movie_ids if mid not in watched_movies]

    # Prediksi rating untuk semua film yang belum ditonton
    predictions = [model.predict(user_id, mid) for mid in unseen_movies]
    
    # Urutkan berdasarkan rating tertinggi
    predictions.sort(key=lambda x: x.est, reverse=True)
    
    # Ambil top_n film
    top_movies = predictions[:top_n]
    
    # Ambil judul film dari hasil rekomendasi
    recommended_titles = [movies[movies["movie_id"] == pred.iid]["title"].values[0] for pred in top_movies]
    
    return recommended_titles

recommend_movies(10, model, df)

['Close Shave, A (1995)',
 'Wallace & Gromit: The Best of Aardman Animation (1996)',
 'Some Folks Call It a Sling Blade (1993)',
 'Wrong Trousers, The (1993)',
 'To Kill a Mockingbird (1962)',
 "Schindler's List (1993)",
 'Killing Fields, The (1984)',
 'As Good As It Gets (1997)',
 'Ran (1985)',
 'Jean de Florette (1986)']

In [5]:
from flask import Flask, request, jsonify
import pandas as pd
from surprise import Dataset, Reader, SVD

app = Flask(__name__)

# Load dataset rating (pastikan file sudah ada)
ratings = pd.read_csv("ml-100k/u.data", sep="\t", names=["user_id", "movie_id", "rating", "timestamp"])
reader = Reader(line_format="user item rating timestamp", sep="\t")
data = Dataset.load_from_df(ratings[["user_id", "movie_id", "rating"]], reader)

# Latih model rekomendasi
model = SVD()
trainset = data.build_full_trainset()
model.fit(trainset)

# API untuk merekomendasikan film berdasarkan user_id
@app.route("/recommend", methods=["GET"])
def recommend():
    user_id = int(request.args.get("user_id"))
    movie_ids = ratings["movie_id"].unique()
    
    # Prediksi rating untuk semua film
    predictions = [(movie, model.predict(user_id, movie).est) for movie in movie_ids]
    
    # Urutkan film dengan prediksi rating tertinggi
    recommendations = sorted(predictions, key=lambda x: x[1], reverse=True)[:5]
    
    return jsonify({"user_id": user_id, "recommendations": recommendations})

if __name__ == "__main__":
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [6]:
pred = model.predict(uid=10, iid=50)
print(pred.est)  # Hasil estimasi rating


4.591543375988051


In [8]:
from surprise import accuracy

predictions = model.test(testset)
rmse = accuracy.rmse(predictions)
print(f"RMSE: {rmse}")


RMSE: 0.6746
RMSE: 0.6745994618494232


In [10]:
search_result = movies[movies["title"].str.contains("titanic", case=False, na=False)]
search_result

Unnamed: 0,movie_id,title
312,313,Titanic (1997)
