In [44]:
# Import library
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [45]:
# Load dataset
# Memuat data pengguna, film, dan rating dari dataset MovieLens
# Melakukan preprocessing data seperti normalisasi, pembagian data latih dan uji, dll

links = pd.read_csv('links.csv')
movies = pd.read_csv('movies.csv')
tags = pd.read_csv('tags.csv')
ratings = pd.read_csv('ratings.csv')

# Menggabungkan data film dan data penilaian
movies_ratings = pd.merge(ratings, movies, on='movieId')

# Menghapus nilai kosong
movies_ratings = movies_ratings.dropna()

# Mapping user and item ID ke bilangan bulat berurutan
user_mapping = {user_id: idx for idx, user_id in enumerate(movies_ratings['userId'].unique())}
item_mapping = {item_id: idx for idx, item_id in enumerate(movies_ratings['movieId'].unique())}

num_users = len(user_mapping)
num_items = len(item_mapping)

movies_ratings['user'] = movies_ratings['userId'].map(user_mapping)
movies_ratings['item'] = movies_ratings['movieId'].map(item_mapping)

# Normalisasi data dengan min-max scaling
scaler = MinMaxScaler()
movies_ratings['rating_normalized'] = scaler.fit_transform(movies_ratings[['rating']])

# Membagi data menjadi data latih dan data uji dengan perbandingan 80% dan 20%
train, test = train_test_split(movies_ratings, test_size=0.2, random_state=42)

# Inisiasliasi data latih
train_user = np.array(train['user'])
train_item = np.array(train['item'])
train_ratings = np.array(train['rating_normalized'])

# Inisialisasi data uji
test_user = np.array(test['user'])
test_item = np.array(test['item'])
test_ratings = np.array(test['rating_normalized'])

In [46]:
# Create model
# Membangun model NCF menggunakan TensorFlow/Keras
def create_ncf_model(num_users, num_items, latent_dim=8):
  # Input layer untuk user
  user_input = Input(shape=(1,))
  user_embedding = Embedding(num_users, latent_dim)(user_input)
  user_flat = Flatten()(user_embedding)
  # Input layer untuk item (film)
  item_input = Input(shape=(1,))
  item_embedding = Embedding(num_items, latent_dim)(item_input)
  item_flat = Flatten()(item_embedding)
  # Concatenate user dan item embeddings
  concat = Concatenate()([user_flat, item_flat])
  # Fully connected layer
  hidden = Dense(64, activation='relu')(concat)
  output = Dense(1, activation='sigmoid')(hidden)
  # Compile model
  model = Model(inputs=[user_input, item_input], outputs=output)
  model.compile(loss='mean_squared_error', optimizer='adam')
  return model

In [47]:
# Train model
# Melatih model menggunakan data latih
model = create_ncf_model(num_users, num_items)
model.fit([train_user, train_item], train_ratings, batch_size=64, epochs=10,
validation_data=([test_user, test_item], test_ratings))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7b4dd431d5a0>

In [48]:
# Evaluate model
# Evaluasi model menggunakan data uji
test_loss = model.evaluate([test_user, test_item], test_ratings)
print(f"Test Loss: {test_loss}")

Test Loss: 0.03831171616911888


In [53]:
# Make recommendations
# Menggunakan model untuk membuat rekomendasi untuk pengguna tertentu
def movies_not_rated_by_user(user_id):
    user_rated_movies = set(movies_ratings[movies_ratings['user'] == user_mapping[user_id]]['item'])
    all_movies = set(range(num_items))
    not_rated_movies = all_movies - user_rated_movies
    return list(not_rated_movies)

def get_movie_titles(movie_ids):
    movie_titles = movies[movies['movieId'].isin(movie_ids)]['title'].tolist()
    return movie_titles

user_id = 83
user_movies = movies_not_rated_by_user(user_id)
user_input = np.array([user_id] * len(user_movies))
user_movies_input = np.array(user_movies)
predictions = model.predict([user_input, user_movies_input]).flatten()
recommended_movie_indices = np.argsort(predictions)[::-1][:10]
recommended_movie_ids = [user_movies[idx] for idx in recommended_movie_indices]



In [54]:
# Display recommended movies to the user
recommended_movies = get_movie_titles(recommended_movie_ids)
print("Recommended movies for user", user_id, ":", recommended_movies)

Recommended movies for user 83 : ['Black Tar Heroin: The Dark End of the Street (2000)', "Cutter's Way (1981)", 'Ice Age (2002)', 'City Heat (1984)', 'Sweet Sixteen (2002)', 'Going in Style (1979)']
