# Import Library

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Load and prepare data

In [5]:
ratings = pd.read_csv("ratings.csv")[["userId", "movieId", "rating"]]
ratings

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0
...,...,...,...
99999,671,6268,2.5
100000,671,6269,4.0
100001,671,6365,4.0
100002,671,6385,2.5


# Encode user and movie IDs

In [6]:
user_enc = LabelEncoder()
movie_enc = LabelEncoder()
ratings['user'] = user_enc.fit_transform(ratings['userId'].values)
ratings['movie'] = movie_enc.fit_transform(ratings['movieId'].values)

# Normalize ratings to 0-1 range (for sigmoid output)

In [7]:
min_rating, max_rating = ratings['rating'].min(), ratings['rating'].max()
ratings['rating'] = (ratings['rating'] - min_rating) / (max_rating - min_rating)

# Split data

In [8]:
X = ratings[['user', 'movie']].values
y = ratings['rating'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Get number of users and movies

In [9]:
n_users = ratings['user'].nunique()
n_movies = ratings['movie'].nunique()

# Model parameters

In [10]:
embedding_size = 50
hidden_units = [128, 64, 32]
dropout_rate = 0.2
l2_reg = 0.01
learning_rate = 0.001
batch_size = 64
epochs = 30

# Neural Collaborative Filtering Model

In [11]:
def create_model():
    # Input layers
    user_input = Input(shape=(1,), name='user_input')
    movie_input = Input(shape=(1,), name='movie_input')
    
    # Embedding layers
    user_embedding = Embedding(n_users, embedding_size, 
                              embeddings_regularizer=l2(l2_reg), 
                              name='user_embedding')(user_input)
    movie_embedding = Embedding(n_movies, embedding_size, 
                               embeddings_regularizer=l2(l2_reg), 
                               name='movie_embedding')(movie_input)
    
    # Flatten embeddings
    user_vec = Flatten(name='flatten_user')(user_embedding)
    movie_vec = Flatten(name='flatten_movie')(movie_embedding)
    
    # Concatenate features
    concat = Concatenate(name='concat')([user_vec, movie_vec])
    
    # Add dense layers
    x = concat
    for i, units in enumerate(hidden_units):
        x = Dense(units, activation='relu', 
                  kernel_regularizer=l2(l2_reg), 
                  name=f'hidden_{i}')(x)
        x = Dropout(dropout_rate)(x)
    
    # Output layer
    output = Dense(1, activation='sigmoid', name='output')(x)
    
    # Create model
    model = Model(inputs=[user_input, movie_input], outputs=output)
    model.compile(optimizer=Adam(learning_rate), 
                  loss='mse', 
                  metrics=['mae', 'mse'])
    
    return model

# Create and train model

In [12]:
model = create_model()
model.summary()

# Early stopping callback

In [13]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model

In [None]:
history = model.fit(
    [X_train[:, 0], X_train[:, 1]], 
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=([X_test[:, 0], X_test[:, 1]], y_test),
    callbacks=[early_stopping],
    verbose=1
)

Epoch 1/30
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 0.6799 - mae: 0.1969 - mse: 0.0587 - val_loss: 0.0555 - val_mae: 0.1891 - val_mse: 0.0555
Epoch 2/30
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0550 - mae: 0.1886 - mse: 0.0550 - val_loss: 0.0555 - val_mae: 0.1889 - val_mse: 0.0555
Epoch 3/30
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0553 - mae: 0.1891 - mse: 0.0553 - val_loss: 0.0555 - val_mae: 0.1891 - val_mse: 0.0555
Epoch 4/30
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0550 - mae: 0.1888 - mse: 0.0550 - val_loss: 0.0555 - val_mae: 0.1889 - val_mse: 0.0555
Epoch 5/30
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0555 - mae: 0.1893 - mse: 0.0555 - val_loss: 0.0555 - val_mae: 0.1892 - val_mse: 0.0555
Epoch 6/30
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

# Evaluation

In [None]:
test_loss, test_mae, test_mse = model.evaluate([X_test[:, 0], X_test[:, 1]], y_test)
print(f"\nTest MAE: {test_mae*(max_rating-min_rating)+min_rating:.4f}")
print(f"Test RMSE: {np.sqrt(test_mse)*(max_rating-min_rating)+min_rating:.4f}")

In [None]:
import os
os.makedirs('plots/neural', exist_ok=True)

In [None]:
from tensorflow.keras.utils import plot_model
plot_model(model, to_file='plots/neural/model_architecture.png', show_shapes=True)

# Visualization

In [None]:
plt.figure(figsize=(15, 5))

# Plot training & validation loss values

In [None]:
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss (MSE)')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')

# Plot training & validation MAE

In [None]:
plt.subplot(1, 2, 2)
plt.plot(np.array(history.history['mae'])*(max_rating-min_rating)+min_rating)
plt.plot(np.array(history.history['val_mae'])*(max_rating-min_rating)+min_rating)
plt.title('Model MAE')
plt.ylabel('MAE (Original Scale)')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')

In [None]:
plt.tight_layout()
plt.savefig('plots/neural/training_history.png', bbox_inches='tight')
plt.show()

# Sample predictions

In [None]:
sample_users = np.random.choice(n_users, 5)
sample_movies = np.random.choice(n_movies, 5)

for user, movie in zip(sample_users, sample_movies):
    pred = model.predict([np.array([user]), np.array([movie])])[0][0]
    original_rating = pred * (max_rating - min_rating) + min_rating
    print(f"User {user_enc.inverse_transform([user])[0]} predicted rating for movie {movie_enc.inverse_transform([movie])[0]}: {original_rating:.2f}")