In [None]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting tensorboard~=2.19.0 (from tensorflow)
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1 (from tensorflow)
  Downloading tensorflow_io_gcs_filesystem-0.37.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)
Collecting wheel<1.0,>=0.23.0 (from astunparse>=1.6.0->tensorflow

In [None]:
import pandas as pd
import numpy as np
import time
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense
from sklearn.metrics import mean_squared_error

In [None]:
# Load MovieLens Dataset
# Both CSV files are read from the current working directory, ratings first.
# A timer is started before the reads; nothing in this cell consumes it.
start_time = time.time()
ratings, movies = (pd.read_csv(csv_name) for csv_name in ("ratings.csv", "movies.csv"))

In [None]:
# Convert timestamps to human-readable format
# The "timestamp" column is interpreted as seconds since the Unix epoch
# (unit="s") and replaced by a datetime "date" column.
# pop() removes the raw column from the frame and hands it to to_datetime in
# one step; the guard makes the cell safe to re-run once the column is gone
# (an unconditional drop would raise KeyError on the second execution).
if "timestamp" in ratings.columns:
    ratings["date"] = pd.to_datetime(ratings.pop("timestamp"), unit="s")

In [None]:
# Handle missing values
# Discard, in place, every row that has a missing value in at least one column.
ratings.dropna(axis="index", how="any", inplace=True)

In [None]:
# Filter out cold-start users and movies
# Keep only rows whose movie AND user each have at least MIN_RATINGS ratings.
# Removing rows for sparse movies lowers the per-user counts (and vice versa),
# so the counts are recomputed and the filter re-applied until every remaining
# user and movie satisfies the threshold on the *filtered* data.
MIN_RATINGS = 10
while True:
    movie_counts = ratings["movieId"].value_counts()
    user_counts = ratings["userId"].value_counts()
    keep = (
        ratings["movieId"].map(movie_counts).ge(MIN_RATINGS)
        & ratings["userId"].map(user_counts).ge(MIN_RATINGS)
    )
    # Stable point reached (also true for an empty frame): nothing left to drop.
    if keep.all():
        break
    ratings = ratings[keep]

In [None]:
# Normalize ratings
# Min-max scaling: the smallest observed rating maps to 0 and the largest to 1.
rating_col = ratings["rating"]
min_rating, max_rating = rating_col.min(), rating_col.max()
ratings["normalized_rating"] = rating_col.sub(min_rating).div(max_rating - min_rating)

In [None]:
# Train NCF Model
# Builds a dot-product embedding model over (user, movie) pairs and fits it to
# the observed, normalized ratings. The reported time covers model
# construction and training.
start_time = time.time()

# Embedding layers look rows up by position, so the raw userId / movieId
# values are re-coded to contiguous integers 0..n-1 before being fed in.
user_codes, unique_user_ids = pd.factorize(ratings["userId"])
movie_codes, unique_movie_ids = pd.factorize(ratings["movieId"])
num_users = len(unique_user_ids)
num_movies = len(unique_movie_ids)

user_input = Input(shape=(1,))
movie_input = Input(shape=(1,))

# One 50-dimensional vector per user and per movie.
user_embedding = Embedding(input_dim=num_users, output_dim=50)(user_input)
movie_embedding = Embedding(input_dim=num_movies, output_dim=50)(movie_input)

user_vec = Flatten()(user_embedding)
movie_vec = Flatten()(movie_embedding)

# The dot product gives one score per pair; the single linear unit applies a
# learned scale and bias to it.
dot_product = Dot(axes=1)([user_vec, movie_vec])
output = Dense(1, activation='linear')(dot_product)

ncf_model = Model([user_input, movie_input], output)
ncf_model.compile(optimizer='adam', loss='mse')

# Build training data from the observed ratings (not random samples), so the
# model learns actual user/movie preferences. Training cost scales with
# len(ratings).
train_users = user_codes.astype("int32")
train_movies = movie_codes.astype("int32")
train_ratings = ratings["normalized_rating"].to_numpy(dtype="float32")

ncf_model.fit([train_users, train_movies], train_ratings, epochs=10, batch_size=64)
end_time = time.time()
print(f"NCF Model Training Time: {end_time - start_time:.4f} seconds")

Epoch 1/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 18ms/step - loss: 0.1713
Epoch 2/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 18ms/step - loss: 0.0602
Epoch 3/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 18ms/step - loss: 0.0094
Epoch 4/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 18ms/step - loss: 0.0036
Epoch 5/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 18ms/step - loss: 0.0049
Epoch 6/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 18ms/step - loss: 0.0070
Epoch 7/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 18ms/step - loss: 0.0051
Epoch 8/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 18ms/step - loss: 0.0037
Epoch 9/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 18ms/step - loss: 0.0039
Epoch 10/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━

In [None]:
# Evaluate NCF Model
# Predictions are scored against the train_* arrays, i.e. the same inputs and
# targets that were passed to fit, so these are training-set errors.
# The timer covers prediction plus metric computation.
start_time = time.time()
predicted_ratings = ncf_model.predict(x=[train_users, train_movies])
mse_ncf = mean_squared_error(y_true=train_ratings, y_pred=predicted_ratings)
rmse_ncf = np.sqrt(mse_ncf)
end_time = time.time()
print(
    f"Model Evaluation Time: {end_time - start_time:.4f} seconds",
    f"NCF Model -> RMSE: {rmse_ncf}, MSE: {mse_ncf}",
    sep="\n",
)

[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 969us/step
Model Evaluation Time: 4.2687 seconds
NCF Model -> RMSE: 0.06258981040117298, MSE: 0.003917484366054782
