In [111]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error


# we choose a subset of the netflix rating dataset found here : https://www.kaggle.com/datasets/rishitjavia/netflix-movie-rating-dataset?select=Netflix_Dataset_Rating.csv



In [112]:
import os as os

anime_datapath = os.path.join(os.path.dirname(os.getcwd()),'Dataset','anime.csv')
ratings1_datapath = os.path.join(os.path.dirname(os.getcwd()),'Dataset','rating1.csv')
ratings2_datapath = os.path.join(os.path.dirname(os.getcwd()),'Dataset','rating2.csv')

In [113]:
ratings1_dataset = pd.read_csv(ratings1_datapath)
ratings2_dataset = pd.read_csv(ratings2_datapath)

In [114]:
rating_data = pd.concat([ratings1_dataset, ratings2_dataset],axis=0)
rating_data.reset_index(inplace=True)
rating_data.drop(columns=['index'],inplace=True)
rating_data.head(1000)

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1
...,...,...,...
995,7,18095,7
996,7,18097,7
997,7,18139,8
998,7,18153,8


In [115]:
df = rating_data.iloc[:1000,:]

In [116]:
df

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1
...,...,...,...
995,7,18095,7
996,7,18097,7
997,7,18139,8
998,7,18153,8


In [117]:
X = df[['user_id','anime_id']].to_numpy()
y = df['rating'].to_numpy()

In [118]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


In [164]:

class NeuralCF:
    def __init__(self, num_users, num_items, embedding_dim=10, hidden_layers=[64, 32], activation='relu', learning_rate=0.001):
        self.num_users = num_users
        self.num_items = num_items
        self.embedding_dim = embedding_dim
        self.hidden_layers = hidden_layers
        self.activation = activation
        self.learning_rate = learning_rate
    def _build_model(self):
        user_input = Input(shape=(1,))
        print(user_input)
        item_input = Input(shape=(1,))
        user_embedding = Embedding(self.num_users, self.embedding_dim)(user_input)
        user_embedding = Flatten()(user_embedding)
        item_embedding = Embedding(self.num_items, self.embedding_dim)(item_input)
        item_embedding = Flatten()(item_embedding)
        vector = Concatenate()([user_embedding, item_embedding])
        for units in self.hidden_layers:
            vector = Dense(units, activation=self.activation)(vector)
        output = Dense(1, activation='sigmoid')(vector)
        model = Model(inputs=[user_input, item_input], outputs=output)
        return model
    def train(self, X_train, y_train, epochs=10, batch_size=10, validation_split=0.1):
        X_train = [X_train[:, 0], X_train[:, 1]]
        y_train = np.array(y_train)
        model = self._build_model()
        model.compile(optimizer=Adam(learning_rate=self.learning_rate), loss='mean_squared_error')
        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=validation_split)
        self.model = model
    def predict(self, X_test):
        X_test = [X_test[:, 0], X_test[:, 1]]
        return self.model.predict(X_test)

In [165]:

num_users = df['user_id'].max() + 1
num_items = df['anime_id'].max() + 1

In [166]:
# Hyperparameters that could be tuned
embedding_dim = 10
hidden_layers = [64, 32]
activation = 'relu'
learning_rate = 0.001

In [167]:

# Train and predict using NeuralCF
ncf = NeuralCF(num_users, num_items, embedding_dim, hidden_layers, activation, learning_rate)
ncf.train(X_train, y_train)
y_pred = ncf.predict(X_test)

<KerasTensor shape=(None, 1), dtype=float32, sparse=False, name=keras_tensor_101>
Epoch 1/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 23.2246 - val_loss: 21.0232
Epoch 2/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 22.2453 - val_loss: 20.4922
Epoch 3/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 21.3139 - val_loss: 20.3952
Epoch 4/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 21.4452 - val_loss: 20.3816
Epoch 5/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 18.8886 - val_loss: 20.3778
Epoch 6/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 22.1812 - val_loss: 20.3765
Epoch 7/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 19.7732 - val_loss: 20.3758
Epoch 8/10
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms

In [168]:
np.ceil(y_pred)

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],

In [169]:

# Evaluate using Mean Squared Error
mse = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error:", mse)

Mean Absolute Error: 4.11497542384539
