In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf

import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the ratings table and the movie metadata table from the local data/ directory.
ratings, movies = map(pd.read_csv, ('data/ratings.csv', 'data/movies.csv'))

In [3]:
# Show the first five rows of each table, one after the other, as plain text.
print(ratings.head(), movies.head(), sep='\n')

   userId  movieId  rating  timestamp
0       1       17     4.0  944249077
1       1       25     1.0  944250228
2       1       29     2.0  943230976
3       1       30     5.0  944249077
4       1       32     5.0  943228858
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  


In [4]:
# Replace the raw user and movie ids with contiguous 0-based integer codes.
# The fitted encoders are kept so codes can be mapped back to the original ids.
user_encoder, movie_encoder = LabelEncoder(), LabelEncoder()

ratings['userId'] = user_encoder.fit_transform(ratings['userId'])
ratings['movieId'] = movie_encoder.fit_transform(ratings['movieId'])

# Each encoder holds exactly one class per distinct id it was fitted on, so the
# class count equals the number of distinct encoded values in the column.
num_users = len(user_encoder.classes_)
num_movies = len(movie_encoder.classes_)

### Creating a Neural Collaborative Filtering Model (with TensorFlow)

In [6]:
class NCFModel(tf.keras.Model):
    """Neural collaborative filtering model that scores (user, movie) pairs.

    Each id is looked up in its own embedding table. The two embeddings are
    concatenated along the feature axis and passed through two ReLU dense
    layers (128 and 64 units) followed by a single sigmoid unit. The sigmoid
    output is then mapped linearly onto ``[min_rating, max_rating]``.

    Args:
        num_users: Number of rows in the user embedding table; user ids fed to
            the model must be integers in ``[0, num_users)``.
        num_movies: Number of rows in the movie embedding table; movie ids fed
            to the model must be integers in ``[0, num_movies)``.
        embedding_size: Width of each user / movie embedding vector.
        min_rating: Lowest value the model can predict. Defaults to ``0.0``.
        max_rating: Highest value the model can predict. Defaults to ``1.0``.

    Raises:
        ValueError: If ``max_rating`` is not greater than ``min_rating``.

    Note:
        With the default bounds the prediction lies within [0, 1], so the
        regression targets must be rescaled into that interval before fitting.
        To train directly on an un-normalised rating column, pass the
        column's real bounds instead (e.g. ``max_rating=5.0``).
    """

    def __init__(self, num_users, num_movies, embedding_size,
                 min_rating=0.0, max_rating=1.0):
        super().__init__()
        if not max_rating > min_rating:
            raise ValueError(
                f"max_rating ({max_rating}) must be greater than "
                f"min_rating ({min_rating})"
            )

        self.user_embedding = tf.keras.layers.Embedding(num_users, embedding_size)
        self.movie_embedding = tf.keras.layers.Embedding(num_movies, embedding_size)

        self.fc1 = tf.keras.layers.Dense(128, activation='relu')
        self.fc2 = tf.keras.layers.Dense(64, activation='relu')
        # Sigmoid keeps the raw score in [0, 1]; call() rescales it to the
        # configured rating interval.
        self.output_layer = tf.keras.layers.Dense(1, activation='sigmoid')

        self.min_rating = float(min_rating)
        self.rating_span = float(max_rating) - float(min_rating)

    def call(self, inputs):
        """Predict a rating for each (user, movie) pair.

        Args:
            inputs: A pair ``(user_id, movie_id)`` that is unpacked into the
                two id tensors. Presumably both are integer tensors of the
                same shape, e.g. ``(batch,)`` -- confirm against the caller.

        Returns:
            A tensor of predictions within ``[min_rating, max_rating]`` whose
            last dimension has size 1.
        """
        user_id, movie_id = inputs
        user_vector = self.user_embedding(user_id)
        movie_vector = self.movie_embedding(movie_id)

        # Join on the last (feature) axis. For rank-2 embeddings this is the
        # same as axis=1, but it also stays correct when the ids arrive with a
        # trailing singleton dimension (axis=1 would stack user and movie
        # instead of concatenating their features).
        features = tf.concat([user_vector, movie_vector], axis=-1)
        hidden = self.fc2(self.fc1(features))
        unit_score = self.output_layer(hidden)
        # With the default bounds this is 0.0 + 1.0 * unit_score, i.e. the
        # plain sigmoid output.
        return self.min_rating + self.rating_span * unit_score

In [7]:
# Width of each user / movie embedding vector -- a tunable hyperparameter.
embedding_size = 50
model = NCFModel(
    num_users=num_users,
    num_movies=num_movies,
    embedding_size=embedding_size,
)

In [8]:
# Train as a regression: minimise mean squared error with Adam and report
# mean absolute error alongside the loss.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mae'],
)

In [9]:
# Feature matrix: one (userId, movieId) pair per row. Target vector: the rating column.
# NOTE(review): the targets are taken on the rating scale as loaded, while the
# model's output layer is a sigmoid -- confirm the targets are rescaled to the
# model's output range before fitting.
X = ratings.loc[:, ['userId', 'movieId']].to_numpy()
y = ratings.loc[:, 'rating'].to_numpy()