In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from tensorflow.keras.optimizers import Adam

In [2]:
df = pd.read_csv('./datasets/ratings.csv')

In [3]:
user_encoder = LabelEncoder()
movie_encoder = LabelEncoder()

df['user'] = user_encoder.fit_transform(df['userId'].values)
df['movie'] = movie_encoder.fit_transform(df['movieId'].values)

In [9]:
df

Unnamed: 0,userId,movieId,rating,timestamp,user,movie
0,1,1,4.0,964982703,0,0
1,1,3,4.0,964981247,0,2
2,1,6,4.0,964982224,0,5
3,1,47,5.0,964983815,0,43
4,1,50,5.0,964982931,0,46
...,...,...,...,...,...,...
100831,610,166534,4.0,1493848402,609,9416
100832,610,168248,5.0,1493850091,609,9443
100833,610,168250,5.0,1494273047,609,9444
100834,610,168252,5.0,1493846352,609,9445


In [4]:
num_users = df['user'].nunique()
num_movies = df['movie'].nunique()

X = df[['user', 'movie']].values
y = df['rating'].values

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
user_input = Input(shape=(1,), name='user_input')
movie_input = Input(shape=(1,), name='movie_input')

user_embedding = Embedding(num_users, 50, name='user_embedding')(user_input)
movie_embedding = Embedding(num_movies, 50, name='movie_embedding')(movie_input)

user_vector = Flatten(name='FlattenUsers')(user_embedding)
movie_vector = Flatten(name='FlattenMovies')(movie_embedding)

conc = Concatenate()([user_vector, movie_vector])

dense = Dense(128, activation='relu')(conc)
output = Dense(1)(dense)

model = Model([user_input, movie_input], output)

model.compile(optimizer='adam', loss='mean_squared_error')

In [7]:
history = model.fit([X_train[:, 0], X_train[:, 1]], y_train, 
                    batch_size=64, epochs=30, 
                    validation_split=0.2)

Epoch 1/30
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3.1098 - val_loss: 0.8105
Epoch 2/30
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.7104 - val_loss: 0.7821
Epoch 3/30
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.6539 - val_loss: 0.7875
Epoch 4/30
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.6257 - val_loss: 0.7724
Epoch 5/30
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.5979 - val_loss: 0.7803
Epoch 6/30
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.5681 - val_loss: 0.7939
Epoch 7/30
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.5385 - val_loss: 0.8089
Epoch 8/30
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.5219 - val_loss: 0.8172
Epoch 9/30
[1m1009/1009

In [8]:
model.evaluate([X_test[:, 0], X_test[:, 1]], y_test)

predictions = model.predict([X_test[:, 0], X_test[:, 1]])

[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 535us/step - loss: 1.0779
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 795us/step


In [14]:
predictions

array([[3.6662695],
       [3.1210124],
       [1.4596105],
       ...,
       [4.356893 ],
       [3.8054943],
       [2.2416956]], dtype=float32)