In [1]:
import pandas as pd
import os
import sys
import numpy as np
import tensorflow as tf
import tensorflow.keras as krs
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

from tqdm.autonotebook import tqdm
from tqdm import trange

  del sys.path[0]


In [2]:
# Render matplotlib figures inline, directly beneath the cell that produces them.
%matplotlib inline

In [3]:
# Load IPython's autoreload extension.
%load_ext autoreload

# Mode 2: reload all imported modules before each cell executes, so edits to
# the project's .py helpers are picked up without restarting the kernel.
%autoreload 2

In [4]:
# If the kernel was started from a directory whose path ends in "notebooks",
# step up one level so the working directory becomes its parent.
launch_dir = os.getcwd()
if launch_dir.endswith("notebooks"):
    os.chdir(os.pardir)

In [5]:
# Put the current working directory on the import path so the `src.*` imports
# below resolve. The membership check keeps the cell idempotent: re-running it
# no longer appends the same directory again on every execution.
repo_root = os.getcwd()
if repo_root not in sys.path:
    sys.path.append(repo_root)

In [6]:
from src.data.train_data_utils import prepare_experiment_data
from src.models.training_utils import generator_ratings, generator_ratings_features, mae_masked, mape_masked, mse_masked
from src.models.train_model import get_collabfiltering_model1, build_shallow_autorec_single_input, build_autorec_multi_input2, get_array

# Data prep

In [9]:
# Build the experiment data bundle via the project helper. Later cells read its
# Xraw_train / Xraw_valid / Xraw_test frames (user_id, item_id, rating columns)
# and the matching Xfeatures_train / Xfeatures_valid / Xfeatures_test matrices.
exd = prepare_experiment_data()

Processed: 0.0%
Processed: 0.27373261797875836%
Processed: 0.5474652359575167%
Processed: 0.8211978539362751%
Processed: 0.0%
Processed: 0.4555393586005831%
Processed: 0.9110787172011662%
Processed: 0.0%
Processed: 0.6804109682248077%


# Model prep

In [76]:
# Hybrid rating model: the dot product of learned user/item embeddings is
# concatenated with the side-feature vector and fed through a small MLP with a
# single linear output, trained with MSE.
dim_embed = 30

# Embedding tables are indexed directly by raw id, so each table must be sized
# from the largest id across ALL splits. Sizing from train alone lets ids that
# only occur in valid/test index past the end of the table.
nusers = max(exd.Xraw_train.user_id.max(), exd.Xraw_test.user_id.max(), exd.Xraw_valid.user_id.max())
nitems = max(exd.Xraw_train.item_id.max(), exd.Xraw_test.item_id.max(), exd.Xraw_valid.item_id.max())

# Side features: one float row per rating, width taken from the training matrix.
f_input = krs.Input(shape=(exd.Xfeatures_train.shape[1],), name="features_input", dtype='float32')

# User branch: integer id -> dim_embed-wide vector (+1 because ids index the table directly).
user_input = krs.Input(shape=(1,), name="user_input", dtype='int32')
u = krs.layers.Embedding(nusers + 1, dim_embed)(user_input)

# Item branch: same construction as the user branch.
item_input = krs.Input(shape=(1,), name='item_input', dtype='int32')
i = krs.layers.Embedding(nitems + 1, dim_embed)(item_input)

# Contract the embedding axis of the two (batch, 1, dim_embed) tensors, then
# flatten so the interaction score can be concatenated with the features.
ui = krs.layers.Dot(axes=(2,2))([u, i])
ui = krs.layers.Flatten()(ui)
all_features = krs.layers.Concatenate()([f_input, ui])
drop = krs.layers.Dropout(0.4)(all_features)

# The first dense layer must consume `drop`, not `all_features`; otherwise the
# dropout layer above is never connected to the graph and has no effect.
d1 = krs.layers.Dense(32, activation="relu")(drop)
drop2 = krs.layers.Dropout(0.4)(d1)
d2 = krs.layers.Dense(16, activation="relu")(drop2)
out = krs.layers.Dense(1)(d2)

# Input order here fixes the order of the arrays passed to fit()/predict().
model = krs.Model(inputs=[user_input, item_input, f_input], outputs=out)
model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae', 'mape'])

In [77]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" after the table.
model.summary()

Model: "model_13"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_42 (Embedding)        (None, 1, 30)        610050      user_input[0][0]                 
__________________________________________________________________________________________________
embedding_43 (Embedding)        (None, 1, 30)        196680      item_input[0][0]                 
___________________________________________________________________________________________

In [78]:
# Training split. Inputs are listed in the order the model declares them:
# user ids, item ids, then the side-feature matrix. Targets are the ratings.
train_ratings = exd.Xraw_train
x = [train_ratings.user_id.to_numpy(), train_ratings.item_id.to_numpy(), exd.Xfeatures_train]
y = train_ratings.rating.to_numpy()

# Validation split, laid out exactly like the training split.
valid_ratings = exd.Xraw_valid
x_val = [valid_ratings.user_id.to_numpy(), valid_ratings.item_id.to_numpy(), exd.Xfeatures_valid]
y_val = valid_ratings.rating.to_numpy()


In [79]:
# Fit for 10 epochs in mini-batches of 32, evaluating on the validation split
# after every epoch; the returned History object is kept for later inspection.
model_history = model.fit(
    x,
    y,
    validation_data=(x_val, y_val),
    epochs=10,
    batch_size=32,
)

Train on 36532 samples, validate on 14697 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [87]:
# Predict on the test split. Inputs follow the model's declared order
# (user ids, item ids, features) and are cast to int32 / int32 / float32.
test_ratings = exd.Xraw_test
test_inputs = [
    test_ratings.user_id.to_numpy(dtype='int32'),
    test_ratings.item_id.to_numpy(dtype='int32'),
    exd.Xfeatures_test.astype('float32'),
]
yhat = model.predict(x=test_inputs)

In [33]:
import gc

# Reset Keras' global state, then run a garbage-collection pass. The cell's
# displayed value is gc.collect()'s return value.
K.clear_session()
gc.collect()

1631