# ANIME RECOMMENDER SYSTEM - HYBRID FILTERING - TWO TOWER - CASE 3

In [1]:
# basic library
import numpy as np 
import pandas as pd 
import warnings
warnings.filterwarnings('ignore')
import pickle
from PIL import Image

In [2]:
# load data
file = open('dataset/processed_dataset/merged/merged_df_3.pkl', 'rb')
merged_df = pickle.load(file)

In [3]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1512740 entries, 0 to 1512739
Columns: 287 entries, user_id to 99.1
dtypes: float64(208), int64(79)
memory usage: 3.2 GB


## train test split

In [4]:
from sklearn.utils import shuffle
merged_df = shuffle(merged_df)

In [5]:
from sklearn.model_selection import train_test_split
y = merged_df.user_score
x = merged_df.drop('user_score', axis=1)
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.3, random_state=0)

In [6]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(1058918, 286)
(1058918,)
(453822, 286)
(453822,)


In [7]:
user_id_train = x_train.user_id
anime_id_train = x_train.anime_id
anime_content_train = x_train.drop(['user_id', 'anime_id', 'num_watched_episodes','created_at','updated_at'], axis=1)
rating_content_train = x_train[['num_watched_episodes','created_at','updated_at']]

In [8]:
user_id_test = x_test.user_id
anime_id_test = x_test.anime_id
anime_content_test = x_test.drop(['user_id', 'anime_id', 'num_watched_episodes','created_at','updated_at'], axis=1)
rating_content_test = x_test[['num_watched_episodes','created_at','updated_at']]

In [9]:
print(user_id_train.shape)
print(anime_id_train.shape)
print(anime_content_train.shape)
print(rating_content_train.shape)

(1058918,)
(1058918,)
(1058918, 281)
(1058918, 3)


## model building

In [10]:
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, Activation, BatchNormalization, ReLU
from tensorflow.keras.models import Sequential

In [11]:
# #func api
# user_id = keras.Input(shape=(1,), name="in")
# x = tf.keras.layers.Embedding(1058918, 32, input_length=1, embeddings_initializer="RandomNormal",embeddings_regularizer=l2(0))(user_id)
# x = Flatten()(x)
# x = Dense(8, activation="relu")(x)
# x = Dense(4, activation="relu")(x)
# encoder_output = Dense(4, activation="relu")(x)

# anime_id = keras.Input(shape=(1,), name="in2")
# x2 = tf.keras.layers.Embedding(1058918, 32, input_length=1, embeddings_initializer="RandomNormal",embeddings_regularizer=l2(0))(anime_id)
# x2 = Flatten()(x2)
# anime_content = keras.Input(shape=(284,), name="in2_2")
# x2_1 = Dense(128, activation="relu")(anime_content)
# x2_2 =tf.keras.layers.Concatenate(axis=1)([x2, x2_1])

# x2_2 = Dense(8, activation="relu")(x2_2)
# x2_2 = Dense(4, activation="relu")(x2_2)
# encoder_output_2 = Dense(4, activation="relu")(x2_2)

# dotted = tf.keras.layers.Dot(axes=-1)([encoder_output, encoder_output_2])

# model = keras.Model(inputs=[user_id, anime_id, anime_content], outputs=dotted, name="model")

In [12]:
#func api
user_id = keras.Input(shape=(1,), name="in")
x = tf.keras.layers.Embedding(1058918, 32, input_length=1, embeddings_initializer="RandomNormal",embeddings_regularizer=l2(0))(user_id)
x = Flatten()(x)
user_content = keras.Input(shape=(3,), name="in1_2")
x1_1 = Dense(128, activation="relu")(user_content)
x1_2 =tf.keras.layers.Concatenate(axis=1)([x, x1_1])

x1_2 = Dense(8, activation="relu")(x1_2)
x1_2 = Dense(4, activation="relu")(x1_2)
encoder_output = Dense(4, activation="relu")(x1_2)

anime_id = keras.Input(shape=(1,), name="in2")
x2 = tf.keras.layers.Embedding(1058918, 32, input_length=1, embeddings_initializer="RandomNormal",embeddings_regularizer=l2(0))(anime_id)
x2 = Flatten()(x2)
anime_content = keras.Input(shape=(281,), name="in2_2")
x2_1 = Dense(128, activation="relu")(anime_content)
x2_2 =tf.keras.layers.Concatenate(axis=1)([x2, x2_1])

x2_2 = Dense(8, activation="relu")(x2_2)
x2_2 = Dense(4, activation="relu")(x2_2)
encoder_output_2 = Dense(4, activation="relu")(x2_2)

dotted = tf.keras.layers.Dot(axes=-1)([encoder_output, encoder_output_2])

model = keras.Model(inputs=[user_id, user_content, anime_id, anime_content], outputs=dotted, name="model")

In [13]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 in (InputLayer)                [(None, 1)]          0           []                               
                                                                                                  
 in2 (InputLayer)               [(None, 1)]          0           []                               
                                                                                                  
 embedding (Embedding)          (None, 1, 32)        33885376    ['in[0][0]']                     
                                                                                                  
 in1_2 (InputLayer)             [(None, 3)]          0           []                               
                                                                                              

In [14]:
optimizer = keras.optimizers.Adam(learning_rate=0.001, weight_decay=1e-4)

model.compile(
    loss='mean_squared_error', #mse
    optimizer=optimizer,
    metrics=["mean_squared_error"], #mse 
)

history = model.fit([user_id_train, rating_content_train, anime_id_train, anime_content_train], y_train, batch_size=8192, epochs=10, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
predictions = model.predict([user_id_test, rating_content_test, anime_id_test, anime_content_test])



In [16]:
print(type(predictions))

<class 'numpy.ndarray'>


In [17]:
from sklearn.metrics import mean_squared_error
print("MSE",mean_squared_error(y_test,predictions))

MSE 0.01873869615636963


In [18]:
from sklearn.metrics import mean_absolute_error
print("MAE",mean_absolute_error(y_test,predictions))

MAE 0.1043918160194212


In [19]:
from sklearn.metrics import r2_score
print("r-squared",r2_score(y_test,predictions))

r-squared 0.42446447111834607


In [20]:
from sklearn.metrics import mean_squared_error
print("RMSE",mean_squared_error(y_test,predictions, squared=False))

RMSE 0.1368893573524605


In [21]:
test_arr = predictions.ravel()
pred_arr = pd.DataFrame(test_arr, columns = ['pred'])

In [22]:
y_test_df = y_test.to_frame()

In [23]:
dat1 = pred_arr.reset_index(drop=True)
dat2 = y_test_df.reset_index(drop=True)

In [24]:
dat_1_2 = dat1.join(dat2)

In [25]:
dat_1_2

Unnamed: 0,pred,user_score
0,0.665753,0.555556
1,0.585996,0.555556
2,0.677283,0.777778
3,0.594659,0.666667
4,0.793893,0.444444
...,...,...
453817,0.524686,0.666667
453818,0.718142,0.666667
453819,0.855579,0.777778
453820,0.799773,1.000000


In [26]:
model.save('model/model_impl.h5')

In [27]:
user_id_test

646417      7824
64022        765
596589      7200
1374607    18207
39663        461
           ...  
1258941    16706
504956      6102
68174        810
902601     11793
1486699    19962
Name: user_id, Length: 453822, dtype: int64

In [28]:
anime_id_test

646417      2404.0
64022       2178.0
596589       459.0
1374607    38101.0
39663      44200.0
            ...   
1258941     3572.0
504956     34542.0
68174         32.0
902601      1689.0
1486699    12685.0
Name: anime_id, Length: 453822, dtype: float64

In [29]:
anime_content_test

Unnamed: 0,score,members,type,studio,episode_count,Action,Adult Cast,Adventure,Anthropomorphic,Avant Garde,...,90.1,91.1,92.1,93.1,94.1,95.1,96.1,97.1,98.1,99.1
646417,0.750823,0.037627,4,856,0.003598,1,0,0,0,0,...,0.359933,-1.310139,0.144374,-0.268725,1.262138,-0.873796,-0.434602,0.179176,-0.957908,0.393838
64022,0.737651,0.004904,0,399,0.000327,1,0,0,0,0,...,-0.585385,-0.554892,1.634552,-0.052813,-0.097895,-0.688679,-0.149594,-0.020805,0.503872,1.104498
596589,0.778266,0.032160,0,789,0.000327,1,0,1,0,0,...,0.185875,-0.192132,0.172160,0.908934,1.087403,-0.487874,0.058215,0.232002,0.746394,-0.285596
1374607,0.841932,0.232772,4,783,0.003925,0,0,0,0,0,...,0.540676,0.434250,-0.622155,0.221160,-0.291174,-1.270174,-0.432091,0.913348,-0.049804,-1.167099
39663,0.832053,0.054662,0,103,0.000327,1,0,0,0,0,...,0.758464,0.199535,-0.718201,-0.133665,-0.077102,0.490625,-0.409398,-1.437865,0.275404,-0.249523
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1258941,0.867179,0.039454,4,594,0.008178,1,0,0,0,0,...,-1.636199,0.353041,-1.122502,-0.548238,-0.743120,0.123896,-0.411761,0.824444,0.025895,-0.845484
504956,0.839737,0.164033,4,389,0.003598,1,0,0,0,0,...,0.201807,-0.849482,0.999874,-0.775421,-0.060742,-0.450195,0.251216,-0.063047,-0.413092,-1.141814
68174,0.938529,0.232847,0,248,0.000327,0,0,0,0,1,...,-0.939651,0.484272,-0.384013,0.735177,-2.343993,-0.195049,-0.195462,2.754975,0.764059,0.912075
902601,0.833150,0.244760,0,137,0.000981,0,0,0,0,0,...,-0.210145,0.831635,-0.179476,1.738406,0.590371,0.415879,-0.258699,0.223870,-1.330014,0.091840
