# ANIME RECOMMENDER SYSTEM - HYBRID FILTERING - TWO TOWER - CASE 2

In [1]:
# basic library
import numpy as np 
import pandas as pd 
import warnings
warnings.filterwarnings('ignore')
import pickle
from PIL import Image

In [2]:
# load data
file = open('dataset/processed_dataset/merged/merged_df_2.pkl', 'rb')
merged_df = pickle.load(file)

In [3]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1512740 entries, 0 to 1512739
Columns: 187 entries, user_id to 99
dtypes: float64(108), int64(79)
memory usage: 2.1 GB


## train test split

In [4]:
from sklearn.utils import shuffle
merged_df = shuffle(merged_df)

In [5]:
from sklearn.model_selection import train_test_split
y = merged_df.user_score
x = merged_df.drop('user_score', axis=1)
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.3, random_state=0)

In [6]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(1058918, 186)
(1058918,)
(453822, 186)
(453822,)


In [7]:
user_id_train = x_train.user_id
anime_id_train = x_train.anime_id
anime_content_train = x_train.drop(['user_id', 'anime_id', 'num_watched_episodes','created_at','updated_at'], axis=1)
rating_content_train = x_train[['num_watched_episodes','created_at','updated_at']]

In [8]:
user_id_test = x_test.user_id
anime_id_test = x_test.anime_id
anime_content_test = x_test.drop(['user_id', 'anime_id', 'num_watched_episodes','created_at','updated_at'], axis=1)
rating_content_test = x_test[['num_watched_episodes','created_at','updated_at']]

In [9]:
print(user_id_train.shape)
print(anime_id_train.shape)
print(anime_content_train.shape)
print(rating_content_train.shape)

(1058918,)
(1058918,)
(1058918, 181)
(1058918, 3)


## model building

In [10]:
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, Activation, BatchNormalization, ReLU
from tensorflow.keras.models import Sequential

In [11]:
# #func api
# user_id = keras.Input(shape=(1,), name="in")
# x = tf.keras.layers.Embedding(1058918, 32, input_length=1, embeddings_initializer="RandomNormal",embeddings_regularizer=l2(0))(user_id)
# x = Flatten()(x)
# x = Dense(8, activation="relu")(x)
# x = Dense(4, activation="relu")(x)
# encoder_output = Dense(4, activation="relu")(x)

# anime_id = keras.Input(shape=(1,), name="in2")
# x2 = tf.keras.layers.Embedding(1058918, 32, input_length=1, embeddings_initializer="RandomNormal",embeddings_regularizer=l2(0))(anime_id)
# x2 = Flatten()(x2)
# anime_content = keras.Input(shape=(184,), name="in2_2")
# x2_1 = Dense(128, activation="relu")(anime_content)
# x2_2 =tf.keras.layers.Concatenate(axis=1)([x2, x2_1])

# x2_2 = Dense(8, activation="relu")(x2_2)
# x2_2 = Dense(4, activation="relu")(x2_2)
# encoder_output_2 = Dense(4, activation="relu")(x2_2)

# dotted = tf.keras.layers.Dot(axes=-1)([encoder_output, encoder_output_2])

# model = keras.Model(inputs=[user_id, anime_id, anime_content], outputs=dotted, name="model")

In [12]:
#func api
user_id = keras.Input(shape=(1,), name="in")
x = tf.keras.layers.Embedding(1058918, 32, input_length=1, embeddings_initializer="RandomNormal",embeddings_regularizer=l2(0))(user_id)
x = Flatten()(x)
user_content = keras.Input(shape=(3,), name="in1_2")
x1_1 = Dense(128, activation="relu")(user_content)
x1_2 =tf.keras.layers.Concatenate(axis=1)([x, x1_1])

x1_2 = Dense(8, activation="relu")(x1_2)
x1_2 = Dense(4, activation="relu")(x1_2)
encoder_output = Dense(4, activation="relu")(x1_2)

anime_id = keras.Input(shape=(1,), name="in2")
x2 = tf.keras.layers.Embedding(1058918, 32, input_length=1, embeddings_initializer="RandomNormal",embeddings_regularizer=l2(0))(anime_id)
x2 = Flatten()(x2)
anime_content = keras.Input(shape=(181,), name="in2_2")
x2_1 = Dense(128, activation="relu")(anime_content)
x2_2 =tf.keras.layers.Concatenate(axis=1)([x2, x2_1])

x2_2 = Dense(8, activation="relu")(x2_2)
x2_2 = Dense(4, activation="relu")(x2_2)
encoder_output_2 = Dense(4, activation="relu")(x2_2)

dotted = tf.keras.layers.Dot(axes=-1)([encoder_output, encoder_output_2])

model = keras.Model(inputs=[user_id, user_content, anime_id, anime_content], outputs=dotted, name="model")

In [13]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 in (InputLayer)                [(None, 1)]          0           []                               
                                                                                                  
 in2 (InputLayer)               [(None, 1)]          0           []                               
                                                                                                  
 embedding (Embedding)          (None, 1, 32)        33885376    ['in[0][0]']                     
                                                                                                  
 in1_2 (InputLayer)             [(None, 3)]          0           []                               
                                                                                              

In [15]:
optimizer = keras.optimizers.Adam(learning_rate=0.001, weight_decay=1e-4)

model.compile(
    loss='mean_squared_error', #mse
    optimizer=optimizer,
    metrics=["mean_squared_error"], #mse 
)

history = model.fit([user_id_train, rating_content_train, anime_id_train, anime_content_train], y_train, batch_size=8192, epochs=10, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
predictions = model.predict([user_id_test, rating_content_test, anime_id_test, anime_content_test])



In [18]:
print(type(predictions))

<class 'numpy.ndarray'>


In [19]:
from sklearn.metrics import mean_squared_error
print("MSE",mean_squared_error(y_test,predictions))

MSE 0.01911087961460873


In [20]:
from sklearn.metrics import mean_absolute_error
print("MAE",mean_absolute_error(y_test,predictions))

MAE 0.10597087444564027


In [21]:
from sklearn.metrics import r2_score
print("r-squared",r2_score(y_test,predictions))

r-squared 0.4131782261402145


In [22]:
from sklearn.metrics import mean_squared_error
print("RMSE",mean_squared_error(y_test,predictions, squared=False))

RMSE 0.13824210507153284


In [23]:
test_arr = predictions.ravel()
pred_arr = pd.DataFrame(test_arr, columns = ['pred'])

In [24]:
y_test_df = y_test.to_frame()

In [25]:
dat1 = pred_arr.reset_index(drop=True)
dat2 = y_test_df.reset_index(drop=True)

In [26]:
dat_1_2 = dat1.join(dat2)

In [27]:
dat_1_2

Unnamed: 0,pred,user_score
0,0.748287,0.666667
1,0.730688,0.666667
2,0.623947,0.666667
3,0.550517,0.666667
4,0.783924,0.666667
...,...,...
453817,0.797681,0.777778
453818,0.425632,0.444444
453819,0.615990,0.777778
453820,0.633675,0.555556


In [28]:
model.save('model/model_impl.h5')

In [29]:
user_id_test

59041        693
74013        884
1219181    16186
261111      3112
858985     11168
           ...  
398303      4832
593573      7164
697639      8451
378736      4592
1511425        1
Name: user_id, Length: 453822, dtype: int64

In [30]:
anime_id_test

59041      34240.0
74013        468.0
1219181    11161.0
261111      2795.0
858985       523.0
            ...   
398303       269.0
593573      8426.0
697639     12967.0
378736      3001.0
1511425    51585.0
Name: anime_id, Length: 453822, dtype: float64

In [31]:
anime_content_test

Unnamed: 0,score,members,type,studio,episode_count,Action,Adult Cast,Adventure,Anthropomorphic,Avant Garde,...,90,91,92,93,94,95,96,97,98,99
59041,0.912184,0.087670,5,7,0.000327,0,0,0,0,0,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.212436,0.000000,0.000000,0.000000
74013,0.855104,0.042496,0,541,0.000327,0,1,0,0,0,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.391274,0.000000,0.000000,0.000000
1219181,0.749726,0.008099,3,185,0.000327,0,0,0,0,0,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.000000,0.494638,0.000000,0.000000
261111,0.725576,0.020316,4,262,0.008178,1,0,0,0,0,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.501136,0.000000
858985,0.906696,0.269612,0,675,0.000327,0,0,1,0,0,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398303,0.866081,0.488930,4,516,0.119725,1,0,1,0,0,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.163598,0.000000,0.000000,0.188204
593573,0.845225,0.034395,4,16,0.003598,0,0,0,0,0,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
697639,0.687157,0.055242,4,315,0.003925,1,0,0,0,0,...,0.000000,0.0,0.0,0.0,0.215145,0.0,0.173495,0.000000,0.000000,0.000000
378736,0.829857,0.019558,4,777,0.003598,0,0,0,1,0,...,0.397809,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
