In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the per-user anime ratings table; the path is relative to the notebook's working directory.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which is presumably an index
# that was written out with the CSV — confirm before relying on or dropping it.
users_score = pd.read_csv('data/users-score-2023.csv')

In [3]:
users_score.head()

Unnamed: 0,user_id,Username,anime_id,Anime Title,rating
0,1,Xinil,21,One Piece,9
1,1,Xinil,48,.hack//Sign,7
2,1,Xinil,320,A Kite,5
3,1,Xinil,49,Aa! Megami-sama!,8
4,1,Xinil,304,Aa! Megami-sama! Movie,8


In [4]:
from sklearn.preprocessing import MinMaxScaler

In [5]:
# Min-max scale the 'rating' column into the [0, 1] range.
# NOTE(review): the scaler is fit on the full table, i.e. before the train/test split further down.
scaler = MinMaxScaler(feature_range=(0, 1))

# The double brackets select a one-column DataFrame (2-D input) for fit_transform; the result is
# stored in a new 'scaled_score' column and the original 'rating' column is left as it was.
users_score['scaled_score'] = scaler.fit_transform(users_score[['rating']])

In [6]:
users_score.head()

Unnamed: 0,user_id,Username,anime_id,Anime Title,rating,scaled_score
0,1,Xinil,21,One Piece,9,0.888889
1,1,Xinil,48,.hack//Sign,7,0.666667
2,1,Xinil,320,A Kite,5,0.444444
3,1,Xinil,49,Aa! Megami-sama!,8,0.777778
4,1,Xinil,304,Aa! Megami-sama! Movie,8,0.777778


In [7]:
from sklearn.preprocessing import LabelEncoder

In [8]:
# Map each distinct user_id to an integer code in 0..n_users-1 and store it in 'user_encoded'.
# The fitted encoder is kept so the codes can be translated back to the original user ids.
user_encoder = LabelEncoder()
users_score["user_encoded"] = user_encoder.fit_transform(users_score['user_id'])

In [9]:
# Map each distinct anime_id to an integer code in 0..n_anime-1 and store it in 'anime_encoded'.
# The fitted encoder is kept so the codes can be translated back to the original anime ids.
anime_encoder = LabelEncoder()
users_score["anime_encoded"] = anime_encoder.fit_transform(users_score["anime_id"])

In [10]:
# Count the distinct labels each fitted encoder has seen (one entry per unique id).
num_users, num_anime = (
    len(encoder.classes_) for encoder in (user_encoder, anime_encoder)
)
print(f'Unique users: {num_users}')
print(f'Unique anime: {num_anime}')

Unique users: 270033
Unique anime: 16500


In [10]:
import pickle

In [13]:
# Bundle both fitted label encoders under fixed keys so they are saved and reloaded together.
encoders = dict(user_encoder=user_encoder, anime_encoder=anime_encoder)

# Serialise the bundle to 'encoders.pkl' in the current working directory.
with open('encoders.pkl', 'wb') as encoder_file:
    pickle.dump(encoders, encoder_file)

In [11]:
from sklearn.utils import shuffle

In [12]:
# Permute the rows with a fixed seed so the ordering is reproducible between runs.
df = shuffle(users_score, random_state=100)

# Model inputs are (user_encoded, anime_encoded) index pairs; the target is the scaled rating.
X = df[['user_encoded', 'anime_encoded']].to_numpy()
y = df['scaled_score'].to_numpy()

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
# Held-out size as an absolute row count (an integer test_size is a count, not a fraction).
test_set_size = 10_000

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_set_size,
    random_state=73,
)

In [15]:
# The model takes two separate inputs, so split the index pairs into one array per column:
# column 0 holds the encoded user index, column 1 the encoded anime index.
X_train_array = [X_train[:, col] for col in (0, 1)]
X_test_array = [X_test[:, col] for col in (0, 1)]

In [16]:
X_train

array([[248115,    548],
       [269902,   5725],
       [263594,     51],
       ...,
       [232635,   7542],
       [110257,  13677],
       [ 37291,   1537]], dtype=int64)

In [17]:
from tensorflow.keras.layers import Input, Embedding, Dot, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [18]:
def RecommenderNet(num_users, num_animes, embedding_size=128):
    """Build and compile a dual-embedding model that predicts a scaled rating.

    Every integer-encoded user index and anime index is looked up in its own
    embedding table. The two vectors are combined with a normalised dot
    product, and that value is fed through a small dense head whose final
    sigmoid keeps the prediction inside (0, 1).

    Args:
        num_users: Number of rows in the user embedding table (``input_dim``).
        num_animes: Number of rows in the anime embedding table (``input_dim``).
        embedding_size: Length of each embedding vector. Defaults to 128.

    Returns:
        A compiled Keras ``Model`` that takes ``[user, anime]`` index inputs,
        is trained with binary cross-entropy and Adam (learning rate 0.001),
        and reports MAE and MSE as metrics.
    """
    # User branch: one integer index per sample -> embedding vector.
    user_input = Input(shape=[1], name='user_encoded')
    user_vector = Embedding(
        input_dim=num_users,
        output_dim=embedding_size,
        name='user_embedding',
    )(user_input)

    # Anime branch: mirrors the user branch with its own embedding table.
    anime_input = Input(shape=[1], name='anime_encoded')
    anime_vector = Embedding(
        input_dim=num_animes,
        output_dim=embedding_size,
        name='anime_embedding',
    )(anime_input)

    # normalize=True L2-normalises both vectors before the dot product, so the
    # result is their cosine similarity; Flatten drops the leftover unit axes.
    similarity = Dot(axes=2, normalize=True, name='dot_product')(
        [user_vector, anime_vector]
    )
    similarity_flat = Flatten()(similarity)

    # Dense head mapping the similarity value to the final prediction.
    hidden = Dense(64, activation='relu')(similarity_flat)
    prediction = Dense(1, activation='sigmoid')(hidden)

    # Assemble and compile.
    model = Model(inputs=[user_input, anime_input], outputs=prediction)
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=["mae", "mse"],
    )
    return model


In [19]:
# Size the embedding tables from the number of distinct encoded users and anime.
model = RecommenderNet(num_users=num_users, num_animes=num_anime)

In [20]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_encoded (InputLayer)   [(None, 1)]                  0         []                            
                                                                                                  
 anime_encoded (InputLayer)  [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 128)               3456422   ['user_encoded[0][0]']        
                                                          4                                       
                                                                                                  
 anime_embedding (Embedding  (None, 1, 128)               2112000   ['anime_encoded[0][0]']   

In [21]:
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping

In [22]:
# Learning-rate schedule settings: the rate starts at start_lr, peaks at max_lr
# and decays back towards min_lr.
start_lr = 1e-5
min_lr = 1e-5
max_lr = 5e-5
batch_size = 10_000

# Shape of the schedule, measured in epochs.
rampup_epochs = 5
sustain_epochs = 0
exp_decay = 0.8


def lrfn(epoch):
    """Return the learning rate to use for the given epoch index.

    The schedule has three phases:
      * linear warm-up from ``start_lr`` towards ``max_lr`` while
        ``epoch < rampup_epochs``;
      * a plateau at ``max_lr`` for the next ``sustain_epochs`` epochs;
      * exponential decay towards ``min_lr`` afterwards, shrinking the gap
        ``max_lr - min_lr`` by a factor of ``exp_decay`` per epoch.

    The settings are read from the module-level names at call time.
    """
    # Phase 1: linear warm-up.
    if epoch < rampup_epochs:
        slope = (max_lr - start_lr) / rampup_epochs
        return slope * epoch + start_lr

    # Phase 2: hold the peak rate.
    if epoch < rampup_epochs + sustain_epochs:
        return max_lr

    # Phase 3: exponential decay; `elapsed` is 0 on the first decay epoch, so
    # that epoch still runs at max_lr.
    elapsed = epoch - rampup_epochs - sustain_epochs
    return (max_lr - min_lr) * exp_decay**elapsed + min_lr
    
# Apply the lrfn schedule during training. lrfn already takes the epoch index as its only
# argument, so it is passed directly instead of through a pass-through lambda.
lr_callback = LearningRateScheduler(lrfn, verbose=0)

# Stop once val_loss has failed to improve for 3 epochs in a row, then restore the
# weights from the best epoch seen.
early_stopping = EarlyStopping(patience=3, monitor='val_loss', mode='min', restore_best_weights=True)

# Callback list handed to model.fit.
my_callbacks = [lr_callback,
                early_stopping]

In [23]:
# Train for at most 20 epochs. The held-out split doubles as the validation set that
# the callbacks monitor, and the per-epoch metrics are kept in `history`.
history = model.fit(
    X_train_array,
    y_train,
    epochs=20,
    batch_size=batch_size,
    validation_data=(X_test_array, y_test),
    callbacks=my_callbacks,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20


In [29]:
# Persist the trained model to 'myanimemodel.h5' in the current working directory.
# NOTE(review): the truncated `saving_api.save_model(` output below looks like the tail of a
# Keras warning about the HDF5 format — confirm, and consider the native '.keras' format if
# every consumer of this file can be updated.
model.save('myanimemodel.h5')

  saving_api.save_model(
