In [1324]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers
from sklearn.model_selection import train_test_split 

In [1325]:
ratings_df = pd.read_csv('ratings_matrix.csv')
ratings_df.head()

Unnamed: 0,User_id,"""Chosen"" Classics: Round the World in Eighty Days","""D"" is for Deadbeat","""Mildred Pierce","""Ra Force Rising"": Brother G","""Thirty years in hell""; or, ""From darkness to light",'Tis The Season: The Choice\First Fruits\A New Year; A New Beginning,'Tis the Season to Be Murdered,... Summer moonshine,...Arrow pointing nowhere,...,Zane's Gettin' Buck Wild: Sex Chronicles II,Zane's Skyscraper: A Novel,Zazie dans le Mtro,Zen Attitude,Zia,Zorba the Greek,green valley,never too much,our davie pepper,the Picture of Dorian Gray
0,A106016KSI0YQ,,,,,,,,,,...,,,,,,,,,,
1,A106E1N0ZQ4D9W,,,,,,,,,2.0,...,,,,,,,,,,
2,A10T0OW97SFBB,5.0,,,,,,,,,...,,,,,,,,,,
3,A10Y3OZWENAQ6W,,,,,,,,,,...,,,,,,,,,,
4,A1129LM24YWSZV,,,,,,,,,,...,,,,,,,,,,


In [1395]:
ratings_df.fillna(0, inplace=True)
#ratings_df = ratings_df.astype(int)
users = ratings_df['User_id']
ratings_matrix = ratings_df.drop('User_id',axis=1).values
#normalize data
ratings_matrix = ratings_matrix / 5
ratings_matrix.shape

(1066, 2135)

In [1396]:
#train test split
train_matrix, test_matrix = train_test_split(ratings_matrix, test_size=0.1)
print(train_matrix.shape, test_matrix.shape)

(959, 2135) (107, 2135)


In [1397]:
#custom loss to not penalize on 0 values in reconstruction
def masked_mse(y_true, y_pred):
    mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)  # 1 where y_true != 0
    squared_error = tf.square(y_true - y_pred)
    masked_se = mask * squared_error
    return tf.reduce_sum(masked_se) / tf.reduce_sum(mask)  # average over non-zero entries

### Autoencoder Architecture
Used this paper for regularizer, dropout, and re-feeding ideas:
<Link Text>https://dl.acm.org/doi/10.1145/2740908.2742726 <br>

In [1534]:
original_dim = ratings_matrix.shape[1]
encoding_dim = 1024

#input layer: user rating vector
input_layer = layers.Input(shape=(original_dim,))
encoded = layers.Dropout(0.8)(input_layer)


encoded = layers.Dense(encoding_dim, activation='selu', kernel_regularizer=regularizers.l2(0.001), activity_regularizer=regularizers.l1(0.001))(input_layer)
encoded=layers.Dropout(0.8)(encoded)
encoded = layers.Dense(original_dim, activation='selu')(encoded) 

#refeeeding
encoded = layers.Dropout(0.8)(encoded)
encoded = layers.Dense(encoding_dim, activation='selu', kernel_regularizer=regularizers.l2(0.001), activity_regularizer=regularizers.l1(0.001))(encoded)
decoded=layers.Dropout(0.8)(encoded)

# Decoder
output_layer = layers.Dense(original_dim, activation='selu')(decoded) 

# Model
autoencoder = Model(inputs=input_layer, outputs=output_layer)

#compile the model
autoencoder.compile(optimizer='adam', loss= masked_mse)

In [1539]:
#train the model
#callbacks=[early_stopping]
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

autoencoder.fit(train_matrix, train_matrix,
                epochs=1000,
                batch_size=128,
                shuffle=True,
                validation_split=0.2,
                callbacks=[early_stopping])

Epoch 1/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 124ms/step - loss: 1.2541 - val_loss: 8.4649
Epoch 2/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 1.2357 - val_loss: 8.4355
Epoch 3/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - loss: 1.2290 - val_loss: 8.4189
Epoch 4/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 147ms/step - loss: 1.2278 - val_loss: 8.4002
Epoch 5/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 147ms/step - loss: 1.2518 - val_loss: 8.3726
Epoch 6/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 139ms/step - loss: 1.2493 - val_loss: 8.3506
Epoch 7/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 145ms/step - loss: 1.2367 - val_loss: 8.3348
Epoch 8/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 140ms/step - loss: 1.2335 - val_loss: 8.3132
Epoch 9/1000
[1m6/6[0m [32m━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7fb4c6d5fdd0>

#### Check the test loss
better

In [1540]:
test_reconstruction = autoencoder.predict(test_matrix)
pred_test_tensor = tf.convert_to_tensor(test_reconstruction, dtype=tf.float32)
true_test_tensor = tf.convert_to_tensor(test_matrix, dtype=tf.float32)
test_loss = masked_mse(true_test_tensor,pred_test_tensor)
print(test_loss)
print(np.sqrt(test_loss))

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
tf.Tensor(0.03663658, shape=(), dtype=float32)
0.19140685


In [1543]:
reconstruction_matrix = autoencoder.predict(ratings_matrix)
#scale back
reconstruction_matrix = reconstruction_matrix * 5

[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


In [1544]:
print(reconstruction_matrix.shape)
print(reconstruction_matrix[0:5,:])
print(f'range of reconstructed ratings: {reconstruction_matrix.min()} - {reconstruction_matrix.max()}')

(1066, 2135)
[[4.4336405 4.034663  4.0845413 ... 2.9408464 3.3690574 4.2861285]
 [4.440922  4.028892  4.08317   ... 2.939485  3.371898  4.2897854]
 [4.4265895 4.031069  4.0835423 ... 2.940816  3.3652208 4.2847996]
 [4.4491444 4.039109  4.090173  ... 2.9507623 3.3678176 4.2801733]
 [4.4296103 4.0337534 4.0852323 ... 2.9413068 3.370357  4.282131 ]]
range of reconstructed ratings: -0.19090907275676727 - 4.9073567390441895


### Make recommendations
Certain items are recommended alot

In [1567]:
#number of items to recommend
top_k = 10
#zero out previously rated items, so they are not recommended
rated_indices = np.argwhere(ratings_matrix)
rows, columns = rated_indices.T
reconstruction_matrix[rows,columns] = 0
#get random user
rand_num = np.random.randint(reconstruction_matrix.shape[0],size=1)
user = reconstruction_matrix[rand_num,:]
#make recommendations
recs = np.argsort(-user)[0,10:20]
print(rand_num)
print(recs)
print(ratings_df.columns[recs])
print(f'\npredicted ratings: {user[0,recs]}')

[959]
[1988  523  311 1897  132  734 1170 1735 1711  879]
Index(['Typee', 'Empress Bianca', 'Carthage Ascendant : The Book of Ash 2',
       'The faith of Islam,', 'America en Bicicleta: del Plata a la Habana',
       'Hound of the Baskervilles', 'Odyssey Fulfilled',
       'The Magnificent Ambersons', 'The Ladies' Man',
       'LAS VEGAS BEHIND THE TABLES'],
      dtype='object')

predicted ratings: [4.6318836 4.6316533 4.630043  4.6237698 4.623622  4.617614  4.6166205
 4.6162663 4.607437  4.603854 ]


### Find items with highest predicted ratings

In [1453]:
top_k=10
average_scores = reconstruction_matrix.mean(axis=0)
recs = np.argsort(-average_scores)[0:top_k]
print(f'Recommended Items: {recs}')
print(f'Predicted Ratings: {average_scores[recs]}')

book_recs = ratings_df.columns[recs]
print(f'Recommended Items: {book_recs}')

Recommended Items: [ 953  627 1842 1535  507 1609  105 1123  111  483]
Predicted Ratings: [4.9013767 4.8733506 4.8565493 4.8553643 4.8498116 4.846752  4.8466506
 4.8463826 4.8452168 4.8416686]
Recommended Items: Index(['Lost Girls, Book 2', 'George Orwell', 'The Surgeon's Mate',
       'Taboo : A Novel of Forbidden Sensual Delights.', 'Egg Drop Soup',
       'The Complete Compleat Enchanter', 'After Eve: [Conte Philosophique]',
       'Neurolink', 'Airframe', 'Dubin's Lives'],
      dtype='object')


In [568]:
2**9

512

In [1492]:
print(pred_test_tensor)
print(true_test_tensor)
mask = tf.cast(tf.not_equal(true_test_tensor, 0), tf.float32)  # 1 where y_true != 0
squared_error = tf.square(true_test_tensor - pred_test_tensor)
masked_se = mask * squared_error
print(tf.reduce_sum(masked_se) / tf.reduce_sum(mask))  # average over non-zero entries

tf.Tensor(
[[0.9096581  0.8181278  0.8591702  ... 0.63091624 0.69876987 0.8677832 ]
 [0.9094673  0.8179675  0.8592941  ... 0.6310251  0.6986825  0.8677833 ]
 [0.90965885 0.81800234 0.8592997  ... 0.63088495 0.6987537  0.86777395]
 ...
 [0.9097212  0.81804854 0.8594322  ... 0.63109213 0.6987637  0.86780137]
 [0.9096894  0.81804633 0.8593983  ... 0.630988   0.6986895  0.8677532 ]
 [0.90974414 0.81813437 0.8593306  ... 0.63101006 0.69882905 0.8680497 ]], shape=(107, 2135), dtype=float32)
tf.Tensor(
[[0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0.8]
 ...
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]], shape=(107, 2135), dtype=float32)
tf.Tensor(0.03528607, shape=(), dtype=float32)
