In [28]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers

In [40]:
ratings_df = pd.read_csv('ratings_matrix.csv')
ratings_df.head()

Unnamed: 0,User_id,"'HUMBLE PIE , MY AUTOBIOGRAPHY'","1,000 Places to See Before You Die: A Traveler's Life List",10 Stupid Things Couples Do To Mess Up Their Relationships,1001 Ways to Market Your Books: For Authors and Publishers (Book Marketing Series),1901: A Novel,1906,1st to Die: A Novel,2 Years to a Million in Real Estate,2001: A Space Odyssey,...,You Remind Me of Me,Your Body's Many Cries for Water: A Preventive and Self-Education Manual for Those Who Prefer to Adhere to the Logic of the Natural and the Simple in,Your Present: A Half Hour of Peace,Zane's Gettin' Buck Wild: Sex Chronicles II,Zen Shorts (Caldecott Honor Book),Zen in the art of archery,Zig Ziglar's Secrets of Closing the Sale,forever,prince caspian: the return to narnia,the great brain
0,A106016KSI0YQ,,,,,,,,,,...,,,,,,,,,,
1,A10LWBOIZCF2QT,,,,,,,,,,...,,,,,,,,,,
2,A10T0OW97SFBB,,,,,,,,,,...,,,,,,,,,5.0,
3,A116J8AUC3JSN2,,,,,,,,,,...,4.0,,,,,,,,,
4,A11B61QBGHLQDN,,,,,,,,,,...,,,,,,,,,,


In [41]:
ratings_df.fillna(0, inplace=True)
#ratings_df = ratings_df.astype(int)
ratings_matrix = ratings_df.drop('User_id',axis=1).values
#normalize data
ratings_matrix = ratings_matrix/5.0
ratings_matrix.shape

(940, 1236)

In [42]:
#custom loss to not penalize on 0 values in reconstruction
def masked_mse(y_true, y_pred):
    mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)  # 1 where y_true != 0
    squared_error = tf.square(y_true - y_pred)
    masked_se = mask * squared_error
    return tf.reduce_sum(masked_se) / tf.reduce_sum(mask)  # average over non-zero entries

In [43]:
original_dim = ratings_matrix.shape[1]
encoding_dim = 64

# Input: user's interaction vector (sparse or dense)
input_layer = layers.Input(shape=(original_dim,))

# Encoder
encoded = layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.1))(input_layer)
#encoded = layers.Dropout(0.5)(encoded)
#encoded = layers.Dense(264, activation='relu')(encoded)
#encoded = layers.Dropout(0.5)(encoded)
encoded = layers.Dense(encoding_dim, activation='relu', activity_regularizer=regularizers.l1(0.00001))(encoded)

# Decoder
#decoded = layers.Dense(264, activation='relu')(encoded)
decoded = layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.1))(encoded)
#decoded = layers.Dropout(0.5)(decoded)
output_layer = layers.Dense(original_dim, activation='linear')(decoded) 

# Model
autoencoder = Model(inputs=input_layer, outputs=output_layer)

#compile the model
autoencoder.compile(optimizer='adam', loss= masked_mse)

In [44]:
#train the model
#callbacks=[early_stopping]
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

autoencoder.fit(ratings_matrix, ratings_matrix,
                epochs=100,
                batch_size=128,
                shuffle=True,
                validation_split=0.2,
                callbacks=[early_stopping])

Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 67ms/step - loss: 79.0620 - val_loss: 62.5542
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 58.1892 - val_loss: 45.1082
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 41.7230 - val_loss: 31.7555
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 29.2676 - val_loss: 21.9943
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 20.1829 - val_loss: 14.9775
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 13.7037 - val_loss: 10.0731
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 9.1954 - val_loss: 6.7226
Epoch 8/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 6.1318 - val_loss: 4.4840
Epoch 9/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7f11a810c410>

In [45]:
reconstruction_matrix = autoencoder.predict(ratings_matrix)
#scale back
reconstruction_matrix = reconstruction_matrix * 5

[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [46]:
print(reconstruction_matrix.shape)
print(reconstruction_matrix[0:5,:])

(940, 1236)
[[4.501551  4.245444  3.6565008 ... 3.3264613 4.223231  5.0082617]
 [4.5015507 4.245444  3.6565008 ... 3.3264613 4.223231  5.0082617]
 [4.501551  4.245444  3.6565008 ... 3.3264613 4.223231  5.0082617]
 [4.501551  4.245444  3.6565008 ... 3.3264613 4.223231  5.0082617]
 [4.501551  4.245444  3.6565008 ... 3.3264613 4.223231  5.0082617]]


### Make recommendations
Certain items are recommended alot

In [50]:
#number of items to recommend
top_k = 10
#zero out previously rated items, so they are not recommended
rated_indices = np.argwhere(ratings_matrix)
rows, columns = rated_indices.T
reconstruction_matrix[rows,columns] = 0
#get random user
user = reconstruction_matrix[np.random.randint(reconstruction_matrix.shape[0],size=1),:]
#make recommendations
recs = np.argsort(-user)[0,0:top_k]
print(recs)
print(ratings_df.columns[recs])
print(f'\npredicted ratings: {user[0,recs]}')

[956  95 718 760 552 941 541 529 847 489]
Index(['The Long Goodbye', 'Autobiography of Benjamin Franklin',
       'Secret Sanction', 'Small Island', 'Messenger',
       'The Last Kingdom (The Saxon Chronicles Series #1)',
       'March Upcountry (Library Edition)', 'Magician',
       'The Calling of Emily Evans (Women of the West #1) (Janette Oke Classics for Girls)',
       'Liberal Fascism: The Secret History of the American Left, From Mussolini to the Politics of Meaning'],
      dtype='object')

predicted ratings: [5.014163  5.012723  5.0124526 5.0121546 5.0117064 5.011523  5.011405
 5.0111103 5.011016  5.010932 ]


### Find items with highest predicted ratings

In [51]:
top_k=10
average_scores = reconstruction_matrix.mean(axis=0)
recs = np.argsort(-average_scores)[0:top_k]
print(f'Recommended Items: {recs}')
print(f'Predicted Ratings: {average_scores[recs]}')

book_recs = ratings_df.columns[recs]
print(f'Recommended Items: {book_recs}')

Recommended Items: [665 101 825 430 403 965 671 158 688 681]
Predicted Ratings: [5.003776  5.00362   5.0034204 5.0021696 5.001945  5.001184  5.000971
 5.0007057 5.0005875 5.000493 ]
Recommended Items: Index(['Prodigal Son (Dean Koontz's Frankenstein, Book 1)', 'BEING PEACE',
       'The Art of War (The Leather-Bound Library of Military History)',
       'Inspired By... The Bible Experience: New Testament',
       'How to eat fried worms (A Dell yearling book)',
       'The Making of a Chef: Mastering Heat at the Culinary Institute of America',
       'Queen of Camelot',
       'Buddhism Without Beliefs: A Contemporary Guide to Awakening',
       'Resurrection Day', 'Ramona the Pest'],
      dtype='object')
