## Deep learning

Automatically learn complex & non-linear patterns from diverse data types

### Autoencoder
Self-supervised neural network architectures that has an encoder and a decoder, aiming for the outputs to be identical to the inputs  

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

2024-02-22 10:51:21.145719: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the dataset

file_path = 'ml-1m/ratings.dat'
columns = ['user_id','item_id','rating','timestamp']
df = pd.read_csv(file_path, sep='\t', names=columns)

In [3]:
# Create user-item interaction matrix

user_item_matrix = df.pivot(index='user_id', columns='item_id', values='rating')

In [4]:
# Split the data into training and testing sets

train_data, test_data = train_test_split(user_item_matrix, test_size=0.2, random_state=42)

In [6]:
# Build the autoencoder model

num_users, num_items = user_item_matrix.shape
latent_dim = 50

input_layer = Input(shape=(num_items,))
encoded = Dense(latent_dim, activation='relu')(input_layer)
decoded = Dense(num_items, activation='sigmoid')(encoded)

autoencoder = Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

In [8]:
# Train the autoencoder

autoencoder.fit(train_data, train_data, epochs=10, batch_size=64, shuffle=True, validation_data=(test_data, test_data))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x141032110>

In [9]:
# Extract user and item representations from the encoder part of the autoencoder

encoder = Model(inputs=input_layer, outputs=encoded)
user_embeddings = encoder.predict(user_item_matrix)



In [10]:
# Example: Recommend items for a specific user

user_id = 1   # replace with the desrired user id
user_representation = user_embeddings[user_id -1]

In [11]:
# Calculate the predicted ratings of all times

predicted_ratings = np.dot(user_embeddings, user_representation)

In [12]:
# Display top N recommendations

top_n = np.argsort(predicted_ratings)[::-1][:10]
print(f'Top recommendations for User {user_id}: {top_n +1}')

Top recommendations for User 1: [1000209  333412  333410  333409  333408  333407  333406  333405  333404
  333403]


### Restricted Boltzmann Machine (RBM)
- a type of stochastic artificial neural network with no output layer

In [13]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [14]:
# Load the dataset

file_path = 'ml-1m/ratings.dat'
columns = ['user_id','item_id','rating','timestamp']
df = pd.read_csv(file_path, sep='\t', names=columns)

In [15]:
# Create user-item interaction matrix

user_item_matrix = df.pivot(index='user_id', columns='item_id', values='rating')

In [20]:
# Binarize the ratings (0 if not rated, 1 if rated)

user_item_matrix_binary = (user_item_matrix > 0).astype(float)

In [21]:
# Split the data into training and testing datas

train_data, test_data = train_test_split(user_item_matrix_binary, test_size=0.2, random_state=42)

In [22]:
# RBM parameters

num_visible = num_items = user_item_matrix_binary.shape[1]
num_hidden = 50
batch_size = 64
epochs = 10

In [23]:
# Build the RBM model

visible_layer = Input(shape=(num_visible,))
hidden_layer = Dense(num_hidden, activation='sigmoid')(visible_layer)
visible_layer_reconstruction = Dense(num_visible, activation='sigmoid')(hidden_layer)

rbm = Model(inputs=visible_layer, outputs=visible_layer_reconstruction)
rbm.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

In [24]:
# Train the RBM

rbm.fit(train_data, train_data, epochs=epochs, batch_size=batch_size, shuffle=True, validation_data=(test_data, test_data))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x13fdd2cd0>

In [26]:
# Extract user and item representations from the hidden layer

user_embeddings = rbm.layers[1].get_weights()[0].T
item_embdeddings = rbm.layers[1].get_weights()[0]

In [28]:
# Example: Recommend items for a specific user

user_id = 1  # replace with the sdesired user ID
user_representation = user_embeddings[user_id - 1]

In [29]:
# Calculate the predicted ratings for all items

predicted_ratings = np.dot(user_embeddings, user_representation)

In [30]:
# Display top N recommendations

top_n = np.argsort(predicted_ratings)[::-1][:10]
print(f'Top recommendations for User {user_id}: {top_n +1}')

Top recommendations for User 1: [42  2  9 48 18  3 41 22 19 27]
