In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_probability as tfp
from sklearn.model_selection import train_test_split

tfd = tfp.distributions
tfb = tfp.bijectors
tfpl = tfp.layers

In [None]:
# Load the long-format table that pairs each item_id with one feature_idx per row.
features_csv_path = '../dataset/processed_data/simplified_features_and_categories_30.csv'
items_features = pd.read_csv(features_csv_path)
items_features.head()

Unnamed: 0,item_id,feature_idx
0,19021,0
1,19021,22
2,19021,15
3,19021,446
4,19021,1


In [None]:
# Largest item id present in the table; it sets the row count of the matrix built below.
item_id_column = items_features['item_id']
item_id_column.max()

23691

In [None]:
# Multi-hot matrix: one row per item id (0..max), one column per feature index
# (0..max); a cell is 1 when the (item_id, feature_idx) pair appears in the table.
n_item_rows = items_features['item_id'].max() + 1
n_feature_cols = items_features['feature_idx'].max() + 1
dataset = np.zeros((n_item_rows, n_feature_cols))

item_rows = items_features['item_id']
feature_cols = items_features['feature_idx']
dataset[(item_rows, feature_cols)] = 1

# Row 0 is left out of the matrix used for the train/test split.
# NOTE(review): presumably no item has id 0 - confirm against the raw data.
train_dataset = dataset[1:, :]

In [None]:
# Hold out 20% of the rows for validation. The fixed random_state keeps the
# split identical across runs. `train_test_split` is already imported in the
# notebook's import cell, so it is not re-imported here.
train, test = train_test_split(train_dataset, test_size=0.2, random_state=941116)

In [None]:
# Dimensions (rows, columns) of the full multi-hot matrix.
dataset.shape

(23692, 636)

In [None]:
# Dimensions (rows, columns) of the training split.
train.shape

(18952, 636)

In [None]:
# Dimensions (rows, columns) of the held-out split.
test.shape

(4739, 636)

# Autoencoder

In [None]:
# Encoder: funnel the multi-hot feature vector down to a dense latent code
# through a stack of Dense layers whose width halves at every step.
latent_dimension = 64
starting_dimension = 512

# The input shape is passed as an explicit tuple holding a plain Python int.
# pandas returns a numpy integer here, and a bare integer is not accepted as a
# shape by every Keras version.
inputs = layers.Input(shape=(int(items_features['feature_idx'].max()) + 1,))

x = inputs
dimension = starting_dimension

# Halve the width until the latent size is reached. `>` (rather than `!=`)
# guarantees the loop stops even when starting_dimension is not
# latent_dimension times a power of two; with `!=` the width would step past
# the target and keep shrinking down to 0.
while dimension > latent_dimension:
    x = layers.Dense(
        units=dimension,
        activation='gelu',
        activity_regularizer=tf.keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
    )(x)
    x = layers.Dropout(0.1)(x)

    dimension = dimension // 2

# Bottleneck layer: always exactly latent_dimension units, tanh-bounded, with a
# stronger activity penalty than the hidden layers.
dimension = latent_dimension
x = layers.Dense(
    units=latent_dimension,
    activation="tanh",
    activity_regularizer=tf.keras.regularizers.L1L2(l1=5e-5, l2=4e-4),
)(x)
outputs = x
encoder = tf.keras.Model(inputs=inputs, outputs=outputs, name='encoder')

In [None]:
# Decoder: expand a latent code back to one value per original feature column.
# Converted to a plain Python int because pandas returns a numpy integer.
ending_dimension = int(items_features['feature_idx'].max()) + 1

# The input shape is passed as an explicit tuple; a bare integer is not
# accepted as a shape by every Keras version.
inputs = layers.Input(shape=(latent_dimension,))

x = inputs

# Two fixed-width hidden layers (256 then 512), each followed by dropout.
x = layers.Dense(
    units=256,
    activation='gelu',
    activity_regularizer=tf.keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
)(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(
    units=512,
    activation='gelu',
    activity_regularizer=tf.keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
)(x)
x = layers.Dropout(0.2)(x)

# Sigmoid output keeps every reconstructed feature inside [0, 1].
x = layers.Dense(units=ending_dimension, activation="sigmoid")(x)
outputs = x

decoder = tf.keras.Model(inputs=inputs, outputs=outputs, name='decoder')

#decoder.summary()

In [None]:
# Chain the encoder and decoder into a single end-to-end reconstruction model.
reconstruction = decoder(encoder.outputs)
ae = tf.keras.Model(
    inputs=encoder.inputs,
    outputs=reconstruction,
    name='auto_encoder',
)

In [None]:
def custom_loss(x_true,x_pred):
    """Reconstruction loss: Keras cosine-similarity loss plus mean squared error.

    Both terms are computed by the stock ``tf.keras.losses`` functions on the
    same (target, reconstruction) pair and added together without weighting.

    NOTE(review): per the Keras docs the cosine term is the *negated*
    similarity, so lower is better - confirm for the installed version.
    """
    cosine_term = tf.keras.losses.cosine_similarity(x_true, x_pred)
    mse_term = tf.keras.losses.mean_squared_error(x_true, x_pred)
    return cosine_term + mse_term

In [None]:
# Compile with Adamax and the combined loss; cosine similarity and mean
# absolute error are tracked as metrics during training.
optimizer = tf.keras.optimizers.Adamax(learning_rate=0.006)
tracked_metrics = [
    tf.keras.metrics.CosineSimilarity(),
    tf.keras.metrics.MeanAbsoluteError(),
]
ae.compile(optimizer=optimizer, loss=custom_loss, metrics=tracked_metrics)

# Training

In [None]:
# Train the autoencoder to reconstruct its own input. The held-out split is
# passed as validation data and drives both callbacks below.
ae.fit(
    train,
    train,
    batch_size=64,
    validation_data=(test, test),
    epochs=200,
    shuffle=True,
    callbacks=[
        # Stop when the *validation* cosine similarity has not improved for 10
        # epochs and roll back to the best weights. Monitoring the training
        # metric instead would pick weights by training fit and disagree with
        # the learning-rate schedule, which already watches the validation
        # metric.
        tf.keras.callbacks.EarlyStopping(
            monitor='val_cosine_similarity',
            patience=10,
            restore_best_weights=True,
            mode="max",
        ),
        # Multiply the learning rate by 0.7 after 2 epochs without at least a
        # 1e-4 gain in validation cosine similarity, never going below 5e-5.
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_cosine_similarity',
            factor=0.7,
            patience=2,
            min_lr=5e-5,
            mode="max",
            min_delta=0.0001,
        ),
    ],
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [None]:
# Encode every row of the full matrix in inference mode (dropout inactive).
temp = encoder(dataset, training=False)

In [None]:
# Shape of the encoder output for the full matrix.
temp.shape

TensorShape([23692, 64])

In [None]:
# Inspect the encoder output for the full matrix.
temp

In [None]:
# Sum each latent vector across its components, then summarise those per-row
# sums as (max, min, std).
latent_row_sums = tf.reduce_sum(temp, axis=1).numpy()
latent_row_sums.max(), latent_row_sums.min(), latent_row_sums.std()

5.268532

In [None]:
# Persist the latent representation of every row of the full matrix.
compressed_features_path = '../dataset/processed_data/compressed_features'
compressed_features = encoder(dataset)
np.save(compressed_features_path, compressed_features)