In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import mplcyberpunk as mplnk

In [2]:
import tensorflow as tf
import keras
from keras import Model, layers, optimizers, losses, metrics

2024-07-26 12:16:04.442233: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-26 12:16:04.455444: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-26 12:16:04.474964: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-26 12:16:04.475003: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-26 12:16:04.487754: I tensorflow/core/platform/cpu_feature_gua

In [3]:
from config import Config
from utils import DataLoader
from models import DeepMF, GAN, VAE
from models.metrics.elbo import ELBOLoss
from models.layers import MinMaxNormalization

In [4]:
# Load the 'movie' dataset through the project's DataLoader helper.
# NOTE(review): the unpacking assumes load_numpy returns the four arrays in
# (x_train, y_train, x_test, y_test) order — confirm against utils.DataLoader.
x_train, y_train, x_test, y_test = DataLoader.load_numpy(tag='movie')

In [5]:
def build_encoder(input_dim=3, latent_dim=10, hidden_units=10):
    """Build the encoder half of the autoencoder as a functional Keras model.

    Architecture: Dense(hidden_units) -> LeakyReLU(0.2) -> BatchNormalization
    -> Dense(latent_dim). The final layer has no activation.

    Args:
        input_dim: Number of features in each input row.
        latent_dim: Width of the encoder output.
        hidden_units: Width of the single hidden Dense layer.

    Returns:
        A ``Model`` named ``'encoder'`` mapping ``(input_dim,)`` inputs to
        ``(latent_dim,)`` outputs. Calling with no arguments reproduces the
        original 3 -> 10 -> 10 network.
    """
    inputs = layers.Input(shape=(input_dim,), name='encode_input')
    x = layers.Dense(units=hidden_units, name='dense1')(inputs)
    x = layers.LeakyReLU(negative_slope=0.2, name='leaky_relu_1')(x)
    x = layers.BatchNormalization(momentum=0.8, name='batch_norm')(x)
    outputs = layers.Dense(units=latent_dim, name='dense2')(x)

    return Model(inputs=inputs, outputs=outputs, name='encoder')

def build_decoder(latent_dim=10, output_dim=3, hidden_units=10):
    """Build the decoder half of the autoencoder as a functional Keras model.

    Architecture: Dense(hidden_units) -> LeakyReLU(0.2) -> BatchNormalization
    -> Dense(output_dim). The final layer has no activation.

    Args:
        latent_dim: Width of the latent vectors fed to the decoder.
        output_dim: Number of features in each reconstructed row.
        hidden_units: Width of the single hidden Dense layer.

    Returns:
        A ``Model`` named ``'decoder'`` mapping ``(latent_dim,)`` inputs to
        ``(output_dim,)`` outputs. Calling with no arguments reproduces the
        original 10 -> 10 -> 3 network.
    """
    inputs = layers.Input(shape=(latent_dim,), name='decode_input')
    x = layers.Dense(units=hidden_units, name='dense1')(inputs)
    x = layers.LeakyReLU(negative_slope=0.2, name='leaky_relu_1')(x)
    x = layers.BatchNormalization(momentum=0.8, name='batch_norm')(x)
    outputs = layers.Dense(units=output_dim, name='dense2')(x)

    return Model(inputs=inputs, outputs=outputs, name='decoder')

In [6]:
# Train the VAE to reconstruct its own input (x is used as both input and target).
# NOTE(review): ELBOLoss is passed as-is rather than called — confirm that
# models.VAE.compile expects it in this form.
vae = VAE(encoder=build_encoder(), decoder=build_decoder(), latent_dim=10)
vae.compile(optimizer=optimizers.Adam(learning_rate=0.01), loss=ELBOLoss, metrics=[metrics.R2Score()])
vae.fit(
    x=x_train,
    y=x_train,
    batch_size=32,
    epochs=1000,
    validation_data=(x_test, x_test),
    # Validation only runs every 5th epoch, so the callbacks below monitor the
    # training metrics, which are available on every epoch.
    validation_freq=5,
    callbacks=[
        # R^2 is higher-is-better. State the direction explicitly instead of
        # relying on mode='auto', whose name-based inference can treat
        # 'r2_score' as a quantity to minimise and keep the wrong "best" weights.
        keras.callbacks.EarlyStopping(monitor='r2_score', mode='max', patience=10, restore_best_weights=True),
        # Loss is lower-is-better; cut the learning rate 10x after 5 stagnant epochs.
        keras.callbacks.ReduceLROnPlateau(monitor='loss', mode='min', factor=0.1, patience=5),
    ],
    verbose=1,
)

2024-07-26 12:16:10.132504: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-07-26 12:16:10.178246: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-07-26 12:16:10.178707: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Epoch 1/1000


I0000 00:00:1721976372.296342    8441 service.cc:145] XLA service 0x78ef5400e940 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1721976372.296380    8441 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2024-07-26 12:16:12.361898: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-07-26 12:16:12.660228: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


[1m 102/2500[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 2ms/step - loss: 184379.7969 - r2_score: -4.6153

I0000 00:00:1721976373.540336    8441 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 67576.3047 - r2_score: -0.9917 - learning_rate: 0.0100
Epoch 2/1000
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 2572.6602 - r2_score: 0.6364 - learning_rate: 0.0100
Epoch 3/1000
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 840us/step - loss: 2286.0134 - r2_score: 0.6324 - learning_rate: 0.0100
Epoch 4/1000
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 757us/step - loss: 2212.9658 - r2_score: 0.6317 - learning_rate: 0.0100
Epoch 5/1000
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 982us/step - loss: 2295.8164 - r2_score: 0.6289 - val_loss: 24.0898 - val_r2_score: 0.6606 - learning_rate: 0.0100
Epoch 6/1000
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 2195.0364 - r2_score: 0.6325 - learning_rate: 0.0100
Epoch 7/1000
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x78f015ae2310>

In [7]:
# Run the trained VAE over the training rows using the project-specific
# `mode='encode'` switch of VAE.predict.
# NOTE(review): presumably this returns only the encoder's latent output
# (one 10-wide vector per row) — confirm against models.VAE.predict.
x_train_embedded = vae.predict(x=x_train, mode='encode')

[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 507us/step


In [11]:
def build_generator(noise_dim=100, latent_dim=10):
    """Build the GAN generator as a functional Keras model.

    Architecture: Dense(10) -> LeakyReLU(0.2) -> BatchNormalization
    -> Dense(20) -> LeakyReLU(0.2) -> Dropout(0.2) -> Dense(latent_dim).
    The final layer has no activation.

    Args:
        noise_dim: Length of each input noise vector.
        latent_dim: Width of the generated vectors. The default of 10 matches
            the VAE embeddings this notebook trains the GAN on (the previous
            unused local ``latent_dim = 5`` did not match the real output width).

    Returns:
        A ``Model`` named ``'generator'`` mapping ``(noise_dim,)`` inputs to
        ``(latent_dim,)`` outputs.
    """
    inputs = layers.Input(shape=(noise_dim,), name='gen_input')
    x = layers.Dense(units=10, name='dense_1')(inputs)
    x = layers.LeakyReLU(negative_slope=0.2, name='leaky_relu_1')(x)
    x = layers.BatchNormalization(momentum=0.8, name='batch_norm')(x)
    x = layers.Dense(20, name='dense_2')(x)
    x = layers.LeakyReLU(negative_slope=0.2, name='leaky_relu_2')(x)
    x = layers.Dropout(rate=0.2, name='dropout')(x)
    outputs = layers.Dense(units=latent_dim, name='dense3')(x)

    return Model(inputs=inputs, outputs=outputs, name='generator')

def build_discriminator(latent_dim=10):
    """Build the GAN discriminator as a functional Keras model.

    Architecture: Dense(4) -> LeakyReLU(0.2) -> Dense(1) -> sigmoid, so the
    model emits one value in (0, 1) per input row.

    Args:
        latent_dim: Width of the vectors being scored. The default of 10
            matches the original hard-coded input shape (the previous unused
            local ``latent_dim = 5`` did not).

    Returns:
        A ``Model`` named ``'discriminator'`` mapping ``(latent_dim,)`` inputs
        to a single sigmoid output.
    """
    inputs = layers.Input(shape=(latent_dim,), name='disc_input')
    x = layers.Dense(units=4, name='dense4')(inputs)
    x = layers.LeakyReLU(negative_slope=0.2, name='leaky_relu1')(x)
    x = layers.Dense(units=1, name='dense5')(x)
    x = layers.Activation('sigmoid', name='sigmoid')(x)

    return Model(inputs=inputs, outputs=x, name='discriminator')

In [12]:
# Wire a fresh generator and discriminator into the project's GAN wrapper;
# noise_dim must equal the generator's input width.
gan = GAN(generator=build_generator(), discriminator=build_discriminator(), noise_dim=100)

# Each sub-network gets its own Adam optimizer with default hyperparameters.
gan.compile(gen_optimizer=optimizers.Adam(), disc_optimizer=optimizers.Adam())

# The "real" samples are the VAE encodings of the training rows.
gan.fit(x=x_train_embedded, batch_size=32, epochs=50)

# Persist the trained model into the project's model registry.
gan.save(filepath=Config.Paths.REGISTRY_PATH / 'gan.keras')

Epoch 1/50
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - disc_loss: 1.1554 - gen_loss: 0.9199 - loss: 0.0000e+00
Epoch 2/50
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - disc_loss: 1.3993 - gen_loss: 0.8884 - loss: 0.0000e+00
Epoch 3/50
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - disc_loss: 1.4060 - gen_loss: 0.7548 - loss: 0.0000e+00
Epoch 4/50
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - disc_loss: 1.4032 - gen_loss: 0.7387 - loss: 0.0000e+00
Epoch 5/50
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - disc_loss: 1.3529 - gen_loss: 0.7759 - loss: 0.0000e+00
Epoch 6/50
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - disc_loss: 1.3457 - gen_loss: 0.7646 - loss: 0.0000e+00
Epoch 7/50
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - disc_loss: 1.3827 - gen_loss: 0.76

In [20]:
# Rebind `gan` to the model stored at REGISTRY_PATH / 'gan.keras' (the file
# written by the GAN training cell).
# NOTE(review): GAN is a project-defined class — confirm it is registered for
# Keras deserialization (or pass `custom_objects`) so load_model can rebuild it.
# NOTE(review): this cell's execution count (20) is higher than the cells that
# follow it; under Restart & Run All it runs before them, so verify the
# downstream cells behave the same with the reloaded model.
gan = keras.models.load_model(Config.Paths.REGISTRY_PATH / 'gan.keras')

In [13]:
# Sampling parameters for the generator input.
STD_DEV = 1.5             # standard deviation of the zero-mean Gaussian noise
NOISE_SEED = 42           # fixed seed so the generated sample is reproducible
N_NOISE_SAMPLES = 50000   # number of synthetic rows to generate
NOISE_DIM = 100           # must equal the generator's input width (noise_dim)

# Use a dedicated seeded Generator instead of the unseeded global np.random
# state, so Restart & Run All yields the same noise every time.
noise = np.random.default_rng(NOISE_SEED).normal(0, STD_DEV, (N_NOISE_SAMPLES, NOISE_DIM))

In [14]:
# Feed the sampled noise through the GAN to obtain generated vectors as a NumPy array.
# NOTE(review): stock keras `Model.predict` returns NumPy arrays, which have no
# `.numpy()` method; this line relies on the project's GAN.predict returning a
# tensor — confirm, particularly for a model restored via load_model.
gen_data = gan.predict(noise).numpy()

In [18]:
# Shape check: expect one generated 10-wide vector per noise row.
gen_data.shape

(50000, 10)

In [19]:
# Pass the generated vectors through the VAE using the project-specific
# `mode='decode'` switch of VAE.predict.
# NOTE(review): presumably this runs only the decoder, mapping each 10-wide
# vector back to the 3-feature data space — confirm against models.VAE.predict.
gen_data_decoded = vae.predict(x=gen_data, mode='decode')

[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 754us/step


In [46]:
def normalize_array(arr, min_val=0, max_val=1):
    """Linearly rescale ``arr`` so its minimum maps to ``min_val`` and its maximum to ``max_val``.

    Args:
        arr: Array-like of numbers (a NumPy array or anything ``np.asarray``
            accepts). The global min/max over all elements is used.
        min_val: Lower bound of the target range.
        max_val: Upper bound of the target range.

    Returns:
        A float NumPy array with the same shape as ``arr``. If every element
        of ``arr`` is identical, all outputs equal ``min_val`` rather than
        NaN/inf.

    Raises:
        ValueError: If ``arr`` is empty (raised by ``np.min``).
    """
    arr = np.asarray(arr)
    current_min = np.min(arr)
    current_max = np.max(arr)

    span = current_max - current_min
    if span == 0:
        # Constant input: there is no range to stretch. Dividing by 1 keeps the
        # formula (and result dtype) unchanged while mapping every element to
        # min_val instead of producing 0/0 = NaN.
        span = 1

    return min_val + (arr - current_min) * (max_val - min_val) / span

In [51]:
# Rescale column 2 of the decoded samples onto [1, 5], round to whole numbers,
# and tabulate how often each value occurs.
# NOTE(review): column 2 is presumably the rating feature — confirm against the
# DataLoader's column order.
(
    pd.DataFrame(
        normalize_array(gen_data_decoded[:, 2], 1, 5).round()
    )
    .value_counts()
)

0  
4.0    44401
3.0     4988
5.0      441
2.0      164
1.0        6
Name: count, dtype: int64