In [1]:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pickle
import logging
import time
from tensorflow.keras.layers import GRU, Flatten, Dense, Conv1D, Dropout, LeakyReLU
from tensorflow.keras import Sequential
from sklearn.metrics import mean_squared_error
from pathlib import Path
import os

%load_ext blackcellmagic

# %%black for formatting

2022-03-04 01:08:57.715368: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-03-04 01:08:57.715384: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')

In [3]:
load_path = Path(os.path.abspath('')).parents[0] / 'data' / 'scaled_data' 
load_path

PosixPath('/home/kuba1302/Gan/gan/data/scaled_data')

In [4]:
names = ['X_list', 'Y_preds_real_list', 'Y_whole_real_list']
def load_df_lists(names):
    data_dict = {}
    for name in names:
        with open(load_path / f'{name}.pickle', 'rb') as handle:
            data_dict[name] = pickle.load(handle)

data_dict = load_df_lists(names)

In [5]:
with open(load_path / 'X_list.pickle', 'rb') as test:
    X_list = pickle.load(test) 
with open(load_path / 'Y_preds_real_list.pickle', 'rb') as test:
    Y_preds_real_list = pickle.load(test) 
with open(load_path / 'Y_whole_real_list.pickle', 'rb') as test:
    Y_whole_real_list = pickle.load(test) 

In [6]:
data_dict

In [7]:
def generator(input_dim, feature_size, output_dim=1):
    model = Sequential()
    model.add(
        GRU(
            units=1024,
            return_sequences=True,
            input_shape=(input_dim, feature_size),
            recurrent_dropout=0.2,
        )
    )
    model.add(GRU(units=512, return_sequences=True, recurrent_dropout=0.2))
    model.add(GRU(units=256, return_sequences=False, recurrent_dropout=0.2))
    model.add(Dense(128))
    model.add(Dense(64))
    model.add(Dense(units=output_dim))
    return model


def discriminator(input_shape):
    model = tf.keras.Sequential()
    model.add(
        Conv1D(
            32,
            input_shape=input_shape,
            kernel_size=3,
            strides=2,
            padding="same",
            activation=LeakyReLU(alpha=0.01),
        )
    )
    model.add(
        Conv1D(
            64,
            kernel_size=5,
            strides=2,
            padding="same",
            activation=LeakyReLU(alpha=0.01),
        )
    )
    model.add(
        Conv1D(
            128,
            kernel_size=5,
            strides=2,
            padding="same",
            activation=LeakyReLU(alpha=0.01),
        )
    )
    model.add(Flatten())
    model.add(Dense(220, use_bias=False))
    model.add(LeakyReLU(alpha=0.1))
    model.add(Dense(220, use_bias=False, activation="relu"))
    model.add(Dense(1, activation="sigmoid"))
    return model

In [8]:
class StockTimeGan:
    def __init__(self, generator, discriminator, learning_rate=0.00016):
        self.learning_rate = learning_rate
        self.generator = generator
        self.generator_optimizer = tf.keras.optimizers.Adam(lr=self.learning_rate)
        self.discriminator = discriminator
        self.discriminator_optimizer = tf.keras.optimizers.Adam(lr=self.learning_rate)
        self.loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        self.checkpoint_directory = str(
            Path(os.path.abspath("")).parents[0] / "/checkpoints/"
        )
        self.checkpoint_prefix = str(Path(self.checkpoint_directory) / "ckpt")
        self.checkpoint = tf.train.Checkpoint(
            generator_optimizer=self.generator_optimizer,
            discriminator_optimizer=self.discriminator_optimizer,
            generator=self.generator,
            discriminator=self.discriminator,
        )

    def discriminator_loss(self, real_output, fake_output):
        real_loss = self.cross_entropy(tf.ones_like(real_output), real_output)
        fake_loss = self.cross_entropy(tf.zeros_like(fake_output), fake_output)
        total_loss = real_loss + fake_loss
        return total_loss

    def generator_loss(self, fake_output):
        return self.cross_entropy(tf.ones_like(fake_output), fake_output)

    @tf.function
    def train_step(self, real_x, real_to_pred_y, real_whole_y): 
        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            generated_data = self.generator(real_x, training=True)
            generated_data_reshape = tf.reshape(
                generated_data, [generated_data.shape[0], generated_data.shape[1], 1]
            )
            d_fake_input = tf.concat(
                [tf.cast(generated_data_reshape, tf.float64), real_to_pred_y], axis=1
            )
            real_y_reshape = tf.reshape(real_whole_y, [real_whole_y.shape[0], real_whole_y.shape[1], 1])
            d_real_input = tf.concat([real_y_reshape, real_to_pred_y], axis=1)
            real_output = self.discriminator(d_real_input, training=True)
            fake_output = self.discriminator(d_fake_input, training=True)
            gen_loss = self.generator_loss(fake_output)
            disc_loss = self.discriminator_loss(real_output, fake_output)

        gradients_of_generator = gen_tape.gradient(
            gen_loss, self.generator.trainable_variables
        )
        gradients_of_discriminator = disc_tape.gradient(
            disc_loss, self.discriminator.trainable_variables
        )

        self.generator_optimizer.apply_gradients(
            zip(gradients_of_generator, self.generator.trainable_variables)
        )
        self.discriminator_optimizer.apply_gradients(
            zip(gradients_of_discriminator, self.discriminator.trainable_variables)
        )
        return real_to_pred_y, generated_data, disc_loss, gen_loss

    def train(self, real_x, real_to_pred_y, real_whole_y, epochs):
        train_history = {}
        train_history['gen_loss'] = []
        train_history['disc_loss'] = []
        train_history['real_y'] = []
        train_history['pred_y'] = []

        for i in range(epochs): 
            start_time = time.time()
            real_to_pred_y, generated_data, disc_loss, gen_loss = self.train_step(real_x, real_to_pred_y, real_whole_y)
            train_history['gen_loss'].append(gen_loss)
            train_history['disc_loss'].append(disc_loss)
            train_history['real_y'].append(real_whole_y)
            train_history['pred_y'].append(generated_data)
            end_time = time.time()
            epoch_time = end_time - start_time
            rmse = np.sqrt(mean_squared_error(real_whole_y, generated_data))
            logging.info(f'Epoch: {i} - RMSE: {rmse} - Epoch time: {epoch_time}')

        return train_history


In [9]:
generator = generator(X_list.shape[1], X_list.shape[2])
discriminator = discriminator((X_list.shape[1], Y_preds_real_list.shape[1]))

2022-03-04 01:08:59.349395: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-03-04 01:08:59.349423: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-03-04 01:08:59.349443: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (kuba1302-Inspiron-5502): /proc/driver/nvidia/version does not exist
2022-03-04 01:08:59.349713: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
gan = StockTimeGan(generator, discriminator)

  super(Adam, self).__init__(name, **kwargs)


In [11]:
train_history = gan.train(X_list, Y_preds_real_list, Y_whole_real_list, epochs=10)

ValueError: in user code:

    File "/tmp/ipykernel_2316631/1608768814.py", line 36, in train_step  *
        d_fake_input = tf.concat(

    ValueError: Shape must be rank 3 but is rank 2 for '{{node concat}} = ConcatV2[N=2, T=DT_DOUBLE, Tidx=DT_INT32](Cast, real_to_pred_y, concat/axis)' with input shapes: [713,1,1], [713,1], [].
