In [5]:
import matplotlib.pyplot as plt

In [6]:
import sys
from os.path import dirname
sys.path.append(dirname('./synthcity/src/'))

In [7]:
import torch
import torch.nn as nn
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [24]:
from sklearn.model_selection import train_test_split

In [47]:
import torch.optim as optim
from torch.utils.data import DataLoader

In [8]:
import seaborn as sns

In [9]:
from synthcity.plugins import Plugins
from synthcity.benchmark import Benchmarks

    The default C++ compiler could not be found on your system.
    You need to either define the CXX environment variable or a symlink to the g++ command.
    For example if g++-8 is the command you can do
      import os
      os.environ['CXX'] = 'g++-8'
    


In [10]:
Plugins(categories=["time_series"]).list()

['timegan', 'fflows', 'timevae']

In [11]:
from synthcity.utils.datasets.time_series.pbc import PBCDataloader
from synthcity.utils.datasets.time_series.google_stocks import GoogleStocksDataloader
from synthcity.plugins.core.dataloader import TimeSeriesDataLoader

In [13]:
static_data, temporal_data, horizons, outcome = GoogleStocksDataloader().load()

In [14]:
data = TimeSeriesDataLoader(
    temporal_data=temporal_data,
    observation_times=horizons,
    static_data=static_data,
    outcome=outcome,
)

In [15]:
syn_model_without_dp = Plugins().get("timegan", dp_enabled=False)

[2023-08-29T11:43:40.469226-0400][20220][CRITICAL] module disabled: C:\Users\ma7mo\AppData\Local\Programs\Python\Python310\lib\site-packages\synthcity\plugins\generic\plugin_goggle.py


In [15]:
new_model = Plugins().get("timegan", mode="LSTM")

[2023-08-29T11:21:03.698570-0400][1524][CRITICAL] module disabled: C:\Users\ma7mo\AppData\Local\Programs\Python\Python310\lib\site-packages\synthcity\plugins\generic\plugin_goggle.py
[2023-08-29T11:21:03.698570-0400][1524][CRITICAL] module disabled: C:\Users\ma7mo\AppData\Local\Programs\Python\Python310\lib\site-packages\synthcity\plugins\generic\plugin_goggle.py


In [16]:
new_model.fit(data)

  4%|███                                                                             | 39/1000 [00:26<10:43,  1.49it/s]


KeyboardInterrupt: 

In [16]:
syn_model_without_dp.fit(data)

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [04:00<00:00,  4.16it/s]


<synthcity.plugins.time_series.plugin_timegan.TimeGANPlugin at 0x162eb365db0>

In [39]:
synth_data = syn_model_without_dp.generate(count=len(data)/10)

In [40]:
synth_data

Unnamed: 0,seq_id,seq_time_id,seq_temporal_Close,seq_temporal_High,seq_temporal_Low,seq_temporal_Open,seq_temporal_Volume,seq_out_Open_next
0,0,0.992692,0.395024,0.356342,0.411729,0.372473,0.267372,0.316910
1,0,0.947454,0.408647,0.360036,0.402173,0.373941,0.218904,0.316910
2,0,0.949977,0.411638,0.353162,0.402652,0.369543,0.256489,0.316910
3,0,0.948791,0.412320,0.349451,0.406865,0.376446,0.240710,0.316910
4,0,0.893649,0.433219,0.352305,0.410291,0.377639,0.215010,0.316910
...,...,...,...,...,...,...,...,...
495,49,0.853573,0.420925,0.357470,0.404775,0.380247,0.189489,0.316911
496,49,0.878370,0.424703,0.355659,0.416932,0.374426,0.208137,0.316911
497,49,0.870931,0.412096,0.366250,0.414209,0.388721,0.240522,0.316911
498,49,0.873585,0.420702,0.371973,0.419031,0.200618,0.262444,0.316911


In [19]:
def create_sequence(dataset, seq_len):
    sequences = []
    labels = []


    for index in range(0, len(dataset)//seq_len): # Selecting 50 rows at a time
        sequences.append(dataset.iloc[index*seq_len:(index+1)*seq_len-1])
        labels.append(dataset.iloc[(index+1)*seq_len-1][2])
    return (np.array(sequences),np.array(labels))

In [73]:
class Network(nn.Module):
    def __init__(self, dropout,
                 input_dim=8, num_classes = 1, hidden_size = 1, **kwargs):

        super(Network, self).__init__()

        print("==> not used params in network class:", kwargs.keys())

        self.input_dim = input_dim
        self.dropout = dropout
        self.hidden_size = hidden_size


        # Main part of the network
        self.lstm_layers = nn.ModuleList()
        self.lstm_layers.append(nn.LSTM(input_size=input_dim,hidden_size=self.hidden_size))
        self.lstm_layers.append(nn.Dropout(p=dropout))
        self.lstm_layers.append(nn.LSTM(input_size=self.hidden_size,
                           hidden_size=self.hidden_size))
        self.lstm_layers.append(nn.Dropout(p=dropout))
        self.lstm_layers.append(nn.LSTM(input_size=self.hidden_size,
                           hidden_size=self.hidden_size))
        self.lstm_layers.append(nn.Dropout(p=dropout))
        # Output layer
        self.output_layer = nn.Linear(self.hidden_size, num_classes)
        # self.output_activation = nn.Sigmoid()

    def forward(self, X, lengths=None):
        # Input layer
        X = X.to(torch.float32)
        mX = X
        # Main part of the network
        i = 0
        for lstm in self.lstm_layers:
            if i % 2 == 0:
                output, _ = lstm(mX)
            else:
                output = lstm(mX)
            mX = output
            i += 1
        L = output[:, -1, :]
        # Output layer
        y = self.output_layer(L)
        return y


In [104]:
model = Network(0.2)

==> not used params in network class: dict_keys([])


In [105]:
optimizer_config = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))
loss = nn.MSELoss()

In [106]:
X, y = create_sequence(data.dataframe(), 10)

In [107]:
synth_X, synth_y = create_sequence(synth_data.dataframe(), 10)

In [108]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [109]:
train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=1)
val_loader = DataLoader(list(zip(X_test, y_test)), batch_size=1, shuffle=True)
optimizer = optimizer_config

In [110]:
for epoch in range(0, 100):
        model.train()
        running_loss = 0
        for i, batch in enumerate(train_loader):
            optimizer.zero_grad()
            inputs, targets = batch
            outputs = model(inputs)
            targets = targets.reshape(tuple(outputs.shape))
            targets = targets.to(torch.float32)
            batch_loss = loss(outputs, targets)
            batch_loss.backward()
            optimizer.step()
        model.eval()
        val_loss = 0
        with torch.no_grad():
            num_batches = 0
            res = {}
            y_true = []
            predictions = []
            for batch in val_loader:
                inputs, targets = batch
                outputs = model(inputs)
                targets = targets.reshape(tuple(outputs.shape))
                targets = targets.to(torch.float32)
                batch_loss = loss(outputs, targets)
                val_loss += batch_loss.item()
                num_batches += 1
            val_loss /= num_batches
            print('Epoch [{}/{}], Step [{}/{}], Validation Batch Loss: {:.4f}'
                    .format(epoch + 1, 100, i + 1, len(train_loader), val_loss))

Epoch [1/100], Step [33/33], Validation Batch Loss: 0.1087
Epoch [2/100], Step [33/33], Validation Batch Loss: 0.0921
Epoch [3/100], Step [33/33], Validation Batch Loss: 0.0782
Epoch [4/100], Step [33/33], Validation Batch Loss: 0.0674
Epoch [5/100], Step [33/33], Validation Batch Loss: 0.0587
Epoch [6/100], Step [33/33], Validation Batch Loss: 0.0523
Epoch [7/100], Step [33/33], Validation Batch Loss: 0.0473
Epoch [8/100], Step [33/33], Validation Batch Loss: 0.0437
Epoch [9/100], Step [33/33], Validation Batch Loss: 0.0412
Epoch [10/100], Step [33/33], Validation Batch Loss: 0.0395
Epoch [11/100], Step [33/33], Validation Batch Loss: 0.0386
Epoch [12/100], Step [33/33], Validation Batch Loss: 0.0381
Epoch [13/100], Step [33/33], Validation Batch Loss: 0.0381
Epoch [14/100], Step [33/33], Validation Batch Loss: 0.0384
Epoch [15/100], Step [33/33], Validation Batch Loss: 0.0388
Epoch [16/100], Step [33/33], Validation Batch Loss: 0.0394
Epoch [17/100], Step [33/33], Validation Batch Lo

In [115]:
synth_model = Network(0.2)

==> not used params in network class: dict_keys([])


In [116]:
optimizer_config = optim.Adam(synth_model.parameters(), lr=0.001, betas=(0.9, 0.999))
loss = nn.MSELoss()

In [117]:
train_loader = DataLoader(list(zip(synth_X, synth_y)), batch_size=1, shuffle=True)
val_loader = DataLoader(list(zip(X_test, y_test)), batch_size=1)
optimizer = optimizer_config

In [118]:
for epoch in range(0, 100):
        synth_model.train()
        running_loss = 0
        for i, batch in enumerate(train_loader):
            optimizer.zero_grad()
            inputs, targets = batch
            outputs = synth_model(inputs)
            targets = targets.reshape(tuple(outputs.shape))
            targets = targets.to(torch.float32)
            batch_loss = loss(outputs, targets)
            batch_loss.backward()
            optimizer.step()
        synth_model.eval()
        val_loss = 0
        with torch.no_grad():
            num_batches = 0
            res = {}
            y_true = []
            predictions = []
            for batch in val_loader:
                inputs, targets = batch
                outputs = synth_model(inputs)
                targets = targets.reshape(tuple(outputs.shape))
                targets = targets.to(torch.float32)
                batch_loss = loss(outputs, targets)
                val_loss += batch_loss.item()
                num_batches += 1
            val_loss /= num_batches
            print('Epoch [{}/{}], Step [{}/{}], Validation Batch Loss: {:.4f}'
                    .format(epoch + 1, 100, i + 1, len(train_loader), val_loss))

Epoch [1/100], Step [50/50], Validation Batch Loss: 0.2965
Epoch [2/100], Step [50/50], Validation Batch Loss: 0.2411
Epoch [3/100], Step [50/50], Validation Batch Loss: 0.1967
Epoch [4/100], Step [50/50], Validation Batch Loss: 0.1637
Epoch [5/100], Step [50/50], Validation Batch Loss: 0.1356
Epoch [6/100], Step [50/50], Validation Batch Loss: 0.1135
Epoch [7/100], Step [50/50], Validation Batch Loss: 0.0958
Epoch [8/100], Step [50/50], Validation Batch Loss: 0.0815
Epoch [9/100], Step [50/50], Validation Batch Loss: 0.0722
Epoch [10/100], Step [50/50], Validation Batch Loss: 0.0645
Epoch [11/100], Step [50/50], Validation Batch Loss: 0.0583
Epoch [12/100], Step [50/50], Validation Batch Loss: 0.0536
Epoch [13/100], Step [50/50], Validation Batch Loss: 0.0505
Epoch [14/100], Step [50/50], Validation Batch Loss: 0.0486
Epoch [15/100], Step [50/50], Validation Batch Loss: 0.0475
Epoch [16/100], Step [50/50], Validation Batch Loss: 0.0461
Epoch [17/100], Step [50/50], Validation Batch Lo