In [2]:
import matplotlib.pyplot as plt

In [3]:
import sys
from os.path import dirname
sys.path.append(dirname('./synthcity/src/'))

In [4]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd

In [5]:
import time

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
from sklearn.model_selection import TimeSeriesSplit

In [8]:
from sklearn import preprocessing 

In [9]:
from datetime import datetime

In [10]:
import torch.optim as optim
from torch.utils.data import DataLoader

In [11]:
from synthcity.plugins import Plugins
from synthcity.benchmark import Benchmarks

In [12]:
from synthcity.utils.datasets.time_series.pbc import PBCDataloader
from synthcity.utils.datasets.time_series.google_stocks import GoogleStocksDataloader
from synthcity.plugins.core.dataloader import TimeSeriesDataLoader

In [13]:
from gretel_synthetics.timeseries_dgan.dgan import DGAN
from gretel_synthetics.timeseries_dgan.config import DGANConfig

In [14]:
from gretel_synthetics.timeseries_dgan.config import DfStyle

In [15]:
from sklearn.preprocessing import MinMaxScaler

In [16]:
data = pd.read_csv("data/card_transaction_trimmed.csv")

In [17]:
label_encoder = preprocessing.LabelEncoder()

In [18]:
data.columns

Index(['Unnamed: 0', 'User', 'Card', 'Year', 'Month', 'Day', 'Time', 'Amount',
       'Use Chip', 'Merchant Name', 'Merchant City', 'Merchant State', 'Zip',
       'MCC', 'Errors?', 'Is Fraud?', 'date'],
      dtype='object')

In [19]:
len(data)

715200

In [20]:
num_seq_len = {}

In [21]:
max_user = data['User'].max()

In [22]:
max_user

1999

In [23]:
data = data.drop('Errors?', axis=1)

In [24]:
# Integer-encode each categorical column. The shared encoder is re-fitted for
# every column, so afterwards it only holds the mapping of the last one.
for column in ("Merchant Name", "Merchant City", "Merchant State", "Zip", "Use Chip"):
    data[column] = label_encoder.fit_transform(data[column])

# Drop the first character of each amount string (presumably a currency
# symbol -- confirm against the raw CSV), parse the rest as float and scale by
# the largest absolute amount.
amount = data['Amount'].str[1:].astype("float")
data['Amount'] = amount / amount.abs().max()

In [25]:
data['Is Fraud?'] = label_encoder.fit_transform(data["Is Fraud?"])

In [26]:
# Build one "<Month>/<Day>/<Year>/<Time>" string per transaction. Whole-column
# string concatenation yields the same text as the former row-wise
# ``apply(axis=1)`` without calling a Python lambda once per row.
data['date'] = (
    data['Month'].astype(str) + "/"
    + data['Day'].astype(str) + "/"
    + data['Year'].astype(str) + "/"
    + data['Time'].astype(str)
)

In [27]:
# Transaction timestamps as seconds since the Unix epoch (float64).
timestamps = (
    pd.to_datetime(data["date"], infer_datetime_format=True).astype(np.int64).astype(np.float64) / 10**9
)

# Min-max scale the timestamps within each user so every user's history is
# mapped onto [0, 1]. ``transform`` keeps the result aligned with the rows of
# ``data`` by index, so the list can be assigned back as a column regardless of
# how the frame is ordered, and the frame is scanned once instead of once per
# user id.
horizons = (
    timestamps.groupby(data["User"])
    .transform(lambda ts: MinMaxScaler().fit_transform(ts.to_numpy().reshape(-1, 1)).ravel())
    .tolist()
)

In [28]:
data["Horizons"] = horizons

In [29]:
# Drop the calendar fields, the "Unnamed: 0" column and the helper "date"
# string in a single in-place call.
data.drop(columns=["Year", "Month", "Day", "Unnamed: 0", "date"], inplace=True)

In [30]:
data.sort_values(by=['User', 'Horizons'], inplace = True)

In [31]:
# Keep only the part of the time string before the first ":" (the hour) as an
# integer. The vectorised ``.str`` accessor replaces a per-row ``apply``.
data['Time'] = data['Time'].str.split(":").str[0].astype("int64")

In [32]:
# One DataFrame per user, in ascending user-id order (``groupby`` sorts its
# keys by default); users with no rows simply produce no group. A single
# grouping pass replaces one boolean scan of the whole frame per user id.
user_data = [user_frame for _, user_frame in data.groupby("User")]

In [33]:
static_cols = ["User"]
temporal_cols = ["Card", "Time", "Amount",
                      "Use Chip", "Merchant Name", "Merchant City",
                      "Merchant State", "Zip", "MCC"]
horizon_cols = ["Horizons"]
outcome_cols = ["Is Fraud?"]

In [34]:
# for i in range(max_user+1):
#     x = data[data['User'] == i]
#     if len(x) == 0:
#         continue
#     if x.isnull().values.any():
#         print("yes")
#         # data.drop(x[:].index, inplace = True)

In [32]:
# for i in range(max_user+1):
#     x = len(data[data['User'] == i])
#     if x not in num_seq_len.keys():
#         num_seq_len[x] = 0
#     num_seq_len[x] += 1

In [33]:
# x = []
# y = []
# for seq in sorted(num_seq_len.keys()):
#     x.append(seq)
#     y.append(num_seq_len[seq])

In [34]:
# plt.figure(figsize=(10,10))
# plt.scatter(x, y, label='train_original')
# plt.xlabel("Epoch")
# plt.ylabel("Loss")
# plt.legend()
# plt.show

In [35]:
# cur = 0
# for i in range(len(x)):
#     if max_user - cur < 0.8*max_user:
#         break
#     cur += y[i]
# min_seq_len = x[i]

In [36]:
# min_seq_len

In [37]:
# for i in range(max_user+1):
#     if len(data[data['User'] == i]) < min_seq_len:
#         data.drop(data[data['User'] == i].index, inplace = True)
#     else:
#         data.drop(data[data['User'] == i][min_seq_len:].index, inplace=True)

In [38]:
# from pathlib import Path  
# filepath = Path('data/card_transaction_trimmed.csv')  
# filepath.parent.mkdir(parents=True, exist_ok=True)  
# data.to_csv(filepath)  


In [39]:
# for index, row in data.iterrows():
#     try:
#       datetime(row['Year'], row['Month'], row['Day'], int(row['Time'].split(':')[0]), int(row['Time'].split(':')[1]))
#     except:
#       data.drop(index, inplace=True)

In [40]:
# max_user = data['User'].max()
# min_seq_len = None
# for i in range(max_user+1):
#     x = len(data[data['User'] == i])
#     if (min_seq_len == None or x < min_seq_len) and x != 0:
#         min_seq_len = x
# for i in range(max_user+1):
#     x = len(data[data['User'] == i])
#     if x == 0:
#         continue
#     data.drop(data[data['User'] == i][min_seq_len:].index, inplace=True)

In [41]:
# from pathlib import Path  
# filepath = Path('data/card_transaction_trimmed.csv')  
# filepath.parent.mkdir(parents=True, exist_ok=True)  
# data.to_csv(filepath)  

In [42]:
# from pathlib import Path  
# filepath = Path('data/card_transaction_trimmed.csv')  
# filepath.parent.mkdir(parents=True, exist_ok=True)  
# data.to_csv(filepath)  

In [35]:
data.dtypes

User                int64
Card                int64
Time                int64
Amount            float64
Use Chip            int64
Merchant Name       int64
Merchant City       int64
Merchant State      int64
Zip                 int64
MCC                 int64
Is Fraud?           int64
Horizons          float64
dtype: object

In [36]:
# Hold out 20% of the users. The seed is fixed so the split -- and everything
# derived from it further down -- is reproducible after a kernel restart.
train_data, test_data = train_test_split(user_data, test_size=0.2, random_state=42)

In [37]:
def split_data(data, static_cols, temporal_cols, horizon_cols, outcome_cols):
    """Split per-user frames into static, temporal, horizon and outcome parts.

    Args:
        data: iterable of DataFrames, one per sequence (user).
        static_cols: column names whose value in the FIRST row of each frame
            is used as that sequence's static features.
        temporal_cols: column names kept as the per-step temporal features.
        horizon_cols: column names holding the observation times.
        outcome_cols: column names whose value in the LAST row of each frame
            is used as that sequence's outcome.

    Returns:
        Tuple ``(static_dataframes, temporal_dataframes, horizons, outcomes)``:
        * static_dataframes: DataFrame with one row per sequence.
        * temporal_dataframes: list with ``sample[temporal_cols]`` per sequence.
        * horizons: list with ``sample[horizon_cols].values.tolist()`` per
          sequence (a list of single-item lists when one horizon column is given).
        * outcomes: DataFrame with one row per sequence.

    Note:
        The static and outcome frames keep the one-level MultiIndex columns
        that ``pd.DataFrame(columns=[cols])`` produces, so their labels are
        tuples such as ``('User',)``. Later cells refer to the resulting
        ``"seq_static_('User',)"`` names, so this must not be "simplified".
    """
    static_rows = []
    outcome_rows = []
    temporal_dataframes = []
    horizons = []
    for sample in data:
        # Take the whole first/last row so several static or outcome columns
        # work too; collecting plain lists avoids growing a DataFrame row by row.
        static_rows.append(sample[static_cols].iloc[0].tolist())
        outcome_rows.append(sample[outcome_cols].iloc[-1].tolist())
        temporal_dataframes.append(sample[temporal_cols])
        horizons.append(sample[horizon_cols].values.tolist())

    static_dataframes = pd.DataFrame(
        static_rows, columns=pd.MultiIndex.from_arrays([list(static_cols)])
    )
    outcomes = pd.DataFrame(
        outcome_rows, columns=pd.MultiIndex.from_arrays([list(outcome_cols)])
    )
    return static_dataframes, temporal_dataframes, horizons, outcomes

In [38]:
static_train, temporal_train, horizons_train, outcome_train = split_data(train_data, static_cols, temporal_cols, horizon_cols, outcome_cols)

In [39]:
# delta = 1 / (n * ln n), later passed as ``delta`` to the DP-enabled TimeGAN plugins.
# NOTE(review): at this point ``data`` is still the full transaction DataFrame
# (it is rebound to a TimeSeriesDataLoader in the next cell), so n counts every
# transaction row, including those of held-out users -- confirm this is the
# intended n.
delta = 1/ (len(data) * np.log(len(data)))

In [40]:
# Wrap the training split in a TimeSeriesDataLoader.
# NOTE: this rebinds ``data`` -- from here on the name refers to the loader,
# not the transaction DataFrame, so earlier preprocessing cells cannot be
# re-run without reloading the CSV first.
data = TimeSeriesDataLoader(
    temporal_data=temporal_train,
    observation_times=horizons_train,
    static_data=static_train,
    outcome = outcome_train,
)

Unique Temporal Data
finding max len
Checking Outcome
Pack raw data
Running initializer


In [41]:
horizons

[0.0,
 7.243452436256348e-05,
 0.004970388076586119,
 0.007326234749946536,
 0.009940776153179343,
 0.011492944532399463,
 0.0147973385486182,
 0.014859425283788141,
 0.019843610412635826,
 0.0205369122886907,
 0.02275478843944967,
 0.022834121489943016,
 0.02481744775229089,
 0.029784386565808063,
 0.029846473300978005,
 0.03048458696799372,
 0.0347306298009471,
 0.03482720916676385,
 0.03631039228469035,
 0.03801087897570454,
 0.03984933618932729,
 0.04042191385810412,
 0.041177302469328936,
 0.04121869362610653,
 0.042567355484500524,
 0.04469900005863536,
 0.044705898584766146,
 0.04473694195235112,
 0.047868872815321595,
 0.049693532976682775,
 0.04971077929200618,
 0.05106633967653096,
 0.05110428157024671,
 0.051124977148631956,
 0.052390856693470766,
 0.05463632694874576,
 0.05465357326407627,
 0.05473635557763146,
 0.05574698965565972,
 0.059606715025338985,
 0.0596584539713092,
 0.06452881341901673,
 0.06597405464322748,
 0.06937502802526296,
 0.06952679560011887,
 0.06965441

In [42]:
data.dataframe()

Unnamed: 0,seq_id,seq_time_id,"seq_static_('User',)",seq_temporal_Amount,seq_temporal_Card,seq_temporal_MCC,seq_temporal_Merchant City,seq_temporal_Merchant Name,seq_temporal_Merchant State,seq_temporal_Time,seq_temporal_Use Chip,seq_temporal_Zip,"seq_out_('Is Fraud?',)"
0,0,[0.0],1712,0.024753,0.0,5411.0,4245.0,13684.0,112.0,12.0,2.0,11333.0,0
1,0,[0.003515785632394852],1712,0.000876,0.0,5912.0,4245.0,3913.0,112.0,10.0,2.0,11332.0,0
2,0,[0.0038970482106535087],1712,0.021245,0.0,5300.0,374.0,16243.0,112.0,12.0,2.0,11344.0,0
3,0,[0.004054999850218621],1712,0.002456,0.0,5813.0,4245.0,23587.0,112.0,13.0,2.0,11332.0,0
4,0,[0.004177548536091535],1712,0.024194,0.0,4829.0,3582.0,7385.0,112.0,14.0,2.0,11362.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
572155,1279,[0.9955809595968823],1413,0.006182,0.0,4900.0,0.0,4884.0,129.0,18.0,1.0,14121.0,0
572156,1279,[0.9971619557859164],1413,0.009663,0.0,7349.0,5847.0,20004.0,32.0,18.0,2.0,4752.0,0
572157,1279,[0.9973976522906156],1413,0.005502,0.0,5814.0,5847.0,7065.0,32.0,22.0,2.0,4752.0,0
572158,1279,[0.9984900356342976],1413,0.006554,0.0,5251.0,5847.0,24028.0,32.0,15.0,2.0,4753.0,0


In [43]:
df_data = data.dataframe()

In [44]:
df_data

Unnamed: 0,seq_id,seq_time_id,"seq_static_('User',)",seq_temporal_Amount,seq_temporal_Card,seq_temporal_MCC,seq_temporal_Merchant City,seq_temporal_Merchant Name,seq_temporal_Merchant State,seq_temporal_Time,seq_temporal_Use Chip,seq_temporal_Zip,"seq_out_('Is Fraud?',)"
0,0,[0.0],1712,0.024753,0.0,5411.0,4245.0,13684.0,112.0,12.0,2.0,11333.0,0
1,0,[0.003515785632394852],1712,0.000876,0.0,5912.0,4245.0,3913.0,112.0,10.0,2.0,11332.0,0
2,0,[0.0038970482106535087],1712,0.021245,0.0,5300.0,374.0,16243.0,112.0,12.0,2.0,11344.0,0
3,0,[0.004054999850218621],1712,0.002456,0.0,5813.0,4245.0,23587.0,112.0,13.0,2.0,11332.0,0
4,0,[0.004177548536091535],1712,0.024194,0.0,4829.0,3582.0,7385.0,112.0,14.0,2.0,11362.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
572155,1279,[0.9955809595968823],1413,0.006182,0.0,4900.0,0.0,4884.0,129.0,18.0,1.0,14121.0,0
572156,1279,[0.9971619557859164],1413,0.009663,0.0,7349.0,5847.0,20004.0,32.0,18.0,2.0,4752.0,0
572157,1279,[0.9973976522906156],1413,0.005502,0.0,5814.0,5847.0,7065.0,32.0,22.0,2.0,4752.0,0
572158,1279,[0.9984900356342976],1413,0.006554,0.0,5251.0,5847.0,24028.0,32.0,15.0,2.0,4753.0,0


In [45]:
# Each seq_time_id cell holds a one-element list; replace it with that element.
df_data['seq_time_id'] = df_data['seq_time_id'].map(lambda times: times[0])

In [46]:
df_data

Unnamed: 0,seq_id,seq_time_id,"seq_static_('User',)",seq_temporal_Amount,seq_temporal_Card,seq_temporal_MCC,seq_temporal_Merchant City,seq_temporal_Merchant Name,seq_temporal_Merchant State,seq_temporal_Time,seq_temporal_Use Chip,seq_temporal_Zip,"seq_out_('Is Fraud?',)"
0,0,0.000000,1712,0.024753,0.0,5411.0,4245.0,13684.0,112.0,12.0,2.0,11333.0,0
1,0,0.003516,1712,0.000876,0.0,5912.0,4245.0,3913.0,112.0,10.0,2.0,11332.0,0
2,0,0.003897,1712,0.021245,0.0,5300.0,374.0,16243.0,112.0,12.0,2.0,11344.0,0
3,0,0.004055,1712,0.002456,0.0,5813.0,4245.0,23587.0,112.0,13.0,2.0,11332.0,0
4,0,0.004178,1712,0.024194,0.0,4829.0,3582.0,7385.0,112.0,14.0,2.0,11362.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
572155,1279,0.995581,1413,0.006182,0.0,4900.0,0.0,4884.0,129.0,18.0,1.0,14121.0,0
572156,1279,0.997162,1413,0.009663,0.0,7349.0,5847.0,20004.0,32.0,18.0,2.0,4752.0,0
572157,1279,0.997398,1413,0.005502,0.0,5814.0,5847.0,7065.0,32.0,22.0,2.0,4752.0,0
572158,1279,0.998490,1413,0.006554,0.0,5251.0,5847.0,24028.0,32.0,15.0,2.0,4753.0,0


In [50]:
# Longest sequence in the training frame, measured per ``seq_id``. The former
# version counted the rows of user 0 only, which yields 0 whenever user 0 is
# assigned to the held-out split.
max_seq_len = int(df_data.groupby("seq_id").size().max())

In [51]:
max_seq_len

447

In [52]:
model = DGAN(DGANConfig(
    max_sequence_len=max_seq_len,
    sample_len=max_seq_len,
    batch_size=1000,
    apply_feature_scaling=True,
    apply_example_scaling=False,
    use_attribute_discriminator=False,
    generator_learning_rate=1e-4,
    discriminator_learning_rate=1e-4,
    epochs=1000,
))

In [53]:
model.train_dataframe(
    df_data,
    attribute_columns=["seq_out_('Is Fraud?',)", "seq_static_('User',)"],
    feature_columns = ["seq_temporal_Amount","seq_temporal_Card","seq_temporal_MCC","seq_temporal_Merchant City","seq_temporal_Merchant Name","seq_temporal_Merchant State","seq_temporal_Time","seq_temporal_Use Chip","seq_temporal_Zip"],
    time_column = "seq_time_id",
    example_id_column = "seq_id",
    df_style = DfStyle.LONG
)

In [None]:
# ``generate_dataframe`` takes the number of sequences (examples) to generate,
# not the number of rows, so request as many synthetic sequences as there are
# training sequences.
synthetic_df_dopple = model.generate_dataframe(df_data["seq_id"].nunique())

In [None]:
synthetic_df_dopple

In [49]:
syn_model_without_dp = Plugins().get("timegan", dp_enabled=False)

[2023-09-26T01:50:02.358215+0000][13068][CRITICAL] load failed: 
arfpy is not installed. Please install it with pip install arfpy.
Please be aware that arfpy is only available for python >= 3.8.

[2023-09-26T01:50:02.359302+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_arf' has no attribute 'plugin'
[2023-09-26T01:50:02.360220+0000][13068][CRITICAL] module plugin_arf load failed
[2023-09-26T01:50:02.363671+0000][13068][CRITICAL] load failed: No module named 'pgmpy'
[2023-09-26T01:50:02.364634+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_bayesian_network' has no attribute 'plugin'
[2023-09-26T01:50:02.365540+0000][13068][CRITICAL] module plugin_bayesian_network load failed
[2023-09-26T01:50:02.482262+0000][13068][CRITICAL] module disabled: /home/skunk/synthetic_time_series/src/./synthcity/src/synthcity/plugins/generic/plugin_goggle.py
[2023-09-26T01:50:02.487248+0000][13068][CRITICAL] load failed: 
GReaT is not installed. Pl

In [50]:
syn_model_with_dp_eps1 = Plugins().get("timegan", mode="DPRNN", dp_enabled=True, n_iter=300, epsilon=1., delta = delta)

[2023-09-26T01:50:04.037540+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_arf' has no attribute 'plugin'
[2023-09-26T01:50:04.039897+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_arf' has no attribute 'plugin'
[2023-09-26T01:50:04.040937+0000][13068][CRITICAL] module plugin_arf load failed
[2023-09-26T01:50:04.042021+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_bayesian_network' has no attribute 'plugin'
[2023-09-26T01:50:04.043130+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_bayesian_network' has no attribute 'plugin'
[2023-09-26T01:50:04.044178+0000][13068][CRITICAL] module plugin_bayesian_network load failed
[2023-09-26T01:50:04.045642+0000][13068][CRITICAL] module disabled: /home/skunk/synthetic_time_series/src/./synthcity/src/synthcity/plugins/generic/plugin_goggle.py
[2023-09-26T01:50:04.046944+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.

In [51]:
syn_model_with_dp_eps5 = Plugins().get("timegan", mode="DPRNN", dp_enabled=True, n_iter=300, epsilon=5., delta = delta)

[2023-09-26T01:50:04.098177+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_arf' has no attribute 'plugin'
[2023-09-26T01:50:04.100401+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_arf' has no attribute 'plugin'
[2023-09-26T01:50:04.101544+0000][13068][CRITICAL] module plugin_arf load failed
[2023-09-26T01:50:04.102859+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_bayesian_network' has no attribute 'plugin'
[2023-09-26T01:50:04.103979+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_bayesian_network' has no attribute 'plugin'
[2023-09-26T01:50:04.105052+0000][13068][CRITICAL] module plugin_bayesian_network load failed
[2023-09-26T01:50:04.106637+0000][13068][CRITICAL] module disabled: /home/skunk/synthetic_time_series/src/./synthcity/src/synthcity/plugins/generic/plugin_goggle.py
[2023-09-26T01:50:04.109725+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.

In [52]:
syn_model_with_dp_eps20 = Plugins().get("timegan", mode="DPRNN", dp_enabled=True, n_iter=300, epsilon=20., delta = delta)

[2023-09-26T01:50:04.155823+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_arf' has no attribute 'plugin'
[2023-09-26T01:50:04.157719+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_arf' has no attribute 'plugin'
[2023-09-26T01:50:04.159707+0000][13068][CRITICAL] module plugin_arf load failed
[2023-09-26T01:50:04.161092+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_bayesian_network' has no attribute 'plugin'
[2023-09-26T01:50:04.162255+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_bayesian_network' has no attribute 'plugin'
[2023-09-26T01:50:04.163471+0000][13068][CRITICAL] module plugin_bayesian_network load failed
[2023-09-26T01:50:04.166190+0000][13068][CRITICAL] module disabled: /home/skunk/synthetic_time_series/src/./synthcity/src/synthcity/plugins/generic/plugin_goggle.py
[2023-09-26T01:50:04.167333+0000][13068][CRITICAL] load failed: module 'synthcity.plugins.

In [None]:
syn_model_without_dp.fit(data)

In [None]:
syn_model_with_dp_eps1.fit(data)

In [None]:
syn_model_with_dp_eps5.fit(data)

In [None]:
syn_model_with_dp_eps20.fit(data)

In [None]:
# ``count`` must be an integer; floor division keeps it one (``/`` gives a float).
synth_data_without_dp = syn_model_without_dp.generate(count=len(train_data) // 10)

In [None]:
# ``count`` must be an integer; floor division keeps it one (``/`` gives a float).
synth_data_with_dp = syn_model_with_dp_eps1.generate(count=len(train_data) // 10)

In [None]:
# ``count`` must be an integer; floor division keeps it one (``/`` gives a float).
synth_data_with_dp_eps5 = syn_model_with_dp_eps5.generate(count=len(train_data) // 10)

In [None]:
# ``count`` must be an integer; floor division keeps it one (``/`` gives a float).
synth_data_with_dp_eps20 = syn_model_with_dp_eps20.generate(count=len(train_data) // 10)

In [None]:
def create_sequence(dataset, seq_len, label_col=2):
    """Cut a DataFrame into consecutive, non-overlapping windows of ``seq_len`` rows.

    In every window the first ``seq_len - 1`` rows form the input sequence and
    one value of the window's last row is the label. Trailing rows that do not
    fill a complete window are ignored.

    Args:
        dataset: pandas DataFrame to slice positionally.
        seq_len: number of rows per window (inputs plus the label row).
        label_col: positional index of the column the label is read from.
            Defaults to 2, the value that used to be hard-coded.
            NOTE(review): for frames coming from ``TimeSeriesDataLoader.dataframe()``
            in this notebook, position 2 is the static user column rather than
            the outcome column -- callers probably want to pass the outcome's
            position instead.

    Returns:
        Tuple ``(sequences, labels)`` of numpy arrays built from the collected
        windows and labels.
    """
    sequences = []
    labels = []

    for window in range(len(dataset) // seq_len):
        last_row = (window + 1) * seq_len - 1
        sequences.append(dataset.iloc[window * seq_len:last_row])
        # Explicit positional lookup on the row; ``row[2]`` on a label-indexed
        # Series relies on pandas' deprecated integer-position fallback.
        labels.append(dataset.iloc[last_row].iloc[label_col])
    return (np.array(sequences), np.array(labels))

In [None]:
def create_sequence_np(dataset, seq_len, label_col=2):
    """Split already-windowed samples into input sequences and labels.

    For every sample in ``dataset`` all steps but the last form the input
    sequence, and one entry of the last step is the label.

    Args:
        dataset: indexable collection of samples; each sample is indexed as
            ``sample[step][feature]``.
        seq_len: unused; kept so the signature mirrors ``create_sequence``.
        label_col: index of the entry in the last step used as the label.
            Defaults to 2, the value that used to be hard-coded.

    Returns:
        Tuple ``(sequences, labels)`` of numpy arrays.
    """
    sequences = [sample[:-1] for sample in dataset]
    labels = [sample[-1][label_col] for sample in dataset]
    return (np.array(sequences), np.array(labels))

In [None]:
train_data = TimeSeriesDataLoader(
    temporal_data=temporal_train,
    observation_times=horizons_train,
    static_data=static_train,
    outcome = outcome_train,
)

In [None]:
# Split the held-out users the same way as the training users; these ``*_test``
# pieces were referenced here without ever being computed.
static_test, temporal_test, horizons_test, outcome_test = split_data(
    test_data, static_cols, temporal_cols, horizon_cols, outcome_cols
)

# Bind ONLY ``test_data``: the former chained ``test_data = train_data = ...``
# silently replaced the training loader with the test loader.
test_data = TimeSeriesDataLoader(
    temporal_data=temporal_test,
    observation_times=horizons_test,
    static_data=static_test,
    outcome=outcome_test,
)

In [None]:
class Network(nn.Module):
    """Single-layer LSTM followed by dropout and a linear read-out.

    Args:
        dropout: dropout probability applied to the LSTM outputs.
        input_dim: number of features per time step.
        num_classes: size of the output vector.
        hidden_size: LSTM hidden state size.
        **kwargs: ignored; their names are printed at construction time.

    ``forward`` returns raw, un-activated scores (no sigmoid is applied).
    """

    def __init__(self, dropout,
                 input_dim=8, num_classes = 1, hidden_size = 10, **kwargs):

        super(Network, self).__init__()

        print("==> not used params in network class:", kwargs.keys())

        self.input_dim = input_dim
        self.dropout = dropout
        self.hidden_size = hidden_size

        # Main part of the network. ``batch_first=True`` makes the LSTM read
        # its input as (batch, time, features), which is the layout ``forward``
        # assumes when it picks ``[:, -1, :]`` as the last time step. With the
        # default layout the LSTM would treat the batch axis as time.
        self.lstm_layers = nn.ModuleList()
        self.lstm_layers.append(
            nn.LSTM(input_size=input_dim, hidden_size=self.hidden_size, batch_first=True)
        )
        self.lstm_layers.append(nn.Dropout(p=dropout))

        # Output layer (no activation: callers receive logits)
        self.output_layer = nn.Linear(self.hidden_size, num_classes)

    def forward(self, X, lengths=None):
        """Score each sequence in a batch.

        Args:
            X: tensor laid out as (batch, time, features); cast to float32.
            lengths: unused.

        Returns:
            Tensor of shape (batch, num_classes) computed from the hidden
            output at the last time step.
        """
        hidden = X.to(torch.float32)
        for layer in self.lstm_layers:
            if isinstance(layer, nn.LSTM):
                # LSTM returns (outputs, (h_n, c_n)); only the outputs are used
                hidden, _ = layer(hidden)
            else:
                hidden = layer(hidden)
        last_step = hidden[:, -1, :]
        return self.output_layer(last_step)


In [None]:
def run_model(model, training_error, testing_error, X_train, y_train, X_test, y_test,
              epochs=100, batch_size=1, lr=0.001):
    """Train ``model`` as a binary classifier and record per-epoch losses.

    Args:
        model: module mapping a batch of inputs to un-activated scores (logits).
        training_error: list; the mean training loss of each epoch is appended.
        testing_error: list; the mean validation loss of each epoch is appended.
        X_train, y_train: training inputs and binary targets (same length).
        X_test, y_test: validation inputs and binary targets (same length).
        epochs: number of passes over the training data (default 100).
        batch_size: mini-batch size for both loaders (default 1).
        lr: Adam learning rate (default 0.001).

    Returns:
        None; results are reported through the two lists and a per-epoch print.
    """
    # BCEWithLogitsLoss applies the sigmoid itself; plain BCELoss requires
    # probabilities in [0, 1], which a model emitting logits does not provide.
    loss = nn.BCEWithLogitsLoss()
    # Shuffle the training data each epoch; validation order does not matter.
    train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(list(zip(X_test, y_test)), batch_size=batch_size)
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))
    num_train_batches = len(train_loader)

    for epoch in range(epochs):
        model.train()
        step_loss = []
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            # Give the targets the same shape and dtype as the model output
            targets = targets.reshape(tuple(outputs.shape)).to(torch.float32)
            batch_loss = loss(outputs, targets)
            batch_loss.backward()
            optimizer.step()
            step_loss.append(batch_loss.item())
        training_error.append(np.array(step_loss).mean())

        model.eval()
        val_losses = []
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                targets = targets.reshape(tuple(outputs.shape)).to(torch.float32)
                val_losses.append(loss(outputs, targets).item())
        # An empty validation set reports NaN instead of dividing by zero
        val_loss = float(np.mean(val_losses)) if val_losses else float("nan")
        print('Epoch [{}/{}], Step [{}/{}], Validation Batch Loss: {:.4f}'
                .format(epoch + 1, epochs, num_train_batches, num_train_batches, val_loss))
        testing_error.append(val_loss)

In [None]:
synth_no_dp_X, synth_no_dp_y = create_sequence(synth_data_without_dp.dataframe(), 10)

In [None]:
synth_dp_X, synth_dp_y = create_sequence(synth_data_with_dp.dataframe(), 10)

In [None]:
synth_dp_eps5_X, synth_dp_eps5_y = create_sequence(synth_data_with_dp_eps5.dataframe(), 10)

In [None]:
synth_dp_eps20_X, synth_dp_eps20_y = create_sequence(synth_data_with_dp_eps20.dataframe(), 10)

In [None]:
X_train, y_train = create_sequence(train_data.dataframe(), 10)

In [None]:
X_test, y_test = create_sequence(test_data.dataframe(), 10)

In [None]:
model = Network(0.2)
trainingEpoch_loss = []
validationEpoch_loss = []
run_model(model, trainingEpoch_loss, validationEpoch_loss, X_train, y_train, X_test, y_test)

In [None]:
synth_no_dp_model = Network(0.2)
trainingEpoch_no_dp_loss = []
validationEpoch_no_dp_loss = []
run_model(synth_no_dp_model, trainingEpoch_no_dp_loss, validationEpoch_no_dp_loss, synth_no_dp_X, synth_no_dp_y, X_test, y_test)

In [None]:
synth_dp_model = Network(0.2)
trainingEpoch_dp_loss = []
validationEpoch_dp_loss = []
run_model(synth_dp_model, trainingEpoch_dp_loss, validationEpoch_dp_loss, synth_dp_X, synth_dp_y, X_test, y_test)

In [None]:
synth_dp_eps5_model = Network(0.2)
trainingEpoch_dp_eps5_loss = []
validationEpoch_dp_eps5_loss = []
run_model(synth_dp_eps5_model, trainingEpoch_dp_eps5_loss, validationEpoch_dp_eps5_loss, synth_dp_eps5_X, synth_dp_eps5_y, X_test, y_test)

In [None]:
synth_dp_eps20_model = Network(0.2)
trainingEpoch_dp_eps20_loss = []
validationEpoch_dp_eps20_loss = []
run_model(synth_dp_eps20_model, trainingEpoch_dp_eps20_loss, validationEpoch_dp_eps20_loss, synth_dp_eps20_X, synth_dp_eps20_y, X_test, y_test)

In [None]:
# Loss curves to compare, in plotting order (legend label -> per-epoch losses).
loss_curves = {
    'train_original': trainingEpoch_loss,
    'val_original': validationEpoch_loss,
    'train_no_dp_synthetic': trainingEpoch_no_dp_loss,
    'val_no_dp_synthetic': validationEpoch_no_dp_loss,
    'train_dp_eps=1_synthetic': trainingEpoch_dp_loss,
    'val_dp_eps=1_synthetic': validationEpoch_dp_loss,
    'train_dp_eps=5_synthetic': trainingEpoch_dp_eps5_loss,
    'val_dp_eps=5_synthetic': validationEpoch_dp_eps5_loss,
    'train_dp_eps=20_synthetic': trainingEpoch_dp_eps20_loss,
    'val_dp_eps=20_synthetic': validationEpoch_dp_eps20_loss,
}

# No visible cell trains the "random" baseline, so referencing its loss lists
# unconditionally raises NameError on a fresh kernel. Plot them only if they
# have been defined.
for curve_label, variable_name in (
    ('train_random', 'trainingEpoch_random_loss'),
    ('val_random', 'validationEpoch_random_loss'),
):
    if variable_name in globals():
        loss_curves[curve_label] = globals()[variable_name]

plt.figure(figsize=(10,10))
for curve_label, curve in loss_curves.items():
    plt.plot(curve, label=curve_label)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
# ``plt.show`` without parentheses only names the function; call it.
plt.show()