In [None]:
import argparse
import copy
import gc
import math
import os
import random
import time

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
from matplotlib.pyplot import figure
from torch.utils.data import DataLoader

In [None]:
def _str2bool(value):
    """Convert a command-line string into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy, so ``--cuda False`` would silently
    enable CUDA. This parser accepts the usual spellings instead.

    Args:
        value: The raw argument (a string from the command line, or a bool).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays falsy, matching what ``bool("")`` used to return.
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))


parser = argparse.ArgumentParser()

experimentName = 'medGAN.UCI'

parser.add_argument("--dataset-path", type=str, default=os.path.expanduser('~/workspace/data/uci-epileptic/processed.npy'), help="Dataset file")

parser.add_argument("--n-epochs", type=int, default=100, help="number of epochs of training")
parser.add_argument("--n-epochs-ae", type=int, default=100, help="number of epochs of autoencoder training")
parser.add_argument("--batch-size", type=int, default=64, help="size of the batches")
parser.add_argument("--lr", type=float, default=0.001, help="adam: learning rate")
parser.add_argument("--n-cpu", type=int, default=32, help="number of cpu threads to use during batch generation")
parser.add_argument('--n-critic', type=int, default=5, help='number of Discriminator iterations per each Generator iteration')
parser.add_argument('--clamp', type=float, default=0.01, help='weight clipping value')
parser.add_argument("--cuda", type=_str2bool, default=True, help="CUDA activation")
parser.add_argument("--latent-dim", type=int, default=128, help="dimensionality of the latent space")

parser.add_argument("--pretrained", type=_str2bool, default=False, help="Use pretrained model")
parser.add_argument("--pretrained-ae", type=_str2bool, default=False, help="Use pretrained model for autoencoder")

parser.add_argument("--expPATH", type=str, default=os.path.expanduser('~/workspace/pytorch-exports/models/{}'.format(experimentName)), help="Export Path")

# An empty argv is passed on purpose: inside a notebook sys.argv belongs to the
# kernel, so only the defaults declared above are used.
opt = parser.parse_args([])
print(opt)

In [None]:
######################
### Initialization ###
######################

# Create the experiment directory. os.makedirs does not go through a shell, so
# paths containing spaces or shell metacharacters are handled correctly.
os.makedirs(opt.expPATH, exist_ok=True)

# opt.seed = 1024 # fix seed
opt.seed = random.randint(1, 10000)

# Seed every random number generator used by the notebook with the same value.
print('Random Seed: {}'.format(opt.seed))
random.seed(opt.seed)
torch.manual_seed(opt.seed)
np.random.seed(opt.seed)
cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device BUT it is not in use...")

# Requesting CUDA on a machine without it would otherwise fail later, when the
# models are moved to the GPU. Clearing the flag keeps every later
# `opt.cuda` check consistent with `device`.
if opt.cuda and not torch.cuda.is_available():
    print("WARNING: CUDA was requested BUT no CUDA device is available; falling back to CPU...")
    opt.cuda = False

device = torch.device("cuda:0" if opt.cuda else "cpu")
print('using "{}" as the tensor processor'.format(device))

In [None]:
#################################
### Reading Dataset from File ###
#################################

# Load the array stored at the configured dataset path.
input_data = np.load(opt.dataset_path)

# Axis 0 indexes samples, axis 1 indexes features.
sample_size, feature_size = input_data.shape[0], input_data.shape[1]

In [None]:
#####################
### Dataset Model ###
#####################

class EPIDataset(torch.utils.data.Dataset):
    """Thin ``Dataset`` wrapper around an in-memory, indexable 2-D array.

    Attributes:
        dataset: The wrapped array-like object (must expose ``.shape``).
        sample_size: Length of the first axis at construction time.
        feature_size: Length of the second axis at construction time.
    """

    def __init__(self, dataset):
        """Store ``dataset`` and record its first two dimensions."""
        shape = dataset.shape
        self.dataset = dataset
        self.sample_size = shape[0]
        self.feature_size = shape[1]

    def __len__(self):
        """Return the number of rows currently held by the wrapped array."""
        n_rows = self.dataset.shape[0]
        return n_rows

    def __getitem__(self, idx):
        """Return the item of the wrapped array at position ``idx``."""
        item = self.dataset[idx]
        return item

In [None]:
##########################
### Dataset Processing ###
##########################

# The first 80% of the rows (in file order, no shuffling) are used for training
# and the remaining rows for testing.
split_index = int(0.8 * sample_size)
train_data, test_data = input_data[:split_index], input_data[split_index:]

print('total samples: {}, features: {}'.format(sample_size, feature_size))
shape_report = 'training data shape: {}, testing data shape: {}, dataset type: {}'
print(shape_report.format(train_data.shape, test_data.shape, input_data.dtype))

In [None]:
# Both loaders share the same batching, shuffling and worker settings.
loader_options = dict(
    batch_size=opt.batch_size,
    shuffle=True,
    num_workers=opt.n_cpu,
)

training_dataloader = DataLoader(EPIDataset(dataset=train_data), **loader_options)

testing_dataloader = DataLoader(EPIDataset(dataset=test_data), **loader_options)

In [None]:
#########################
### AutoEncoder Model ###
#########################

class Autoencoder(nn.Module):
    """One-hidden-layer autoencoder over ``feature_size``-wide inputs.

    The encoder maps the input to a 128-wide code through a linear layer and
    tanh; the decoder maps the code back to ``feature_size`` values through a
    linear layer and sigmoid.
    """

    def __init__(self):
        super().__init__()

        # The encoder layers are built before the decoder layers so parameter
        # initialisation happens in the same order as the attributes below.
        encoder_layers = [nn.Linear(feature_size, 128), nn.Tanh()]
        decoder_layers = [nn.Linear(128, feature_size), nn.Sigmoid()]

        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def encode(self, x):
        """Return the 128-wide code for ``x``."""
        code = self.encoder(x)
        return code

    def decode(self, x):
        """Return the reconstruction for the code ``x``."""
        reconstruction = self.decoder(x)
        return reconstruction

    def forward(self, x):
        """Encode ``x`` and immediately decode the resulting code."""
        code = self.encoder(x)
        return self.decoder(code)

In [None]:
############################
### Model Initialization ###
############################

# Free Python garbage and cached CUDA blocks before allocating the model.
gc.collect()
torch.cuda.empty_cache()

# Build the autoencoder and move it to the GPU only when CUDA is enabled.
autoencoder = Autoencoder()
if opt.cuda:
    autoencoder = autoencoder.cuda()

optimizer_A = torch.optim.Adam(autoencoder.parameters(), lr=opt.lr)

In [None]:
#####################################
###### AutoEncoder Training #########
#####################################

# Reconstruction objective: mean squared error between input and output.
criterion = nn.MSELoss()

if not opt.pretrained_ae:
    for epoch in range(opt.n_epochs_ae):
        # Accumulate a plain Python float. Summing the loss tensors themselves
        # would keep a growing chain of autograd nodes alive for the whole epoch.
        train_loss = 0.0
        autoencoder.train()
        for batch in training_dataloader:
            batch = batch.to(device)
            gen_batch = autoencoder(batch)
            loss_A = criterion(gen_batch, batch)
            optimizer_A.zero_grad()
            loss_A.backward()
            optimizer_A.step()
            train_loss += loss_A.item()

        # The reported value is the sum of the per-batch mean losses of the epoch.
        print("[Epoch {:3d}/{:3d}] [Training Loss: {:10.2f}]".format(epoch + 1, opt.n_epochs_ae, train_loss), flush=True)
    torch.save(autoencoder.state_dict(), opt.expPATH + '/autoencoder.model')
else:
    # map_location lets a checkpoint written on a GPU be restored on whatever
    # device this run uses (for example a CPU-only machine).
    autoencoder.load_state_dict(torch.load(opt.expPATH + '/autoencoder.model', map_location=device))

In [None]:
# Count, over the whole test split, how many reconstructed values differ from
# the input by more than 0.05 in absolute value.
errors = 0
# Evaluation needs no gradients; no_grad avoids building an autograd graph for
# every test batch.
with torch.no_grad():
    for batch in testing_dataloader:
        batch = batch.to(device)
        gen_batch = autoencoder(batch)
        diff = torch.abs(gen_batch - batch).view(-1).detach().cpu().numpy()
        wrong_digits = diff[diff > 0.05]
        errors += len(wrong_digits)
print("total number of wrong digits: {}".format(errors))

In [None]:
#############################
### Generator Model ###
#############################

# Output has shape (batch, genDim): one genDim-wide code per latent sample.
class Generator(nn.Module):
    """Two residual blocks mapping latent noise to a ``genDim``-wide code.

    Each block applies Linear -> BatchNorm1d -> activation (ReLU for the first
    block, Tanh for the second) and adds the block input back to the result.
    Because the first skip connection adds the raw latent vector to a
    ``genDim``-wide tensor, ``opt.latent_dim`` must equal ``genDim``.

    Raises:
        ValueError: If ``opt.latent_dim`` differs from ``genDim``.
    """

    def __init__(self):
        super(Generator, self).__init__()
        self.genDim = 128
        if opt.latent_dim != self.genDim:
            # Fail early with a clear message instead of a shape error inside forward().
            raise ValueError(
                'latent_dim ({}) must equal the generator width ({}) because of the residual connections'.format(
                    opt.latent_dim, self.genDim))
        self.linear1 = nn.Linear(opt.latent_dim, self.genDim)
        self.bn1 = nn.BatchNorm1d(self.genDim, eps=0.001, momentum=0.01)
        self.activation1 = nn.ReLU()
        # The second block consumes the output of the first one, which is
        # genDim wide, so its input width is genDim rather than latent_dim.
        self.linear2 = nn.Linear(self.genDim, self.genDim)
        self.bn2 = nn.BatchNorm1d(self.genDim, eps=0.001, momentum=0.01)
        self.activation2 = nn.Tanh()

    def forward(self, x):
        """Run both residual blocks on ``x`` of shape (batch, latent_dim)."""
        # Layer 1
        residual = x
        temp = self.activation1(self.bn1(self.linear1(x)))
        out1 = temp + residual

        # Layer 2
        residual = out1
        temp = self.activation2(self.bn2(self.linear2(out1)))
        out2 = temp + residual
        return out2

In [None]:
###########################
### Discriminator Model ###
###########################

class Discriminator(nn.Module):
    """Three-layer MLP scoring ``feature_size``-wide samples with one value each.

    Layer widths are feature_size -> disDim -> disDim / 2 -> 1, with ReLU
    between the linear layers and a Sigmoid on the output.
    """

    def __init__(self):
        super().__init__()

        # Width of the first hidden layer; the second hidden layer is half as wide.
        self.disDim = 256
        half_width = int(self.disDim / 2)

        # NOTE(review): the training loop uses a Wasserstein-style loss with
        # weight clipping, which is usually paired with an unbounded output;
        # confirm the final Sigmoid is intentional.
        layers = [
            nn.Linear(feature_size, self.disDim),
            nn.ReLU(),
            nn.Linear(self.disDim, half_width),
            nn.ReLU(),
            nn.Linear(half_width, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the score produced by the MLP for each row of ``x``."""
        return self.model(x)

In [None]:
############################
### Model Initialization ###
############################

# Free Python garbage and cached CUDA blocks before allocating the GAN.
gc.collect()
torch.cuda.empty_cache()

# The generator is constructed before the discriminator; each is moved to the
# GPU only when CUDA is enabled.
generator = Generator()
if opt.cuda:
    generator = generator.cuda()

discriminator = Discriminator()
if opt.cuda:
    discriminator = discriminator.cuda()

# One Adam optimizer per network, both with the configured learning rate.
optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr)
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=opt.lr)

In [None]:
# Adversarial training with weight clipping. The discriminator minimises
# mean(D(real)) - mean(D(fake)) and the generator minimises mean(D(fake)).
# Fake samples are produced by decoding the generator output with the
# autoencoder's decoder.
if not opt.pretrained:
    batches_done = 0

    discriminator.train()
    generator.train()

    for epoch in range(opt.n_epochs):
        epoch_start = time.time()
        # Each epoch makes n_critic full passes over the training data.
        for _ in range(opt.n_critic):
            for batch in training_dataloader:
                # ---------------------
                #  Train Discriminator
                # ---------------------
                batch = batch.to(device)
                # Re-enable gradients that the generator step below switches off.
                for dp in discriminator.parameters():
                    dp.requires_grad = True

                optimizer_D.zero_grad()
                z = torch.randn(batch.shape[0], opt.latent_dim, device=device)
                # The fake batch is a constant for the discriminator update, so
                # it is generated without recording an autograd graph.
                with torch.no_grad():
                    fake_batch = autoencoder.decode(generator(z))
                loss_D = torch.mean(discriminator(batch), dim=0) - torch.mean(discriminator(fake_batch), dim=0)
                loss_D.backward()

                optimizer_D.step()

                # Weight clipping: keep every discriminator parameter in [-clamp, clamp].
                for dp in discriminator.parameters():
                    dp.data.clamp_(-opt.clamp, opt.clamp)

                # The generator is updated once every n_critic discriminator updates
                # (including the very first batch, since batches_done starts at 0).
                if batches_done % opt.n_critic == 0:
                    # -----------------
                    #  Train Generator
                    # -----------------
                    # Freeze the discriminator so this step computes no gradients for it.
                    for dp in discriminator.parameters():
                        dp.requires_grad = False

                    optimizer_G.zero_grad()

                    z = torch.randn(batch.shape[0], opt.latent_dim, device=device)
                    fake_batch = autoencoder.decode(generator(z))
                    loss_G = torch.mean(discriminator(fake_batch), dim=0)
                    loss_G.backward()

                    # Only generator parameters are registered in optimizer_G,
                    # so the decoder weights are not updated here.
                    optimizer_G.step()

                batches_done += 1
                if batches_done % (100 * opt.n_critic) == 0:
                    print('[Epoch {:3d}/{:3d}] [Batch {:4d}/{:4d}] [D loss: {:.6f}] [G loss: {:.6f}]'.format(epoch + 1, opt.n_epochs, batches_done % (opt.n_critic * len(training_dataloader)), opt.n_critic * len(training_dataloader), loss_D.item(), loss_G.item()))

        print('[Epoch {:3d}/{:3d}] [Time: {:.2f}] [D loss: {:.6f}] [G loss: {:.6f}]'.format(epoch + 1, opt.n_epochs, time.time() - epoch_start, loss_D.item(), loss_G.item()))

    torch.save(generator.state_dict(), opt.expPATH + '/generator.model')
    torch.save(discriminator.state_dict(), opt.expPATH + '/discriminator.model')
else:
    # map_location lets checkpoints written on a GPU be restored on whatever
    # device this run uses (for example a CPU-only machine).
    generator.load_state_dict(torch.load(opt.expPATH + '/generator.model', map_location=device))
    discriminator.load_state_dict(torch.load(opt.expPATH + '/discriminator.model', map_location=device))

In [None]:
# Put all three networks into evaluation mode before sampling. For the
# generator this makes its BatchNorm1d layers use their running statistics
# instead of per-batch statistics.
autoencoder.eval()
generator.eval()
discriminator.eval()

In [None]:
# Sample num_fake_batches batches of latent noise, decode them into synthetic
# records and save the result as "synthetic.npy" in the experiment directory.
num_fake_batches = 80
fake_batches = []
# Sampling needs no gradients; no_grad avoids building an autograd graph.
with torch.no_grad():
    for _ in range(num_fake_batches):
        z = torch.randn(opt.batch_size, opt.latent_dim, device=device)
        # Run the generator once and reuse its output for decoding.
        generated_batch = generator(z)
        fake_batch = autoencoder.decode(generated_batch)
        fake_batches.append(fake_batch.to('cpu'))

# Concatenate once at the end instead of re-copying the growing tensor on every
# iteration; an empty (0, feature_size) tensor is kept for the zero-batch case.
if fake_batches:
    fake_data = torch.cat(fake_batches, 0)
else:
    fake_data = torch.zeros((0, feature_size), device='cpu')
np.save(os.path.join(opt.expPATH, "synthetic.npy"), fake_data.detach().cpu().numpy(), allow_pickle=False)