In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

from sklearn.utils import shuffle, resample

import math
import shutil
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from keras.utils import np_utils

import torch 
import torch.nn as nn 
from torch.nn import functional as F
from torch import optim
import torchmetrics
import torchvision
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms


from pytorch_lightning import LightningModule, Trainer, seed_everything
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks import Callback, ModelCheckpoint, EarlyStopping

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for path in (os.path.join(root, entry) for entry in files):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nn-real-data-train-d1s1-1/real_labels_tr_d1s1.csv
/kaggle/input/nn-real-data-train-d1s1-1/real_signals_tr_d1s1.csv
/kaggle/input/nn-test-data-2/scrappie_labels.csv
/kaggle/input/nn-test-data-2/scrappie_signals.csv


In [3]:
class Squiggles(Dataset):
    """Contiguous train / validation / test slice of paired signal and label arrays.

    The rows are split by position (no shuffling happens here):

    * ``trn_val_tst == 0``  -> rows ``[0, ceil(n * train_perc))``
    * ``trn_val_tst == 1``  -> rows ``[ceil(n * train_perc), ceil(n * valid_perc))``
    * any other value       -> rows ``[ceil(n * valid_perc), n)``

    Args:
        signals: NumPy array whose first axis indexes samples.
        labels: NumPy array with one entry (row) per sample in ``signals``.
        trn_val_tst: Which slice to keep (0 = train, 1 = validation, else test).
        train_perc: Fraction of rows at which the training slice ends.
        valid_perc: Fraction of rows at which the validation slice ends.

    Raises:
        ValueError: If ``signals`` and ``labels`` do not hold the same number of
            rows (slicing them with a shared boundary would misalign the pairs).
    """

    def __init__(self, signals, labels, trn_val_tst = 0, train_perc = 0.4, valid_perc = 0.6):
        n_rows = signals.shape[0]
        if len(labels) != n_rows:
            raise ValueError(
                f"signals has {n_rows} rows but labels has {len(labels)}; they must match"
            )

        # Both boundaries are derived from the signal row count, computed once.
        train_end = math.ceil(n_rows * train_perc)
        valid_end = math.ceil(n_rows * valid_perc)

        if trn_val_tst == 0:
            # Create dataset for trainloader
            rows = slice(0, train_end)
        elif trn_val_tst == 1:
            # Create dataset for valloader
            rows = slice(train_end, valid_end)
        else:
            # Create dataset for testloader
            rows = slice(valid_end, None)

        self.signals = torch.from_numpy(signals[rows])
        self.labels = torch.from_numpy(labels[rows])

    # Define len function
    def __len__(self):
        """Return the number of samples kept in this slice."""
        return len(self.labels)

    # Define getitem function
    def __getitem__(self, idx):
        """Return the ``(signal, label)`` pair at ``idx`` (tensor indices are accepted)."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.signals[idx,:]
        labels = self.labels[idx]

        return sample, labels

In [4]:
BATCH_SIZE = 128

# Simulated (scrappie) squiggles and their class ids, one CSV row per sample.
signals = pd.read_csv('../input/nn-test-data-2/scrappie_signals.csv').to_numpy()
labels = pd.read_csv('../input/nn-test-data-2/scrappie_labels.csv').to_numpy()

# Row permutation applied to both arrays before the positional train/val split.
# NOTE(review): j * 100 + i with i < 256 and j < 100 yields 25,600 indices but only
# the values 0..10155, so many rows are selected more than once (e.g. i=100, j=0 and
# i=0, j=1 both give 100). Presumably an interleave of the classes was intended --
# confirm against the CSV layout before changing it.
rowOrder = [j * 100 + i for i in range(256) for j in range(100)]
signals = np.expand_dims(signals[rowOrder],1)
# Class ids are shifted by one before being one-hot encoded over 256 classes.
labels = np_utils.to_categorical(labels[rowOrder] - 1, num_classes=256)

# First 66% of the permuted rows feed the training loader.
trainset = Squiggles(signals=signals, labels=labels, trn_val_tst=0, train_perc=0.66, valid_perc=1)
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

# The remaining rows (66% .. 100%) feed the validation loader.
valset = Squiggles(signals=signals, labels=labels, trn_val_tst=1, train_perc=0.66, valid_perc=1)
valloader = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [5]:
class Classifier_256(LightningModule):
    """1-D CNN that maps a 152-sample squiggle to logits over 256 classes.

    Three (dilated) convolutions are followed by a max-pool and two fully
    connected layers. ``forward`` returns raw logits; ``predict`` applies a
    softmax on top. Targets are one-hot style rows: accuracy is computed
    against ``y.argmax(1)`` and the loss receives ``y`` directly.

    Args:
        learning_rate: Step size handed to the Adam optimizer.
        train_loader: DataLoader returned by ``train_dataloader``.
        val_loader: DataLoader returned by ``val_dataloader``.

    The loader defaults are the module-level ``trainloader`` / ``valloader``
    objects that exist at the moment this class statement is executed.
    """

    def __init__(self, learning_rate=0.001, train_loader=trainloader, val_loader=valloader):
        super().__init__()

        self.loss_fn = nn.CrossEntropyLoss()

        self.learning_rate = learning_rate
        self.train_loader=train_loader
        self.val_loader=val_loader

        self.train_accuracy = Accuracy()
        self.val_accuracy = Accuracy()

        # Logits are always (batch, 256), so the class axis is dim 1. Naming it
        # explicitly avoids the deprecated implicit-dimension behaviour.
        self.sm = nn.Softmax(dim=1)

        self.conv1 = nn.Conv1d(1, 8, 8)
        self.act_conv1 = nn.ELU()

        self.conv2 = nn.Conv1d(8, 16, 4, dilation=2)
        self.act_conv2 = nn.ReLU()

        self.conv3 = nn.Conv1d(16, 32, 4, dilation=4)
        self.act_conv3 = nn.ReLU()

        self.conv_dropout = nn.Dropout(p=0.3)

        self.pool = nn.MaxPool1d(8,stride=6)
        self.act_pool = nn.ReLU()
        self.pool_dropout = nn.Dropout(p=0.5)

        # 640 = 32 channels x 20 positions left after the conv stack and pooling
        # (152 -> 145 -> 139 -> 127 -> 20).
        self.fc1 = nn.Linear(640,512)
        self.act_fc1 = nn.ELU()
        self.fc1_dropout = nn.Dropout(p=0.5)

        self.fc2 = nn.Linear(512,256) # The output FC layer

    def forward(self, x):
        """Return class logits of shape (batch, 256) for ``x`` reshaped to (batch, 1, 152)."""
        x = x.float()
        x = torch.reshape(x, (-1, 1, 152))

        #Pass input through conv layers
        out = self.conv1(x)
        out = self.act_conv1(out)

        out = self.conv2(out)
        out = self.act_conv2(out)

        out = self.conv3(out)
        out = self.act_conv3(out)

        out = self.conv_dropout(out)

        out = self.pool(out)
        out = self.act_pool(out)
        out = self.pool_dropout(out)

        out = out.view(-1,640)
        out = self.fc1(out)
        out = self.act_fc1(out)
        out = self.fc1_dropout(out)

        out = self.fc2(out)

        return out

    def _loss_and_logits(self, batch):
        """Run one forward pass and return ``(loss, logits)`` for ``batch``."""
        x_batch, y_batch = batch
        logits = self(x_batch)
        return self.loss_fn(logits, y_batch), logits

    def training_step(self, batch, batch_idx):
        """Log epoch-level train loss/accuracy and return the loss to optimise."""
        # Loss and accuracy share a single forward pass, so both are measured
        # under the same dropout mask and the network is not evaluated twice.
        loss, logits = self._loss_and_logits(batch)
        self.log("train_loss", loss, prog_bar=True, on_step=False, on_epoch=True)

        _, y = batch
        self.train_accuracy.update(logits.argmax(1), y.argmax(1))
        self.log("train_acc", self.train_accuracy, prog_bar=True, on_step=False, on_epoch=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Log epoch-level validation loss and accuracy."""
        loss, logits = self._loss_and_logits(batch)
        self.log("val_loss", loss, prog_bar=True, on_step=False, on_epoch=True)

        _, y = batch
        self.val_accuracy.update(logits.argmax(1), y.argmax(1))
        self.log("val_acc", self.val_accuracy, prog_bar=True, on_step=False, on_epoch=True)

    def compute_loss(self, batch):
        """Return the cross-entropy loss of the model on ``batch`` (``(x, y)``)."""
        loss, _ = self._loss_and_logits(batch)
        return loss

    def predict(self, signals):
        """Return softmax class probabilities, shape (batch, 256), for ``signals``."""
        logits = self(signals)
        preds = self.sm(logits)
        return preds

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with ``self.learning_rate``."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

    ####################
    # DATA RELATED HOOKS
    ####################

    def train_dataloader(self):
        """Return the loader currently stored in ``self.train_loader``."""
        return self.train_loader

    def val_dataloader(self):
        """Return the loader currently stored in ``self.val_loader``."""
        return self.val_loader

In [6]:
# import shutil
# shutil.rmtree('/kaggle/working/logs_classifier_scrappie')

In [7]:
# model_classifier = Classifier_256()
# # Define checkpoint callback function to save best model
# checkpoint_callback = ModelCheckpoint(monitor="val_loss",
#                                       dirpath='logs_classifier_scrappie/',
#                                       save_top_k=1,
#                                       mode="min",
#                                       every_n_epochs=1)

# # Train and test the model
# trainer = Trainer(
#     accelerator="auto",
#     devices=1 if torch.cuda.is_available() else None,  
#     max_epochs=150,
#     callbacks=[TQDMProgressBar(refresh_rate=20), checkpoint_callback],
#     logger=CSVLogger(save_dir='logs_classifier_scrappie/'),
#     deterministic=True
# )

# print(model_classifier)
# trainer.fit(model_classifier)

In [8]:
# # EDIT BEFORE RUNNING
# The checkpoint filename below is hard-coded (epoch and step are part of the name);
# point it at the file that actually exists before running this cell.

# Build a fresh classifier, then restore its weights from the checkpoint's
# 'state_dict' entry. map_location forces the tensors onto the CPU.
model_classifier = Classifier_256()
checkpoint = torch.load('/kaggle/working/logs_classifier_scrappie/epoch=68-step=9108.ckpt', map_location=torch.device('cpu'))
# Last expression of the cell: its return value is what the notebook displays.
model_classifier.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [9]:
# Build the loaders used to fine-tune the classifier on real squiggles and
# attach them to the already-instantiated model.

BATCH_SIZE = 2048

rSquiggles = pd.read_csv('../input/nn-real-data-train-d1s1-1/real_signals_tr_d1s1.csv').to_numpy()
rLabels = pd.read_csv('../input/nn-real-data-train-d1s1-1/real_labels_tr_d1s1.csv').to_numpy()

# Add a channel axis to the signals and one-hot encode the (1-based) class ids.
rSquiggles = np.expand_dims(rSquiggles,1)
rLabels = np_utils.to_categorical(rLabels - 1, num_classes=256)
# Squiggles splits by row position, so the shuffle decides which rows end up in
# train vs. validation. A fixed random_state keeps that split identical across
# kernel restarts (otherwise metrics are not comparable between runs).
rSquiggles, rLabels = shuffle(rSquiggles, rLabels, random_state=42)

# Call training dataset and create the trainloader (first 60% of the rows).
trainset = Squiggles(trn_val_tst = 0, signals=rSquiggles, labels=rLabels, train_perc = 0.6, valid_perc = 1)
trainloader = DataLoader(dataset=trainset, batch_size=BATCH_SIZE, shuffle=True)
model_classifier.train_loader = trainloader

# Call validation dataset and create the valloader (remaining 40% of the rows).
valset = Squiggles(trn_val_tst = 1, signals=rSquiggles, labels=rLabels, train_perc = 0.6, valid_perc = 1)
valloader = DataLoader(dataset=valset, batch_size=BATCH_SIZE, shuffle=False)
model_classifier.val_loader = valloader

In [10]:
# Freeze every parameter that comes before 'fc1.weight' in named_parameters()
# order; iteration stops there, so fc1 and everything after it stay trainable.
for param_name, tensor in model_classifier.named_parameters():
    if param_name == 'fc1.weight':
        break
    tensor.requires_grad = False

In [11]:
# shutil.rmtree('/kaggle/working/logs_classifier')

In [12]:
# # Define checkpoint callback function to save best model
# checkpoint_callback = ModelCheckpoint(monitor="val_loss",
#                                       dirpath='logs_classifier/',
#                                       save_top_k=1,
#                                       mode="min",
#                                       every_n_epochs=1)

# # Train and test the model
# trainer = Trainer(
#     accelerator="auto",
#     devices=1 if torch.cuda.is_available() else None,  
#     max_epochs=400,
#     callbacks=[TQDMProgressBar(refresh_rate=20), checkpoint_callback],
#     logger=CSVLogger(save_dir='logs_classifier/'),
#     deterministic=True
# )

# trainer.fit(model_classifier)

In [13]:
# Recover model
# Restores the weights saved under logs_classifier/ into the existing
# model_classifier instance (the constructor call below is left disabled, so the
# object created earlier in the notebook is reused).

# model_classifier = Classifier_256()
# The checkpoint filename is hard-coded; map_location loads the tensors onto the CPU.
checkpoint = torch.load('/kaggle/working/logs_classifier/epoch=262-step=526.ckpt', map_location=torch.device('cpu'))
# Last expression of the cell: its return value is what the notebook displays.
model_classifier.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [14]:
# Report the validation accuracy logged on the row where val_loss is lowest.
traing_data = pd.read_csv('/kaggle/working/logs_classifier/lightning_logs/version_0/metrics.csv')
best_row = traing_data["val_loss"].argmin(0)
print(f'maximum val_acc: {traing_data["val_acc"][best_row]}')

maximum val_acc: 0.907764732837677


In [15]:
BATCH_SIZE = 128

# Discriminator targets: 1 for every real squiggle, 0 for every simulated one.
rLabels_d = np.ones((rSquiggles.shape[0], 1))
fLabels_d = np.zeros((signals.shape[0], 1))

dLabels = np.concatenate((rLabels_d, fLabels_d))

# Append each sample's one-hot class row to its signal along the last axis, so a
# single input row carries both the squiggle and the class it is conditioned on.
rSquiggles_temp = np.concatenate((rSquiggles, np.expand_dims(rLabels, 1)), 2)
signals_temp = np.concatenate((signals, np.expand_dims(labels, 1)), 2)

dSquiggles = np.concatenate((rSquiggles_temp, signals_temp))
# Real rows are stacked before simulated rows, and Squiggles splits by position,
# so the shuffle is what mixes both kinds into each split. A fixed random_state
# keeps the train/validation membership reproducible across kernel restarts.
dSquiggles, dLabels = shuffle(dSquiggles, dLabels, random_state=42)

# Call training dataset and create the trainloader (first 80% of the rows).
trainset = Squiggles(trn_val_tst = 0, signals=dSquiggles, labels=dLabels, train_perc = 0.8, valid_perc = 1)
trainloader = DataLoader(dataset=trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

# Call validation dataset and create the valloader (remaining 20% of the rows).
valset = Squiggles(trn_val_tst = 1, signals=dSquiggles, labels=dLabels, train_perc = 0.8, valid_perc = 1)
valloader = DataLoader(dataset=valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [16]:
class Discriminator(LightningModule):
    """Class-conditional real-vs-simulated squiggle discriminator.

    Each input row has 408 values: a 152-sample signal followed by a 256-wide
    class row. The class index (argmax of the class row) is embedded, projected
    to 152 values and stacked with the signal as a second channel before the
    convolutional stack. ``forward`` returns a sigmoid output of shape
    (batch, 1); targets are 1 for real rows and 0 for simulated rows.

    Args:
        learning_rate: Step size handed to the Adam optimizer.
        train_loader: DataLoader returned by ``train_dataloader``.
        val_loader: DataLoader returned by ``val_dataloader``.

    The loader defaults are the module-level ``trainloader`` / ``valloader``
    objects that exist at the moment this class statement is executed.
    """

    def __init__(self, learning_rate=0.001, train_loader=trainloader, val_loader=valloader):
        super().__init__()

        self.loss_fn = nn.BCELoss()

        self.learning_rate = learning_rate
        self.train_loader= train_loader
        self.val_loader= val_loader

        self.train_accuracy_real = Accuracy()
        self.train_accuracy_fake = Accuracy()
        self.val_accuracy_real = Accuracy()
        self.val_accuracy_fake = Accuracy()

        self.embedding = nn.Embedding(256, 50)
        self.class_fc = nn.Linear(50, 152) # i.e. (1x152)
        self.act_class_fc = nn.ReLU()

        # Two input channels: the signal and the projected class embedding.
        self.conv1 = nn.Conv1d(2, 8, 8)
        self.act_conv1 = nn.ELU()

        self.conv2 = nn.Conv1d(8, 16, 4, dilation=2)
        self.act_conv2 = nn.ReLU()

        self.conv3 = nn.Conv1d(16, 32, 4, dilation=4)
        self.act_conv3 = nn.ReLU()

        self.conv_dropout = nn.Dropout(p=0.3)

        self.pool = nn.MaxPool1d(8,stride=6)
        self.act_pool = nn.ReLU()
        self.pool_dropout = nn.Dropout(p=0.5)

        # 640 = 32 channels x 20 positions left after the conv stack and pooling.
        self.fc1 = nn.Linear(640,150)
        self.act_fc1 = nn.ELU()
        self.fc1_dropout = nn.Dropout(p=0.5)

        self.fc2 = nn.Linear(150,1) # The output FC layer
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid score, shape (batch, 1), for rows of 408 values."""
        x = torch.reshape(x, (-1, 1, 408)).float()

        signal = x[:,:,0:152]
        s = torch.reshape(signal, (-1,1,152))

        # Recover the class index from the class row and embed it.
        label = x[:,:,152:].argmax(2)
        c = self.embedding(label)
        c = self.class_fc(c)
        c = self.act_class_fc(c)
        c = torch.reshape(c, (-1,1,152))

        out = torch.cat((s,c), dim=1) # (-1, 2, 152)

        out = self.conv1(out)
        out = self.act_conv1(out)

        out = self.conv2(out)
        out = self.act_conv2(out)

        out = self.conv3(out)
        out = self.act_conv3(out)

        out = self.conv_dropout(out)

        out = self.pool(out)
        out = self.act_pool(out)
        out = self.pool_dropout(out)

        out = out.view(-1,640)
        out = self.fc1(out)
        out = self.act_fc1(out)
        out = self.fc1_dropout(out)

        out = self.fc2(out)
        out = self.sigmoid(out)

        return out

    def _score_subset(self, x, y, target):
        """Score the rows of ``x`` whose label equals ``target``.

        Returns ``(outputs, targets)``, both shaped (k, 1), or ``(None, None)``
        when no row matches. The emptiness check happens BEFORE the forward
        pass: ``forward`` reshapes with ``-1``, which PyTorch rejects as
        ambiguous for a tensor with zero elements.
        """
        mask = y == target
        if not bool(mask.any()):
            return None, None
        out = torch.reshape(self(x[mask]), (-1, 1))
        return out, torch.reshape(y[mask], out.shape)

    def _evaluate(self, batch):
        """Run the batch once and return ``(loss, (real_out, y_real), (fake_out, y_fake))``.

        The loss is the mean of the BCE on the real rows and the BCE on the
        fake rows; a subset with no rows contributes 0.
        """
        x, y = batch

        real_out, y_real = self._score_subset(x, y, 1)
        fake_out, y_fake = self._score_subset(x, y, 0)

        if real_out is not None:
            real_loss = self.loss_fn(real_out.float(), y_real.float())
        else:
            real_loss = 0

        if fake_out is not None:
            fake_loss = self.loss_fn(fake_out.float(), y_fake.float())
        else:
            fake_loss = 0

        loss = (real_loss + fake_loss) / 2

        return loss, (real_out, y_real), (fake_out, y_fake)

    def training_step(self, batch, batch_idx):
        """Log epoch-level train loss and per-kind accuracy; return the loss."""
        # Loss and accuracies come from the same forward pass, so they are
        # measured under the same dropout mask.
        loss, (real_out, y_real), (fake_out, y_fake) = self._evaluate(batch)
        self.log("train_loss", loss, prog_bar=True, on_step=False, on_epoch=True)

        if real_out is not None:
            self.train_accuracy_real.update(real_out, y_real.int())
            self.log("train_acc_real", self.train_accuracy_real, prog_bar=True, on_step=False, on_epoch=True)

        if fake_out is not None:
            self.train_accuracy_fake.update(fake_out, y_fake.int())
            self.log("train_acc_fake", self.train_accuracy_fake, prog_bar=True, on_step=False, on_epoch=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Log epoch-level validation loss and per-kind accuracy."""
        loss, (real_out, y_real), (fake_out, y_fake) = self._evaluate(batch)
        self.log("val_loss", loss, prog_bar=True, on_step=False, on_epoch=True)

        if real_out is not None:
            self.val_accuracy_real.update(real_out, y_real.int())
            self.log("val_acc_real", self.val_accuracy_real, prog_bar=True, on_step=False, on_epoch=True)

        if fake_out is not None:
            self.val_accuracy_fake.update(fake_out, y_fake.int())
            self.log("val_acc_fake", self.val_accuracy_fake, prog_bar=True, on_step=False, on_epoch=True)

    def compute_loss(self, batch):
        """Return the averaged real/fake BCE loss for ``batch`` (``(x, y)``)."""
        loss, _, _ = self._evaluate(batch)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with ``self.learning_rate``."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

    ####################
    # DATA RELATED HOOKS
    ####################

    def train_dataloader(self):
        """Return the loader currently stored in ``self.train_loader``."""
        return self.train_loader

    def val_dataloader(self):
        """Return the loader currently stored in ``self.val_loader``."""
        return self.val_loader

In [23]:
# shutil.rmtree('/kaggle/working/logs_discriminator')

In [24]:
# model_discriminator = Discriminator()

# # Define checkpoint callback function to save best model
# checkpoint_callback = ModelCheckpoint(monitor="val_loss",
#                                       dirpath='logs_discriminator/',
#                                       save_top_k=1,
#                                       mode="min",
#                                       every_n_epochs=1)

# # Train and test the model
# trainer = Trainer(
#     accelerator="auto",
#     devices=1 if torch.cuda.is_available() else None,  
#     max_epochs=10,
#     callbacks=[TQDMProgressBar(refresh_rate=20), checkpoint_callback],
#     logger=CSVLogger(save_dir='logs_discriminator/'),
#     deterministic=True
# )

# trainer.fit(model_discriminator)

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [26]:
# Recover model
# Builds a fresh Discriminator and restores its weights from the checkpoint's
# 'state_dict' entry. The checkpoint filename is hard-coded, and map_location
# loads the tensors onto the CPU.

model_discriminator = Discriminator()
checkpoint = torch.load('/kaggle/working/logs_discriminator/epoch=7-step=1552.ckpt', map_location=torch.device('cpu'))
# Last expression of the cell: its return value is what the notebook displays.
model_discriminator.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [27]:
# Report both validation accuracies logged on the row where val_loss is lowest.
traing_data = pd.read_csv('/kaggle/working/logs_discriminator/lightning_logs/version_0/metrics.csv')
best_row = traing_data["val_loss"].argmin(0)
print(f'maximum val_acc_real: {traing_data["val_acc_real"][best_row]}')
print(f'maximum val_acc_fake: {traing_data["val_acc_fake"][best_row]}')

maximum val_acc_real: 0.9800724387168884
maximum val_acc_fake: 0.998030722141266


In [28]:
BATCH_SIZE = 128
# Generator input rows: each simulated signal with its one-hot class row appended
# along the last axis. The target paired with each row is that same class row.
nSquiggles = np.concatenate((signals, np.expand_dims(labels, 1)), axis=2)
# Squiggles splits by row position, so the shuffle decides the split membership.
# A fixed random_state keeps it reproducible across kernel restarts.
nSquiggles, nLabels = shuffle(nSquiggles, labels, random_state=42)

# Call training dataset and create the trainloader (first 70% of the rows).
trainset = Squiggles(trn_val_tst = 0, signals=nSquiggles, labels=nLabels, train_perc = 0.7, valid_perc = 1)
trainloader = DataLoader(dataset=trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

# Call validation dataset and create the valloader (remaining 30% of the rows).
valset = Squiggles(trn_val_tst = 1, signals=nSquiggles, labels=nLabels, train_perc = 0.7, valid_perc = 1)
valloader = DataLoader(dataset=valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [29]:
class Generator(LightningModule):
    """Class-conditional network that rewrites a 152-sample squiggle.

    Each input row has 408 values: a 152-sample signal followed by a 256-wide
    class row. ``forward`` returns a tensor of shape (batch, 1, 152). Training
    combines four terms, weighted by ``loss_weights`` in this order:
    classifier cross-entropy, discriminator BCE (against an all-ones target),
    MSE and L1 between the input row and the generated signal re-joined with
    its class row.

    The loss relies on the module-level ``model_classifier`` and
    ``model_discriminator`` objects, which must exist when a step runs.
    NOTE(review): those two models are not registered as submodules here, so a
    trainer presumably will not move them to the generator's device -- confirm
    before training on an accelerator.

    Args:
        learning_rate: Step size handed to the SGD optimizer.
        loss_weights: Four weights ``[classifier, discriminator, MSE, L1]``.
        train_loader: DataLoader returned by ``train_dataloader``.
        val_loader: DataLoader returned by ``val_dataloader``.
        test_loader: Optional DataLoader returned by ``test_dataloader``.

    The train/val loader defaults are the module-level ``trainloader`` /
    ``valloader`` objects that exist when this class statement is executed.
    """

    def __init__(self, learning_rate=0.001, loss_weights=(1, 1, 1, 0), train_loader=trainloader, val_loader=valloader, test_loader=None):
        super().__init__()

        self.loss_D = nn.BCELoss()
        self.loss_C = nn.CrossEntropyLoss()
        self.MSE = nn.MSELoss()
        self.L1 = nn.L1Loss()

        self.learning_rate = learning_rate
        self.train_loader= train_loader
        self.val_loader= val_loader
        # Always defined, so test_dataloader() never hits a missing attribute.
        self.test_loader = test_loader

        self.train_classifier_accuracy = Accuracy()
        self.val_classifier_accuracy = Accuracy()
        self.train_discriminator_accuracy = Accuracy()
        self.val_discriminator_accuracy = Accuracy()

        # Copy, so instances never share (or mutate) the caller's sequence.
        self.loss_weights = list(loss_weights)

        self.embedding = nn.Embedding(256, 50)
        self.signal_fc = nn.Linear(152, 1722) # re-shape to (41x42) (41 channels)
        self.class_fc = nn.Linear(50, 42) # i.e. (1x42)

        self.conv_t1 = nn.ConvTranspose1d(42, 20, 9, stride=2) # out: (batch, 20, 91)
        self.bn = nn.BatchNorm1d(20)

        self.conv_t2 = nn.ConvTranspose1d(20, 1, 8, stride=2, padding=18) # out: (batch, 1, 152)

    def forward(self, x):
        """Return the generated signal, shape (batch, 1, 152), for rows of 408 values."""
        x = torch.reshape(x, (-1,1,408)).float()
        signal = x[:,0,0:152]
        # Recover the class index from the class row.
        label = x[:,0,152:408].argmax(1)

        c = self.embedding(label)
        c = self.class_fc(c)
        c = torch.reshape(c, (-1,1,42))

        s = self.signal_fc(signal)
        s = torch.relu(s)
        s = torch.reshape(s, (-1,41,42))

        # 41 signal channels + 1 class channel = the 42 channels conv_t1 expects.
        out = torch.cat((s,c), dim=1)
        out = self.conv_t1(out)
        out = self.bn(out)
        out = torch.relu(out)

        out = self.conv_t2(out)
        return out

    def training_step(self, batch, batch_idx):
        """Log epoch-level train losses/accuracies and return the total loss."""
        # Write training step
        loss, class_probabilities, discriminator_probabilities, l_MSE, l_C, l_D, l_L1 = self.compute_loss(batch)
        self.log("train_loss", loss, prog_bar=True, on_step=False, on_epoch=True)

        self.log("MSE_loss", l_MSE, prog_bar=True, on_step=False, on_epoch=True)
        self.log("C_loss", l_C, prog_bar=True, on_step=False, on_epoch=True)
        self.log("D_loss", l_D, prog_bar=True, on_step=False, on_epoch=True)
        self.log("L1_loss", l_L1, prog_bar=True, on_step=False, on_epoch=True)

        x, y = batch
        self.train_classifier_accuracy.update(class_probabilities.argmax(1), y.argmax(1))
        self.log("train_class_acc", self.train_classifier_accuracy, prog_bar=True, on_step=False, on_epoch=True)

        # ones_like keeps the all-ones target on the same device as the scores.
        self.train_discriminator_accuracy.update(discriminator_probabilities, torch.ones_like(discriminator_probabilities, dtype=torch.int))
        self.log("train_disc_acc", self.train_discriminator_accuracy, prog_bar=True, on_step=False, on_epoch=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Log epoch-level validation loss and accuracies."""
        # Write validation step
        loss, class_probabilities, discriminator_probabilities, l_MSE, l_C, l_D, l_L1 = self.compute_loss(batch)
        self.log("val_loss", loss, prog_bar=True, on_step=False, on_epoch=True)

        x, y = batch
        self.val_classifier_accuracy.update(class_probabilities.argmax(1), y.argmax(1))
        self.log("val_class_acc", self.val_classifier_accuracy, prog_bar=True, on_step=False, on_epoch=True)

        self.val_discriminator_accuracy.update(discriminator_probabilities, torch.ones_like(discriminator_probabilities, dtype=torch.int))
        self.log("val_disc_acc", self.val_discriminator_accuracy, prog_bar=True, on_step=False, on_epoch=True)

    def compute_loss(self, batch):
        """Compute the weighted generator loss for ``batch`` (``(x, y)``).

        Returns:
            ``(loss, class_probabilities, real_fake, l_MSE, l_C, l_D, l_L1)``
            where ``class_probabilities`` is the classifier's softmax output on
            the generated signal and ``real_fake`` is the discriminator's score.
        """
        x_batch, y_batch = batch

        # Generate once and reuse the result for every loss term. Repeating the
        # forward pass would also update the BatchNorm running statistics
        # several times per step while in training mode.
        generated = self(x_batch)

        # Re-attach the class row so the result has the same 408-wide layout as the input.
        y_batch_temp = torch.unsqueeze(y_batch, 1)
        noisy_signal_modded = torch.cat((generated, y_batch_temp), dim=2)
        l_MSE = self.MSE(x_batch.float(), noisy_signal_modded.float()).float()
        l_L1 = self.L1(x_batch.float(), noisy_signal_modded.float()).float()

        # The discriminator should label the generated rows as real (target 1).
        model_discriminator.eval()
        real_fake = model_discriminator(noisy_signal_modded)
        l_D = self.loss_D(real_fake, torch.ones_like(real_fake))

        model_classifier.eval()
        c_logits = model_classifier(generated)
        l_C = self.loss_C(c_logits, y_batch)

        loss = self.loss_weights[0]*l_C + self.loss_weights[1]*l_D + self.loss_weights[2]*l_MSE + self.loss_weights[3]*l_L1

        # Softmax over the class axis of the logits already computed above.
        class_probabilities = torch.softmax(c_logits, dim=1)
        return loss, class_probabilities, real_fake, l_MSE, l_C, l_D, l_L1


    def configure_optimizers(self):
        """Return an SGD optimizer over this module's parameters."""
        optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
#         lr_scheduler = {
#         'scheduler' : torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5),
#         'monitor' : 'val_loss'}

        return optimizer

    ####################
    # DATA RELATED HOOKS
    ####################

    def train_dataloader(self):
        """Return the loader currently stored in ``self.train_loader``."""
        return self.train_loader

    def val_dataloader(self):
        """Return the loader currently stored in ``self.val_loader``."""
        return self.val_loader

    def test_dataloader(self):
        """Return the loader stored in ``self.test_loader`` (``None`` unless one was supplied)."""
        return self.test_loader

In [23]:
# shutil.rmtree('/kaggle/working/logs_generator')

In [44]:
# model_generator = Generator()
# checkpoint = torch.load('/kaggle/working/logs_generator/epoch=110-step=6860.ckpt', map_location=torch.device('cpu'))
# model_generator.load_state_dict(checkpoint['state_dict'])

# # Define checkpoint callback function to save best model
# checkpoint_callback = ModelCheckpoint(monitor="val_loss",
#                                       dirpath='logs_generator/',
#                                       save_top_k=1,
#                                       mode="min",
#                                       every_n_epochs=1)

# early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=0.00, patience=20, verbose=True, mode="min")

# # Train and test the model
# trainer = Trainer(
#     accelerator="auto",
#     devices=1 if torch.cuda.is_available() else None,  
#     max_epochs=200,
#     callbacks=[TQDMProgressBar(refresh_rate=20), checkpoint_callback, early_stop_callback],
#     logger=CSVLogger(save_dir='logs_generator/'),
#     deterministic=True
    
# )

# # trainer.tune(model_generator)

# trainer.fit(model_generator)

In [30]:
# Recover model
# Builds a fresh Generator and restores its weights from the checkpoint's
# 'state_dict' entry. The checkpoint filename is hard-coded, and map_location
# loads the tensors onto the CPU.

model_generator = Generator()
checkpoint = torch.load('/kaggle/working/logs_generator/epoch=110-step=15540.ckpt', map_location=torch.device('cpu'))
# Last expression of the cell: its return value is what the notebook displays.
model_generator.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [31]:
# Report the classifier accuracy logged on the row where val_loss is lowest.
traing_data = pd.read_csv('/kaggle/working/logs_generator/lightning_logs/version_0/metrics.csv')
best_row = traing_data["val_loss"].argmin(0)
print(f'maximum val_class_acc: {traing_data["val_class_acc"][best_row]}')

maximum val_class_acc: 0.9748697876930236


In [None]:
# Remove the log directories left by a previous GAN run. Each directory is only
# deleted when it exists: a bare shutil.rmtree raises FileNotFoundError in a
# fresh session, which would stop a Restart & Run All at this cell.
for log_dir in ('/kaggle/working/logs_generator_GAN', '/kaggle/working/logs_discriminator_GAN'):
    if os.path.isdir(log_dir):
        shutil.rmtree(log_dir)

In [None]:
import matplotlib.pyplot as plt


def _fit_with_best_checkpoint(model, log_root, max_epochs):
    """Fit ``model`` with a fresh Trainer and return that Trainer.

    A ModelCheckpoint keeps the single checkpoint with the lowest ``val_loss``
    in ``log_root`` and a CSVLogger writes its metrics below the same
    directory. ``trainer.fit`` is called without dataloaders, so ``model`` has
    to provide them itself.

    Args:
        model: module passed to ``Trainer.fit``.
        log_root: directory used for both the checkpoint and the CSV logs.
        max_epochs: epoch limit for this fit call.

    Returns:
        The Trainer after ``fit`` has returned.
    """
    checkpoint_callback = ModelCheckpoint(monitor="val_loss",
                                          dirpath=log_root,
                                          save_top_k=1,
                                          mode="min",
                                          every_n_epochs=1)
    trainer = Trainer(
        accelerator="auto",
        devices=1 if torch.cuda.is_available() else None,
        max_epochs=max_epochs,
        callbacks=[TQDMProgressBar(refresh_rate=20), checkpoint_callback],
        logger=CSVLogger(save_dir=log_root),
        deterministic=True
    )
    trainer.fit(model)
    return trainer


def _read_logged_metrics(trainer):
    """Load the ``metrics.csv`` written by the logger attached to ``trainer``.

    The directory is taken from the logger itself instead of being rebuilt
    from the outer loop counter, so the right file is read even when the log
    directory already holds earlier versions or the working directory is not
    /kaggle/working.
    """
    return pd.read_csv(os.path.join(trainer.logger.log_dir, 'metrics.csv'))


model_generator.loss_weights = [1,1,0,0]

# Per-super-epoch history, read from the row with the lowest val_loss.
classifier_accuracy_on_generator = [] # after updating generator
discriminator_accuracy_on_generator = []

discriminator_accuracy_real = [] # after updating discriminator
discriminator_accuracy_fake = []

# Keep references to the discriminator's original data: its loaders are
# replaced on every iteration below.
og_train_signals = model_discriminator.train_loader.dataset.signals
og_train_labels = model_discriminator.train_loader.dataset.labels

og_val_signals = model_discriminator.val_loader.dataset.signals
og_val_labels = model_discriminator.val_loader.dataset.labels

BATCH_SIZE = 2048

g_epochs = 10
super_epoch = 0

# NOTE(review): with `or` the loop only stops once BOTH g_epochs >= 150 and
# super_epoch >= 30. g_epochs grows only when the discriminator score on the
# generator drops below 0.7, so termination is not guaranteed. Confirm that
# `or` (rather than `and`) is intended.
while (g_epochs < 150 or super_epoch < 30):
    print(f'EPOCH {super_epoch} COMMENCING\n')

    # TRAIN GENERATOR
    print(f'Epoch {super_epoch}: Training Generator')
    model_generator.train()
    model_discriminator.eval()
    trainer = _fit_with_best_checkpoint(model_generator, 'logs_generator_GAN/', g_epochs)

    # UPDATE STATS
    traing_data = _read_logged_metrics(trainer)
    best_gen_row = traing_data["val_loss"].argmin(0)
    classifier_accuracy_on_generator.append(traing_data["val_class_acc"][best_gen_row])
    discriminator_accuracy_on_generator.append(traing_data["val_disc_acc"][best_gen_row])
    print(f'Classifier Accuracy on Generator: {100*classifier_accuracy_on_generator[-1]}')
    print(f'Mean Discriminator Conviction: {100*discriminator_accuracy_on_generator[-1]}')

    # OBTAIN GENERATOR OUTPUT
    model_generator.eval()
    gen_train_set = model_generator.train_loader.dataset
    gen_val_set = model_generator.val_loader.dataset
    # The outputs are only used as input data for the discriminator, so no
    # autograd graph is needed for these full-dataset forward passes.
    with torch.no_grad():
        gen_train_out_raw = model_generator(gen_train_set.signals) # generator output for train/val sets
        gen_val_out_raw = model_generator(gen_val_set.signals)

    gen_train_out = torch.cat((gen_train_out_raw, torch.unsqueeze(gen_train_set.labels, 1)), 2) # concatenating labels
    train_labels = torch.zeros((gen_train_out_raw.shape[0], 1)) # 'fake' labels for trainset

    gen_val_out = torch.cat((gen_val_out_raw, torch.unsqueeze(gen_val_set.labels, 1)), 2)
    val_labels = torch.zeros((gen_val_out_raw.shape[0], 1))

    # PREPARE NEW DATALOADER FOR DISCRIMINATOR
    # Original data followed by generated samples (labelled 0), then shuffled.
    updated_discriminator_train_data = torch.cat((og_train_signals, gen_train_out), 0)
    updated_discriminator_train_labels = torch.cat((og_train_labels, train_labels), 0)

    updated_discriminator_val_data = torch.cat((og_val_signals, gen_val_out), 0)
    updated_discriminator_val_labels = torch.cat((og_val_labels, val_labels), 0)

    updated_discriminator_train_data, updated_discriminator_train_labels = shuffle(
        updated_discriminator_train_data, updated_discriminator_train_labels)
    updated_discriminator_val_data, updated_discriminator_val_labels = shuffle(
        updated_discriminator_val_data, updated_discriminator_val_labels)

    updated_discriminator_trainset = Squiggles(trn_val_tst = 0,
                                               signals=updated_discriminator_train_data.detach().numpy(),
                                               labels=updated_discriminator_train_labels.detach().numpy(),
                                               train_perc = 1)
    updated_discriminator_trainloader = DataLoader(dataset=updated_discriminator_trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

    # Call validation dataset and create the valloader.
    updated_discriminator_valset = Squiggles(trn_val_tst = 0,
                                             signals=updated_discriminator_val_data.detach().numpy(),
                                             labels=updated_discriminator_val_labels.detach().numpy(),
                                             train_perc = 1)
    updated_discriminator_valloader = DataLoader(dataset=updated_discriminator_valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    model_discriminator.train_loader = updated_discriminator_trainloader
    model_discriminator.val_loader = updated_discriminator_valloader

    # TRAIN DISCRIMINATOR
    print(f'Epoch {super_epoch}: Training Discriminator\n')
    model_discriminator.train()
    trainer = _fit_with_best_checkpoint(model_discriminator, 'logs_discriminator_GAN/', 3)

    traing_data = _read_logged_metrics(trainer)
    best_disc_row = traing_data["val_loss"].argmin(0)
    discriminator_accuracy_real.append(traing_data["val_acc_real"][best_disc_row])
    discriminator_accuracy_fake.append(traing_data["val_acc_fake"][best_disc_row])
    print(f'Discriminator Accuracy on Real Data: {discriminator_accuracy_real[-1]}')
    print(f'Discriminator Accuracy on Fake Data: {discriminator_accuracy_fake[-1]}')

    super_epoch = super_epoch + 1
    # Give the generator a larger epoch budget next round when its logged
    # val_disc_acc at the best row was below 0.7.
    if (discriminator_accuracy_on_generator[-1] < 0.7):
        g_epochs = g_epochs + 10

EPOCH 0 COMMENCING

Epoch 0: Training Generator


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Classifier Accuracy on Generator: 98.3984351158142
Mean Discriminator Conviction: 98.9062488079071
Epoch 0: Training Discriminator



Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Discriminator Accuracy on Real Data: 0.9610507488250732
Discriminator Accuracy on Fake Data: 0.9819720983505248
EPOCH 1 COMMENCING

Epoch 1: Training Generator


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [76]:
# #    
# s = 0

# sample_input_signal = model_generator.val_loader.dataset.signals[0]
# sample_input_label = model_generator.val_loader.dataset.labels[0].argmax()
# PLOT SAMPLE
#     g_out = model_generator(sample_input_signal)
#     real = model_generator.train_loader.dataset.signals[model_generator.train_loader.dataset.labels.argmax(1) == sample_input_label][0]
    
#     pClass = model_classifier.predict(sample_input_signal[0,0:152])
#     pReal = model_discriminator(sample_input_signal)

#     fig = plt.figure(figsize=(10,10))
#     fig.suptitle(f'Classifier is {round(pClass.max().item()*100,1)}% sure that class is {pClass.argmax().item()}\nClass is actually {sample_input_label}\nDiscriminator is {round(pReal.item()*100,1)}% convinced')

#     plt.subplot(2, 2, 1)
#     plt.plot(np.arange(1,152 + 1), g_out[0,0,:].detach())
#     plt.title(f'Noised Signal of Class {sample_input_label}')
    
#     plt.subplot(2, 2, 2)
#     plt.plot(np.arange(1,152 + 1), sample_input_signal[0,0:152])
#     plt.title(f'Original Signal of Class {sample_input_label}')

#     plt.subplot(2, 2, 3)
#     plt.plot(np.arange(1,152 + 1), real[0,0:152])
#     plt.title(f'Sample Real Signal of Class {sample_input_label}')

In [None]:
# Generator training run: switch the module to train mode, then fit it with a
# fresh Trainer for at most 60 epochs.
model_generator.train()

# Keep only the single checkpoint with the lowest val_loss.
checkpoint_callback = ModelCheckpoint(
    dirpath='logs_generator_GAN_BEST/',
    monitor="val_loss",
    mode="min",
    save_top_k=1,
    every_n_epochs=1,
)

# Checkpoints are written to logs_generator_GAN_BEST/ while the CSV metrics
# are logged under logs_generator_GAN/.
trainer = Trainer(
    max_epochs=60,
    accelerator="auto",
    devices=1 if torch.cuda.is_available() else None,
    deterministic=True,
    logger=CSVLogger(save_dir='logs_generator_GAN/'),
    callbacks=[TQDMProgressBar(refresh_rate=20), checkpoint_callback],
)

trainer.fit(model_generator)

In [None]:
import matplotlib.pyplot as plt

# Inference only: put all three networks in eval mode.
for net in (model_classifier, model_generator, model_discriminator):
    net.eval()

s = 2   # index of the sample to show within the selected class
c = 42  # class id to inspect

trainset = model_generator.train_loader.dataset
class_ids = trainset.labels.argmax(1)   # class index per training sample
in_class = class_ids == c               # boolean mask selecting class c
time_axis = np.arange(1,152 + 1)        # x-axis 1..152 shared by all panels

# Run the generator on the whole training set and append the labels before
# passing the result to the discriminator.
noisy_signals = model_generator(trainset.signals).detach()
noisy_signals_labelled = torch.cat((noisy_signals, torch.unsqueeze(trainset.labels, 1)), 2)
d_acc = torch.mean(model_discriminator(noisy_signals_labelled).detach())
print(f'Discriminator accuracy: {100*d_acc}')

# Restrict everything to the samples of class c.
noisy_signals = noisy_signals[in_class]
noisy_signals_labelled = noisy_signals_labelled[in_class]
labels = trainset.labels[in_class]

print(class_ids.shape)

print(sum(in_class))

# Classifier and discriminator outputs for the chosen sample.
pClass = model_classifier.predict(noisy_signals)[s]
pReal = model_discriminator(noisy_signals_labelled)[s]

fig = plt.figure(figsize=(10,10))
fig.suptitle(f'Classifier is {round(pClass.max().item()*100,1)}% sure that class is {pClass.argmax(0).item()}\nClass is {labels[s].argmax(0).item()}\nDiscriminator is {round(pReal.item()*100,1)}% convinced')

# Panel 1: generator output for the chosen sample.
plt.subplot(2, 2, 1)
plt.plot(time_axis, noisy_signals[s][0])
plt.title(f'Noised Signal of Class {c}')

# Panel 2: the corresponding input signal (first 152 values).
plt.subplot(2, 2, 2)
original_signal = trainset.signals[in_class][s,0,0:152]
print(original_signal.shape)
print(time_axis.shape)

plt.plot(time_axis, original_signal)
plt.title(f'Original Scrappie Signal of Class {c}')

# Panel 3: a signal of the same class taken from rSquiggles / rLabels.
plt.subplot(2, 2, 3)
plt.plot(time_axis, rSquiggles[rLabels.argmax(1) == c][s][0])
plt.title(f'Sample Real Signal of Class {c}')