In [None]:
import os
import pandas as pd
import numpy as np
import json
import torch
from typing import List, Callable
import random

import random
import colorsys
import requests
from io import BytesIO

import skimage.io
from skimage.measure import find_contours
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import torch.nn as nn
import torchvision
from torchvision import transforms as pth_transforms
from PIL import Image

# seed
# One shared seed for every RNG this notebook touches (stdlib, torch, numpy).
SEED = 42
for seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    seed_rng(SEED)

In [None]:
# attention paths
# Directories holding the saved tensors for the two runs. The defaults are the
# author's local folders; set the ORG_OUT_PATH / ADV_OUT_PATH environment
# variables to point the notebook elsewhere without editing this cell.
ORG_OUT_PATH = os.environ.get('ORG_OUT_PATH', '/Users/javi/org_out/')
ADV_OUT_PATH = os.environ.get('ADV_OUT_PATH', '/Users/javi/adv_out/')

# Statistical analysis

In [None]:
# Placeholder for the tensor loaded from ORG_OUT_PATH; the loading cell below assigns it.
org_data = None

In [None]:
# Load data
# Collect every saved tensor under ORG_OUT_PATH and stack them along dim 0.
# Hidden files (names starting with '.') are skipped. The listing is sorted
# because os.listdir returns entries in arbitrary order, which would otherwise
# make the row order differ between machines/runs.
org_files = sorted(
    name for name in os.listdir(ORG_OUT_PATH) if not name.startswith('.')
)
if not org_files:
    raise FileNotFoundError('No tensor files found in {}'.format(ORG_OUT_PATH))

# NOTE: torch.load unpickles its input -- only point this at trusted files.
org_chunks = [
    torch.load(os.path.join(ORG_OUT_PATH, name), map_location=torch.device('cpu'))
    for name in org_files
]

# Rebuild org_data from scratch with a single concatenation: re-running this
# cell no longer appends the same files a second time, and the data is copied
# once instead of once per file.
org_data = torch.cat(org_chunks, 0)

In [None]:
# Inspect the dimensions of the tensor assembled from ORG_OUT_PATH.
org_data.shape

In [None]:
# Placeholder for the tensor loaded from ADV_OUT_PATH; the loading cell below assigns it.
adv_data = None

In [None]:
# Load data
# Collect every saved tensor under ADV_OUT_PATH and stack them along dim 0.
# Hidden files (names starting with '.') are skipped. The listing is sorted
# because os.listdir returns entries in arbitrary order, which would otherwise
# make the row order differ between machines/runs.
adv_files = sorted(
    name for name in os.listdir(ADV_OUT_PATH) if not name.startswith('.')
)
if not adv_files:
    raise FileNotFoundError('No tensor files found in {}'.format(ADV_OUT_PATH))

# NOTE: torch.load unpickles its input -- only point this at trusted files.
adv_chunks = [
    torch.load(os.path.join(ADV_OUT_PATH, name), map_location=torch.device('cpu'))
    for name in adv_files
]

# Rebuild adv_data from scratch with a single concatenation: re-running this
# cell no longer appends the same files a second time, and the data is copied
# once instead of once per file.
adv_data = torch.cat(adv_chunks, 0)

In [None]:
# Inspect the dimensions of the tensor assembled from ADV_OUT_PATH.
adv_data.shape

torch.Size([283, 1536])

## Clustering with CVAE

In [None]:
# Re-check the shape before combining (272 x 1536 in the recorded run).
org_data.shape

torch.Size([272, 1536])

In [None]:
# Re-check the shape before combining (283 x 1536 in the recorded run).
adv_data.shape

torch.Size([283, 1536])

In [None]:
# Stack the two tensors row-wise: org_data rows first, adv_data rows after.
input_data = torch.cat([org_data, adv_data], dim=0)
input_data.shape

torch.Size([555, 1536])

In [None]:
# Drop autograd tracking and convert to a NumPy array for the embedding step below.
X = input_data.detach().numpy()

In [None]:
# Build a CompressionVAE (from the third-party `cvae` package) on X and train
# it with the library's default arguments -- none are passed here.
# NOTE(review): the default latent size and training length come from cvae; confirm against its docs.
from cvae import cvae
embedder = cvae.CompressionVAE(X)
embedder.train()

In [None]:
# Embed every row of X with the trained embedder; z is what gets visualized below.
z = embedder.embed(X)

In [None]:
# Class ids in the same row order as input_data: 0 for org_data rows, 1 for adv_data rows.
labels = [0] * len(org_data) + [1] * len(adv_data)

In [None]:
# Plot the embedding, passing the 0/1 labels so the two groups can be told apart.
embedder.visualize(z, labels=labels)

# Simple classifier

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# Confirm the size of the stacked matrix (555 x 1536 in the recorded run).
X.shape

(555, 1536)

In [None]:
import pandas as pd

# Wrap each tensor in a DataFrame (one row per tensor row, integer column names).
org_data_df, adv_data_df = (
    pd.DataFrame(tensor.detach().numpy()) for tensor in (org_data, adv_data)
)
org_data_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
0,-4.44373,-1.983228,1.775634,-0.969847,-0.628211,-0.8836,0.346246,-1.11069,-1.480357,1.032114,...,5.8396,-3.684886,-0.070223,-3.682222,0.516357,0.660955,1.670559,1.786071,-0.645031,-5.18912
1,-2.103292,-0.078532,3.644679,-0.11795,1.35003,-0.315393,-0.976455,-0.142835,0.195763,2.347682,...,-8.15517,6.137487,-6.130633,-2.527758,-1.492658,-2.970686,1.371391,0.624259,6.785526,4.020866
2,1.464693,2.133696,0.297278,0.965885,1.295629,-1.763362,-2.298384,-0.262742,2.247479,1.352898,...,-0.258738,1.174659,2.371411,-1.06272,-7.890429,1.641481,3.431002,4.54131,-4.137976,-2.71049
3,-1.533385,-1.42868,-0.478766,0.566976,1.36767,-0.428659,-0.273903,0.159301,1.068214,0.450773,...,9.034399,-6.69125,-2.153956,-2.569039,0.388034,-1.723981,2.461914,-2.094007,2.17962,3.091781
4,-1.935284,-0.204915,0.752658,-0.588209,-2.514683,-0.457104,1.622655,0.779358,-1.414814,2.069527,...,3.488628,8.411485,-0.350613,-6.746727,4.078449,-1.466261,2.307746,1.600232,4.396266,-0.329207


In [None]:
# Attach the class id as a 'target' column: 0 for org_data_df, 1 for adv_data_df.
for frame, class_id in ((org_data_df, 0), (adv_data_df, 1)):
    frame['target'] = class_id

In [None]:
# Check that the new 'target' column is present as the last column.
org_data_df.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1527,1528,1529,1530,1531,1532,1533,1534,1535,target
0,-4.44373,-1.983228,1.775634,-0.969847,-0.628211,-0.8836,0.346246,-1.11069,-1.480357,1.032114,...,-3.684886,-0.070223,-3.682222,0.516357,0.660955,1.670559,1.786071,-0.645031,-5.18912,0
1,-2.103292,-0.078532,3.644679,-0.11795,1.35003,-0.315393,-0.976455,-0.142835,0.195763,2.347682,...,6.137487,-6.130633,-2.527758,-1.492658,-2.970686,1.371391,0.624259,6.785526,4.020866,0
2,1.464693,2.133696,0.297278,0.965885,1.295629,-1.763362,-2.298384,-0.262742,2.247479,1.352898,...,1.174659,2.371411,-1.06272,-7.890429,1.641481,3.431002,4.54131,-4.137976,-2.71049,0


In [None]:
# Merge both labelled frames into one table.
# ignore_index=True renumbers the rows 0..N-1; without it both frames keep
# their own 0-based index, so labels repeat and label-based lookups (.loc)
# become ambiguous.
data = pd.concat([org_data_df, adv_data_df], ignore_index=True)
data.shape

(555, 1537)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing.
# random_state pins the split so re-running this cell alone reproduces it
# (otherwise it depends on how much of the global NumPy RNG was consumed), and
# stratify keeps the class ratio the same in both partitions.
train, test = train_test_split(
    data,
    test_size=0.3,
    random_state=SEED,
    stratify=data['target'],
)

In [None]:
# Display the training split (pandas elides the middle rows/columns when rendering).
train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1527,1528,1529,1530,1531,1532,1533,1534,1535,target
185,1.273176,0.128338,0.856010,5.027805,0.004976,-1.149979,0.606810,-0.394169,0.961845,1.119682,...,-0.976023,1.577306,-2.998958,-0.284316,6.886778,1.255800,-0.043122,3.206200,-4.859666,0
61,-3.335111,-0.380675,0.061883,0.255133,0.003526,-0.047229,-1.035961,-0.147918,-1.620440,-1.915376,...,1.697186,4.313850,-4.441945,-0.160180,2.591147,3.232965,-0.882461,-4.944693,-2.482625,0
125,-4.224346,0.032413,1.458087,1.504792,-0.739043,-1.412148,-0.426359,0.952828,0.269689,-1.651418,...,3.545424,5.841618,-5.915909,3.786183,2.452273,4.080591,-0.014846,-5.053046,-3.731267,1
146,-2.222119,-1.366386,0.933432,1.631109,0.434984,-1.088873,-0.403271,0.186893,-2.116196,-0.421047,...,-5.103269,5.028584,-4.025279,-0.135963,4.709066,-2.214010,-1.356852,3.690621,5.493515,1
172,-0.236359,-0.022697,0.097689,-3.042265,-0.886717,-0.270007,1.299148,-2.246553,-1.467036,-0.000446,...,-3.192858,-1.373758,-5.352683,-1.284639,0.198801,-3.473937,2.068084,-2.944398,2.142596,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,0.275501,1.035690,1.624897,0.166092,-0.014164,-0.744959,0.326784,-0.278091,-0.582315,0.239541,...,0.006588,-3.695614,-5.753434,-0.510481,5.326731,-0.714983,4.476257,-2.258754,3.409911,0
68,-4.027798,-0.459433,0.449201,1.460362,1.693533,-0.526300,-0.220770,-3.119596,-1.355093,0.371345,...,1.052677,-1.485413,-3.858641,-0.381075,0.653798,-5.311978,3.968304,0.062128,5.579256,0
10,-1.639204,-0.074866,0.683072,-0.433916,0.414590,-0.438611,0.413173,0.315742,-0.491801,0.192002,...,1.761547,-1.366699,-4.906579,-0.920463,1.440296,-2.707350,3.382739,-3.669101,0.585613,1
19,-2.130597,0.164450,-1.247337,0.302727,0.714773,-0.248148,0.195578,-0.561176,-2.721044,-0.006302,...,-4.243730,-2.908430,-7.217159,4.719939,-4.546128,1.274457,2.032944,-3.278334,3.827728,1


In [None]:
# Eyeball the class labels of the training split as a raw array.
train['target'].values

array([0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1,
       0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1,
       0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1,
       0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0,

In [None]:
from torch.utils.data import Dataset, DataLoader, TensorDataset

# Split the training frame into features / labels and wrap them for batching.
train_label_array = train['target'].values.flatten()
train_feature_array = train.drop(columns=['target']).values

train_target = torch.tensor(train_label_array).float()
train_data = torch.tensor(train_feature_array)
train_tensor = TensorDataset(train_data, train_target)
# Mini-batches of 8, reshuffled every epoch.
train_loader = DataLoader(dataset=train_tensor, batch_size=8, shuffle=True)

In [None]:
# Split the held-out frame into features / labels and wrap them for batching.
test_target = torch.tensor(test['target'].values.flatten()).float()
test_data = torch.tensor(test.drop(columns=['target']).values)
test_tensor = TensorDataset(test_data, test_target)
# Evaluation does not benefit from shuffling; a fixed order keeps per-batch
# results reproducible and aligned with test_data / test_target.
test_loader = DataLoader(dataset=test_tensor, batch_size=8, shuffle=False)

In [None]:
# Pull a single batch from the loader and print its feature tensor as a sanity check.
first_inputs, _first_targets = next(iter(train_loader))
print(first_inputs)

tensor([[ 2.7434, -0.6738,  3.3175,  ..., -1.0318, -1.8172,  1.3734],
        [-2.7580,  0.1491,  0.5382,  ...,  1.7862, -2.5029,  0.4886],
        [-3.7594, -1.2402,  0.4604,  ...,  1.3356, -3.9356,  2.6007],
        ...,
        [ 0.0966, -0.0332, -0.1303,  ...,  2.7631, -8.0143,  0.2929],
        [-0.3452, -3.8300,  3.5155,  ...,  4.0115, -2.7983, -0.9762],
        [-0.2364, -0.0227,  0.0977,  ...,  2.0681, -2.9444,  2.1426]])


In [None]:
# Simple Binary Classifier Network
class SimpleBC(nn.Module):
    def __init__(self,input_shape):
        super(SimpleBC,self).__init__()
        self.fc1 = nn.Linear(input_shape,64)
        self.fc2 = nn.Linear(64,32)
        self.fc3 = nn.Linear(32,1)

    def forward(self,x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

In [None]:
from tqdm import tqdm

# Hyperparameters
EPOCHS = 10

# Initialise network; the input width is read from the training features
# (1536 in the recorded run) instead of being hard-coded.
net = SimpleBC(train_data.shape[1])

# Select device
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
net.to(device)

# define loss, optimizer, and scheduler
criterion = nn.BCELoss()
# optimizer = optim.Adam(net.parameters(), lr=0.001)
optimizer = optim.Adagrad(net.parameters(), lr=0.01, lr_decay=1e-08, weight_decay=0)
# scheduler = MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)

# Per-epoch training history (mean loss / accuracy over all training samples).
losses = []
accur = []

# Train network
pbar = tqdm(range(EPOCHS))
for epoch in pbar:  # loop over the dataset multiple times

    # ---- training pass ----
    net.train()
    train_loss_sum = 0.0
    train_correct = 0
    train_seen = 0

    # The batch variables are named `targets` (not `labels`) so this loop does
    # not overwrite the notebook-level `labels` list used for the CVAE plot.
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device).float()

        # Forward Pass
        outputs = net(inputs).reshape(-1)

        # Backpropagation
        optimizer.zero_grad()  # Reset the gradient
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Accumulate per-sample totals so a short final batch is not
        # over-weighted. Accuracy reuses the forward pass above instead of
        # running the network a second time.
        n_batch = targets.numel()
        train_loss_sum += loss.item() * n_batch
        train_correct += (outputs.detach().round() == targets).sum().item()
        train_seen += n_batch

    train_running_loss_mean = train_loss_sum / max(train_seen, 1)
    train_acc_mean = train_correct / max(train_seen, 1)

    # ---- evaluation pass ----
    net.eval()
    test_loss_sum = 0.0
    test_correct = 0
    test_seen = 0

    # Errors are deliberately not swallowed here: a failing evaluation batch
    # would otherwise yield silently wrong metrics.
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device).float()

            outputs = net(inputs).reshape(-1)

            n_batch = targets.numel()
            test_loss_sum += criterion(outputs, targets).item() * n_batch
            test_correct += (outputs.round() == targets).sum().item()
            test_seen += n_batch

    # Mean over the WHOLE test set (previously: summed loss and the accuracy
    # of only the last batch).
    test_running_loss = test_loss_sum / max(test_seen, 1)
    test_acc = test_correct / max(test_seen, 1)

    losses.append(train_running_loss_mean)
    accur.append(train_acc_mean)
    pbar.set_description("Ep: {}\t Tr. Loss: {:.4f}\t Tr. Acc: {:.4f}\t T. Loss: {:.4f}\t T. Acc: {:.4f}".format(epoch,
                                                                            train_running_loss_mean,
                                                                            train_acc_mean,
                                                                            test_running_loss,
                                                                            test_acc))

print('Finished Training')

Ep: 9	 Tr. Loss: 0.0002	 Tr. Acc: 1.0000	 T. Loss: 1.7565	 T. Acc: 1.0000: 100%|██████████| 10/10 [00:01<00:00,  8.31it/s]

Finished Training





In [None]:
# Mean training loss recorded at the end of each epoch.
losses

[0.18091017289600353,
 0.00503055152423295,
 0.022716290308640703,
 0.001703856683220971,
 0.0005561071259447145,
 0.00042190250396557455,
 0.00034765338197609465,
 0.0002921576867210002,
 0.0002480224234111992,
 0.0002142114412333972]

In [None]:
# Mean training accuracy recorded at the end of each epoch.
accur

[0.9872448979591837, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

In [None]:
# Feature tensor of the held-out split, used for the whole-set prediction below.
test_data

tensor([[ 1.8633,  0.1913,  4.3046,  ..., -1.4936, -0.8046,  1.1557],
        [-4.1269, -0.7618, -0.3790,  ..., -0.8424, -6.1853,  2.7702],
        [-0.1989,  0.8450, -0.4997,  ...,  2.6226,  1.2345, -4.7090],
        ...,
        [-0.4454, -1.6838,  1.0380,  ...,  0.1693,  2.8023, -3.1618],
        [ 0.1402, -1.1714, -2.3445,  ..., -2.9566, -0.2286, -1.6700],
        [-2.6589,  1.3423,  2.1238,  ...,  6.3238,  4.8756, -3.2321]])

In [None]:
# Score the whole held-out set in one forward pass.
# - eval() + no_grad(): inference only, so no autograd graph is built.
# - .to(device): the network may live on the GPU while test_data is on the CPU.
# - .cpu(): bring the result back so it can be compared with test_target.
net.eval()
with torch.no_grad():
    preds = net(test_data.to(device)).cpu()

In [None]:
# Ground-truth labels of the held-out split, for side-by-side comparison with the predictions.
test_target

tensor([0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 1., 1., 0.,
        0., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0.,
        0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1.,
        1., 1., 0., 0., 1., 0., 1., 0., 1., 1., 1., 1., 0., 1., 0., 0., 0., 1.,
        0., 0., 0., 0., 1., 1., 1., 0., 1., 1., 0., 0., 1., 1., 0., 0., 1., 1.,
        0., 0., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1.,
        0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 0.,
        0., 1., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0.,
        1., 0., 0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1.,
        1., 1., 1., 1., 0.])

In [None]:
# Round the sigmoid outputs to 0/1 and flatten to 1-D so they line up with test_target.
preds.round().flatten()

tensor([0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 1., 1., 0.,
        0., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0.,
        0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1.,
        1., 1., 0., 0., 1., 0., 1., 0., 1., 1., 1., 1., 0., 1., 0., 0., 0., 1.,
        0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 0., 0., 1., 1.,
        0., 0., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1.,
        0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 0., 1.,
        0., 1., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0.,
        1., 0., 0., 1., 0., 1., 1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 1., 1.,
        1., 1., 1., 1., 0.], grad_fn=<ViewBackward>)

In [None]:
# Overall test accuracy: share of rounded predictions that equal the true label.
(preds.round().flatten() == test_target).sum() / len(test_target)

tensor(0.9581)