In [62]:
# make simple pytorch neural network
import torch
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.optim import Adam
#import resample
from sklearn.utils import resample
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix, classification_report
from tqdm import tqdm
import numpy as np

In [63]:
# Run configuration: everything a reader might want to tune lives here.
SEED = 1706
LEARNING_RATE = 0.001
EPOCHS = 50
BATCH_SIZE = 64

# Seed the global NumPy and PyTorch generators so that the random operations
# used later in the notebook (weight initialisation, dropout, DataLoader
# shuffling, np.random.shuffle) are repeatable from a fresh kernel.
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load the feature matrix; later cells treat the last column as the label
# and every other column as an input feature.
data = np.load('data/processed/concatenated_features.npy')

In [64]:
# Ratio of label-1 (majority) rows to label-0 (minority) rows in the training split.
# NOTE(review): x_majority / x_minority are only assigned in a later cell (the
# train/test split cell), so this line raises NameError on a fresh kernel with
# Restart & Run All.
x_majority.shape[0] / x_minority.shape[0]

5.247169200905856

In [161]:
# Hold out 20% of the rows for testing. The last column of `data` is the
# label, every other column is a feature.
features, labels = data[:, :-1], data[:, -1]
x_train, x_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=SEED, shuffle=True
)

# Balance the training set by down-sampling the majority class (label 1)
# to the size of the minority class (label 0). Nothing is up-sampled.
is_minority = y_train == 0
is_majority = y_train == 1
x_minority, y_minority = x_train[is_minority], y_train[is_minority]
x_majority, y_majority = x_train[is_majority], y_train[is_majority]

# Draw without replacement so no majority row appears twice.
x_majority_subsample, y_majority_subsample = resample(
    x_majority,
    y_majority,
    replace=False,
    n_samples=len(x_minority),
    random_state=SEED,
)

# Majority subsample first, then all minority rows.
x_train_balanced = np.concatenate([x_majority_subsample, x_minority])
y_train_balanced = np.concatenate([y_majority_subsample, y_minority])


In [162]:
print(torch.__version__)


device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')


if torch.cuda.is_available():
    !nvidia-smi

1.10.1+cu113
Using cuda device
Fri Dec 24 00:18:13 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.82.01    Driver Version: 470.82.01    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  On   | 00000000:07:00.0 Off |                    0 |
| N/A   21C    P0    55W / 400W |   6368MiB / 40536MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM...  On   | 00000000:0F:00.0 Off |                    0 |
| N/A   20C    P0    50W / 400W |      3MiB / 40536MiB | 

In [163]:
class TrainData(Dataset):
    """Labelled dataset pairing each feature row with its target.

    Args:
        X_data: indexable collection of feature rows.
        y_data: indexable collection of targets, one per feature row.

    Raises:
        ValueError: if ``X_data`` and ``y_data`` have different lengths.
    """

    def __init__(self, X_data, y_data):
        # Fail early: a length mismatch would otherwise surface later as an
        # IndexError mid-epoch or as silently unused rows.
        if len(X_data) != len(y_data):
            raise ValueError(
                f'X_data and y_data must have the same length, '
                f'got {len(X_data)} and {len(y_data)}'
            )
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.X_data)


# Wrap the balanced training arrays as 32-bit float tensors for the DataLoader.
balanced_features = torch.FloatTensor(x_train_balanced)
balanced_labels = torch.FloatTensor(y_train_balanced)
train_data = TrainData(balanced_features, balanced_labels)
# Test-time dataset: features only, no targets.
class TestData(Dataset):
    """Unlabelled dataset that serves feature rows by position."""

    def __init__(self, X_data):
        # Keep a reference to the caller's rows; nothing is copied here.
        self.X_data = X_data

    def __len__(self):
        """Return the number of stored rows."""
        n_rows = len(self.X_data)
        return n_rows

    def __getitem__(self, index):
        """Return the feature row stored at ``index``."""
        row = self.X_data[index]
        return row
    

# Held-out features as 32-bit floats; the labels stay in y_test for scoring.
test_features = torch.FloatTensor(x_test)
test_data = TestData(test_features)

# Training batches are reshuffled every epoch. Test rows are served one per
# batch, in their original order, so predictions line up with y_test.
train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=1)

In [164]:
class NeuralNetwork(nn.Module):
    """Fully connected binary classifier that emits one raw logit per sample.

    Three hidden blocks of Linear -> ReLU -> BatchNorm1d -> Dropout are
    followed by a single-unit linear output layer. No sigmoid is applied in
    ``forward``; callers apply it themselves or use a logits-based loss.

    Args:
        in_features: number of input features per sample.
        hidden_sizes: widths of the three hidden layers, in order.
        dropout: drop probability shared by all dropout applications.

    Raises:
        ValueError: if ``hidden_sizes`` does not contain exactly three widths.
    """

    def __init__(self, in_features=5595, hidden_sizes=(10024, 1024, 256), dropout=0.3):
        super().__init__()

        if len(hidden_sizes) != 3:
            raise ValueError(
                f'hidden_sizes must contain exactly 3 widths, got {len(hidden_sizes)}'
            )
        width_1, width_2, width_3 = hidden_sizes

        # Attribute names and registration order are kept stable so that
        # print(model) and state_dict keys do not change.
        self.linear_1 = nn.Linear(in_features, width_1)
        self.linear_2 = nn.Linear(width_1, width_2)
        self.linear_3 = nn.Linear(width_2, width_3)
        self.output_layer = nn.Linear(width_3, 1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout)
        self.batch_norm1 = nn.BatchNorm1d(width_1)
        self.batch_norm2 = nn.BatchNorm1d(width_2)
        self.batch_norm3 = nn.BatchNorm1d(width_3)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, 1)`` raw logits."""
        hidden_blocks = (
            (self.linear_1, self.batch_norm1),
            (self.linear_2, self.batch_norm2),
            (self.linear_3, self.batch_norm3),
        )
        for linear, batch_norm in hidden_blocks:
            # Normalisation is applied after the activation, then dropout.
            x = self.dropout(batch_norm(self.relu(linear(x))))

        return self.output_layer(x)
    

In [165]:
# Build the network, move its parameters to the selected device and print
# the layer summary.
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (linear_1): Linear(in_features=5595, out_features=10024, bias=True)
  (linear_2): Linear(in_features=10024, out_features=1024, bias=True)
  (linear_3): Linear(in_features=1024, out_features=256, bias=True)
  (output_layer): Linear(in_features=256, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.3, inplace=False)
  (batch_norm1): BatchNorm1d(10024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm2): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm3): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [166]:
# Adam updates every parameter of the model at the configured learning rate.
optimizer = Adam(params=model.parameters(), lr=LEARNING_RATE)
# The loss takes raw logits and applies the sigmoid internally, matching the
# model's un-activated output.
criterion = nn.BCEWithLogitsLoss()

In [167]:
def binary_acc(y_pred, y_test):
    """Return the percentage of correct binary predictions, rounded to a whole number.

    Args:
        y_pred: tensor of raw logits (before the sigmoid).
        y_test: tensor of 0/1 targets with the same number of elements.

    Returns:
        A 0-d float tensor holding the accuracy in percent, rounded to the
        nearest integer.

    Raises:
        ValueError: if the two tensors hold a different number of elements.
    """
    # Flatten both sides so that (N, 1) logits compared with (N,) targets are
    # matched element by element instead of broadcasting to an N x N grid,
    # which would inflate the count of "correct" results.
    predicted = torch.round(torch.sigmoid(y_pred)).reshape(-1)
    expected = y_test.reshape(-1)
    if predicted.shape[0] != expected.shape[0]:
        raise ValueError(
            f'y_pred and y_test must have the same number of elements, '
            f'got {predicted.shape[0]} and {expected.shape[0]}'
        )

    correct_results_sum = (predicted == expected).sum().float()
    acc = correct_results_sum / expected.shape[0]

    return torch.round(acc * 100)

In [108]:
# Training mode: dropout is active and batch norm uses per-batch statistics.
model.train()
for e in range(1, EPOCHS+1):
    # Running sums over mini-batches; averaged in the report line below.
    epoch_loss, epoch_acc = 0, 0
    for X_batch, y_batch in tqdm(train_dataloader):
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        # Add a trailing dimension so the targets line up with the
        # (batch, 1) model output.
        targets = y_batch.unsqueeze(1)

        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, targets)
        loss.backward()
        optimizer.step()

        # Accuracy is computed from the logits produced before this update.
        acc = binary_acc(y_pred, targets)

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_dataloader):.5f} | Acc: {epoch_acc/len(train_dataloader):.3f}')

100%|██████████| 194/194 [00:01<00:00, 133.15it/s]


Epoch 001: | Loss: 0.44791 | Acc: 79.263


100%|██████████| 194/194 [00:01<00:00, 132.68it/s]


Epoch 002: | Loss: 0.36266 | Acc: 83.835


100%|██████████| 194/194 [00:01<00:00, 123.84it/s]


Epoch 003: | Loss: 0.31784 | Acc: 86.227


100%|██████████| 194/194 [00:01<00:00, 134.07it/s]


Epoch 004: | Loss: 0.27637 | Acc: 88.387


100%|██████████| 194/194 [00:01<00:00, 133.57it/s]


Epoch 005: | Loss: 0.24337 | Acc: 90.021


100%|██████████| 194/194 [00:01<00:00, 131.56it/s]


Epoch 006: | Loss: 0.22148 | Acc: 91.077


100%|██████████| 194/194 [00:01<00:00, 121.12it/s]


Epoch 007: | Loss: 0.19399 | Acc: 92.289


100%|██████████| 194/194 [00:01<00:00, 131.74it/s]


Epoch 008: | Loss: 0.16809 | Acc: 93.330


100%|██████████| 194/194 [00:01<00:00, 135.00it/s]


Epoch 009: | Loss: 0.15665 | Acc: 93.660


100%|██████████| 194/194 [00:01<00:00, 135.14it/s]


Epoch 010: | Loss: 0.15975 | Acc: 93.742


100%|██████████| 194/194 [00:01<00:00, 135.18it/s]


Epoch 011: | Loss: 0.15154 | Acc: 93.876


100%|██████████| 194/194 [00:01<00:00, 135.17it/s]


Epoch 012: | Loss: 0.14091 | Acc: 94.572


100%|██████████| 194/194 [00:01<00:00, 135.01it/s]


Epoch 013: | Loss: 0.13768 | Acc: 94.546


100%|██████████| 194/194 [00:01<00:00, 135.07it/s]


Epoch 014: | Loss: 0.11424 | Acc: 95.655


100%|██████████| 194/194 [00:01<00:00, 135.21it/s]


Epoch 015: | Loss: 0.12145 | Acc: 95.222


100%|██████████| 194/194 [00:01<00:00, 135.06it/s]


Epoch 016: | Loss: 0.12131 | Acc: 95.376


100%|██████████| 194/194 [00:01<00:00, 134.98it/s]


Epoch 017: | Loss: 0.10053 | Acc: 95.918


100%|██████████| 194/194 [00:01<00:00, 134.97it/s]


Epoch 018: | Loss: 0.09483 | Acc: 96.129


100%|██████████| 194/194 [00:01<00:00, 134.97it/s]


Epoch 019: | Loss: 0.09055 | Acc: 96.680


100%|██████████| 194/194 [00:01<00:00, 135.09it/s]


Epoch 020: | Loss: 0.09747 | Acc: 96.371


100%|██████████| 194/194 [00:01<00:00, 134.85it/s]


Epoch 021: | Loss: 0.07763 | Acc: 96.948


100%|██████████| 194/194 [00:01<00:00, 135.04it/s]


Epoch 022: | Loss: 0.08448 | Acc: 96.675


100%|██████████| 194/194 [00:01<00:00, 135.21it/s]


Epoch 023: | Loss: 0.10712 | Acc: 96.139


100%|██████████| 194/194 [00:01<00:00, 135.06it/s]


Epoch 024: | Loss: 0.09756 | Acc: 96.258


100%|██████████| 194/194 [00:01<00:00, 134.83it/s]


Epoch 025: | Loss: 0.10796 | Acc: 95.742


100%|██████████| 194/194 [00:01<00:00, 124.22it/s]


Epoch 026: | Loss: 0.09286 | Acc: 96.546


100%|██████████| 194/194 [00:01<00:00, 134.76it/s]


Epoch 027: | Loss: 0.08625 | Acc: 96.531


100%|██████████| 194/194 [00:01<00:00, 134.99it/s]


Epoch 028: | Loss: 0.08550 | Acc: 96.809


100%|██████████| 194/194 [00:01<00:00, 134.82it/s]


Epoch 029: | Loss: 0.07762 | Acc: 96.979


100%|██████████| 194/194 [00:01<00:00, 135.09it/s]


Epoch 030: | Loss: 0.07076 | Acc: 97.186


100%|██████████| 194/194 [00:01<00:00, 132.18it/s]


Epoch 031: | Loss: 0.07085 | Acc: 97.155


100%|██████████| 194/194 [00:01<00:00, 133.62it/s]


Epoch 032: | Loss: 0.06104 | Acc: 97.546


100%|██████████| 194/194 [00:01<00:00, 134.60it/s]


Epoch 033: | Loss: 0.07325 | Acc: 97.284


100%|██████████| 194/194 [00:01<00:00, 134.58it/s]


Epoch 034: | Loss: 0.07052 | Acc: 97.170


100%|██████████| 194/194 [00:01<00:00, 134.91it/s]


Epoch 035: | Loss: 0.06536 | Acc: 97.619


100%|██████████| 194/194 [00:01<00:00, 134.83it/s]


Epoch 036: | Loss: 0.07305 | Acc: 97.191


100%|██████████| 194/194 [00:01<00:00, 134.74it/s]


Epoch 037: | Loss: 0.06545 | Acc: 97.526


100%|██████████| 194/194 [00:01<00:00, 134.99it/s]


Epoch 038: | Loss: 0.05730 | Acc: 97.572


100%|██████████| 194/194 [00:01<00:00, 135.03it/s]


Epoch 039: | Loss: 0.04980 | Acc: 98.062


100%|██████████| 194/194 [00:01<00:00, 134.89it/s]


Epoch 040: | Loss: 0.05709 | Acc: 97.768


100%|██████████| 194/194 [00:01<00:00, 135.05it/s]


Epoch 041: | Loss: 0.06108 | Acc: 97.732


100%|██████████| 194/194 [00:01<00:00, 135.03it/s]


Epoch 042: | Loss: 0.05918 | Acc: 97.552


100%|██████████| 194/194 [00:01<00:00, 134.97it/s]


Epoch 043: | Loss: 0.05933 | Acc: 97.722


100%|██████████| 194/194 [00:01<00:00, 134.08it/s]


Epoch 044: | Loss: 0.05615 | Acc: 97.887


100%|██████████| 194/194 [00:01<00:00, 133.26it/s]


Epoch 045: | Loss: 0.06482 | Acc: 97.639


100%|██████████| 194/194 [00:01<00:00, 132.75it/s]


Epoch 046: | Loss: 0.07202 | Acc: 97.216


100%|██████████| 194/194 [00:01<00:00, 134.36it/s]


Epoch 047: | Loss: 0.06662 | Acc: 97.526


100%|██████████| 194/194 [00:01<00:00, 134.78it/s]


Epoch 048: | Loss: 0.05712 | Acc: 97.753


100%|██████████| 194/194 [00:01<00:00, 134.88it/s]


Epoch 049: | Loss: 0.05289 | Acc: 97.845


100%|██████████| 194/194 [00:01<00:00, 124.46it/s]

Epoch 050: | Loss: 0.05193 | Acc: 98.041





In [109]:
# Collect hard 0/1 predictions for every test row as a flat list of floats.
y_pred_list = []
# Evaluation mode: dropout is disabled and batch norm uses running statistics.
model.eval()
with torch.no_grad():
    for X_batch in test_dataloader:
        X_batch = X_batch.to(device)
        # Logits -> probabilities -> thresholded class labels.
        y_test_pred = torch.sigmoid(model(X_batch))
        y_pred_tag = torch.round(y_test_pred)
        # Flatten the (batch, 1) output and extend, so the result is one flat
        # list whatever the loader's batch size is. Squeezing per batch only
        # produced scalars when every batch held exactly one sample.
        y_pred_list.extend(y_pred_tag.reshape(-1).cpu().tolist())

In [110]:
# Rows are the true labels, columns are the predicted labels.
confusion_matrix(y_true=y_test, y_pred=y_pred_list)

array([[1506,   34],
       [ 503, 7612]])

In [111]:
# Per-class precision / recall / F1 on the held-out test set.
report = classification_report(y_true=y_test, y_pred=y_pred_list)
print(report)

              precision    recall  f1-score   support

         0.0       0.75      0.98      0.85      1540
         1.0       1.00      0.94      0.97      8115

    accuracy                           0.94      9655
   macro avg       0.87      0.96      0.91      9655
weighted avg       0.96      0.94      0.95      9655



## Shuffle a column at a time

In [173]:
# single-column shuffle iterator
def sc(x):
    """Yield ``x`` once per column, with that one column shuffled in place.

    The same array object is yielded every time; it is mutated in place, not
    copied. The shuffled column is put back when the generator is resumed or
    closed, so consumers must finish with (or copy) each yielded state before
    asking for the next one.

    Args:
        x: 2-D NumPy array; its columns are shuffled one at a time.

    Yields:
        ``x`` itself, with exactly one column permuted.
    """
    # Use the argument's own width rather than a global of a similar name.
    n_columns = x.shape[1]
    for i in range(n_columns):
        hold = x[:, i].copy()
        np.random.shuffle(x[:, i])
        try:
            yield x
        finally:
            # Runs on normal resumption and on early close, so an abandoned
            # iteration does not leave the column shuffled.
            x[:, i] = hold


# Work on a copy so the column shuffling never touches x_train_balanced.
x_train_shuffle = x_train_balanced.copy()

# The labels are the same for every shuffled variant, so convert them once.
y_train_tensor = torch.FloatTensor(y_train_balanced)

# NOTE(review): `model` and `optimizer` are reused as-is from earlier cells and
# are not re-initialised between columns, so every round continues training
# from the previous round's weights. Confirm this is intended.
with tqdm(total=x_train_shuffle.shape[1]) as pbar:
    for shuffled_data in sc(x_train_shuffle):
        # Dedicated names keep the unshuffled train_data / train_dataloader
        # globals intact for any cell that is re-run afterwards.
        shuffled_train_data = TrainData(torch.FloatTensor(shuffled_data), y_train_tensor)
        shuffled_dataloader = DataLoader(shuffled_train_data, batch_size=BATCH_SIZE, shuffle=True)
        n_batches = len(shuffled_dataloader)

        model.train()
        for e in range(1, EPOCHS+1):
            epoch_loss = 0
            epoch_acc = 0
            for X_batch, y_batch in shuffled_dataloader:
                X_batch = X_batch.to(device)
                # Trailing dimension added to match the (batch, 1) model output.
                targets = y_batch.to(device).unsqueeze(1)
                optimizer.zero_grad()

                y_pred = model(X_batch)

                loss = criterion(y_pred, targets)
                acc = binary_acc(y_pred, targets)

                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()
                epoch_acc += acc.item()
            # tqdm.write keeps the log lines from breaking the progress bar.
            tqdm.write(f'Epoch {e:03}: | Loss: {epoch_loss/n_batches:.5f} | Acc: {epoch_acc/n_batches:.3f}')

        # One tick per shuffled column.
        pbar.update(1)



  0%|          | 0/5595 [00:01<?, ?it/s]

Epoch 001: | Loss: 0.04220 | Acc: 98.418


  0%|          | 0/5595 [00:02<?, ?it/s]

Epoch 002: | Loss: 0.04663 | Acc: 98.088


  0%|          | 0/5595 [00:04<?, ?it/s]

Epoch 003: | Loss: 0.04527 | Acc: 98.273


  0%|          | 0/5595 [00:05<?, ?it/s]

Epoch 004: | Loss: 0.04440 | Acc: 98.284


  0%|          | 0/5595 [00:07<?, ?it/s]

Epoch 005: | Loss: 0.03983 | Acc: 98.521


  0%|          | 0/5595 [00:08<?, ?it/s]

Epoch 006: | Loss: 0.04616 | Acc: 98.211


  0%|          | 0/5595 [00:10<?, ?it/s]

Epoch 007: | Loss: 0.04965 | Acc: 98.082


  0%|          | 0/5595 [00:11<?, ?it/s]

Epoch 008: | Loss: 0.03645 | Acc: 98.546


  0%|          | 0/5595 [00:13<?, ?it/s]

Epoch 009: | Loss: 0.03414 | Acc: 98.624


  0%|          | 0/5595 [00:14<?, ?it/s]

Epoch 010: | Loss: 0.04203 | Acc: 98.191


  0%|          | 0/5595 [00:16<?, ?it/s]

Epoch 011: | Loss: 0.03945 | Acc: 98.495


  0%|          | 0/5595 [00:17<?, ?it/s]

Epoch 012: | Loss: 0.03833 | Acc: 98.443


  0%|          | 0/5595 [00:19<?, ?it/s]

Epoch 013: | Loss: 0.04276 | Acc: 98.278


  0%|          | 0/5595 [00:20<?, ?it/s]

Epoch 014: | Loss: 0.03655 | Acc: 98.495


  0%|          | 0/5595 [00:22<?, ?it/s]

Epoch 015: | Loss: 0.03504 | Acc: 98.624


  0%|          | 0/5595 [00:23<?, ?it/s]

Epoch 016: | Loss: 0.03584 | Acc: 98.629


  0%|          | 0/5595 [00:25<?, ?it/s]

Epoch 017: | Loss: 0.03159 | Acc: 98.706


  0%|          | 0/5595 [00:26<?, ?it/s]

Epoch 018: | Loss: 0.03197 | Acc: 98.773


  0%|          | 0/5595 [00:28<?, ?it/s]

Epoch 019: | Loss: 0.03877 | Acc: 98.598


  0%|          | 0/5595 [00:29<?, ?it/s]

Epoch 020: | Loss: 0.03296 | Acc: 98.552


  0%|          | 0/5595 [00:31<?, ?it/s]

Epoch 021: | Loss: 0.03090 | Acc: 98.784


  0%|          | 0/5595 [00:32<?, ?it/s]

Epoch 022: | Loss: 0.03805 | Acc: 98.531


  0%|          | 0/5595 [00:33<?, ?it/s]

Epoch 023: | Loss: 0.03266 | Acc: 98.742


  0%|          | 0/5595 [00:35<?, ?it/s]

Epoch 024: | Loss: 0.03496 | Acc: 98.521


  0%|          | 0/5595 [00:36<?, ?it/s]

Epoch 025: | Loss: 0.03581 | Acc: 98.716


  0%|          | 0/5595 [00:38<?, ?it/s]

Epoch 026: | Loss: 0.05318 | Acc: 97.943


  0%|          | 0/5595 [00:39<?, ?it/s]

Epoch 027: | Loss: 0.03810 | Acc: 98.479


  0%|          | 0/5595 [00:41<?, ?it/s]

Epoch 028: | Loss: 0.02885 | Acc: 98.799


  0%|          | 0/5595 [00:42<?, ?it/s]

Epoch 029: | Loss: 0.02451 | Acc: 99.057


  0%|          | 0/5595 [00:44<?, ?it/s]

Epoch 030: | Loss: 0.02738 | Acc: 98.897


  0%|          | 0/5595 [00:45<?, ?it/s]

Epoch 031: | Loss: 0.03945 | Acc: 98.639


  0%|          | 0/5595 [00:46<?, ?it/s]

Epoch 032: | Loss: 0.03573 | Acc: 98.474


  0%|          | 0/5595 [00:48<?, ?it/s]

Epoch 033: | Loss: 0.03495 | Acc: 98.588


  0%|          | 0/5595 [00:49<?, ?it/s]

Epoch 034: | Loss: 0.04557 | Acc: 98.474


  0%|          | 0/5595 [00:51<?, ?it/s]

Epoch 035: | Loss: 0.03412 | Acc: 98.598


  0%|          | 0/5595 [00:52<?, ?it/s]

Epoch 036: | Loss: 0.04054 | Acc: 98.320


  0%|          | 0/5595 [00:54<?, ?it/s]

Epoch 037: | Loss: 0.03612 | Acc: 98.562


  0%|          | 0/5595 [00:55<?, ?it/s]

Epoch 038: | Loss: 0.02641 | Acc: 98.892


  0%|          | 0/5595 [00:57<?, ?it/s]

Epoch 039: | Loss: 0.03902 | Acc: 98.716


  0%|          | 0/5595 [00:58<?, ?it/s]

Epoch 040: | Loss: 0.04358 | Acc: 98.314


  0%|          | 0/5595 [00:59<?, ?it/s]

Epoch 041: | Loss: 0.03350 | Acc: 98.536


  0%|          | 0/5595 [01:01<?, ?it/s]

Epoch 042: | Loss: 0.03503 | Acc: 98.758


  0%|          | 0/5595 [01:02<?, ?it/s]

Epoch 043: | Loss: 0.03135 | Acc: 98.711


  0%|          | 0/5595 [01:03<?, ?it/s]


KeyboardInterrupt: 