In [None]:
import pandas as pd
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
from sklearn.model_selection import train_test_split
from BinaryClassifier import BinaryClassification
from tqdm import tqdm
import neptune.new as neptune

In [None]:
# Load the pickled table that holds the features plus a 'label' column.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# from a trusted location. The absolute path is machine-specific.
dataset = pd.read_pickle('/home/mdt/datasets/BinClassDatasets/dataset.pckl')

# `pop` removes the 'label' column from `dataset` in place, so everything that
# remains afterwards is treated as the feature matrix.
y = dataset.pop('label').to_numpy()
X = dataset.to_numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=40,
)

from sklearn.decomposition import PCA

# Optional experiment (disabled): project the features onto 80 principal
# components to check whether PCA helps.
# pca = PCA(80)
# X_train = pca.fit_transform(X_train)
# X_test = pca.transform(X_test)
# print(X_train.shape)

# One [features, label] pair per test row, served one sample at a time.
test_loader = DataLoader(
    [[features, label] for features, label in zip(X_test, y_test)],
    batch_size=1,
)

# Use the first GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)


In [None]:
# Build the project-local classifier with its default constructor arguments.
model = BinaryClassification()
# Move the model to the device selected earlier. Being the last expression of
# the cell, the value returned by `.to(...)` is also shown as the cell output.
model.to(device)

In [None]:
def binary_accuracy(y_pred, y_test):
    """Return batch accuracy as a whole-number percentage tensor.

    Args:
        y_pred: Raw model outputs (logits); a sigmoid is applied here before
            rounding to a hard 0/1 prediction.
        y_test: Ground-truth labels, compared element-wise against the hard
            predictions (so it should have the same shape as ``y_pred``).

    Returns:
        A scalar tensor holding the accuracy in percent, rounded to the
        nearest integer value.
    """
    hard_predictions = torch.sigmoid(y_pred).round()
    num_correct = hard_predictions.eq(y_test).sum().float()
    fraction_correct = num_correct / y_test.shape[0]
    return torch.round(fraction_correct * 100)
def binary_accuracy_no_sig(y_pred, y_test):
    """Return the fraction of hard predictions that match the true labels.

    No sigmoid or rounding is applied: ``y_pred`` must already contain final
    class labels.

    Both inputs are converted to NumPy arrays and flattened before comparison,
    so plain Python lists work and an ``(N, 1)`` column is compared
    element-wise with an ``(N,)`` vector instead of being broadcast to
    ``(N, N)``.

    Args:
        y_pred: Array-like of predicted labels.
        y_test: Array-like of ground-truth labels with the same number of
            elements as ``y_pred``.

    Returns:
        Accuracy as a fraction in ``[0, 1]`` (``nan`` for empty input).

    Raises:
        ValueError: If the two inputs do not contain the same number of
            elements.
    """
    predictions = np.asarray(y_pred).reshape(-1)
    targets = np.asarray(y_test).reshape(-1)
    if predictions.shape != targets.shape:
        raise ValueError(
            'y_pred and y_test must contain the same number of elements, '
            'got {} and {}'.format(predictions.shape[0], targets.shape[0])
        )
    correct_results_sum = np.sum(predictions == targets)
    return correct_results_sum / targets.shape[0]

In [None]:
def train(model, epochs, lr, optimizer, criterion, batch_size, run):
    """Train ``model`` on the notebook's training split and log metrics to ``run``.

    Reads the notebook-level globals ``X_train``, ``y_train`` and ``device``.

    Args:
        model: Network to optimise; called as ``model(X_batch)``. Its output is
            treated as raw logits (``binary_accuracy`` applies the sigmoid),
            presumably of shape ``(batch, 1)`` to match the unsqueezed targets.
        epochs: Number of passes over the training data.
        lr: Learning rate passed to the optimizer constructor.
        optimizer: Optimizer *class* (e.g. ``optim.Adam``); instantiated here
            as ``optimizer(model.parameters(), lr=lr)``.
        criterion: Loss callable invoked as ``criterion(logits, float_targets)``.
            ``None`` falls back to ``torch.nn.BCEWithLogitsLoss()``. Because the
            accuracy metric assumes logits, a logits-based loss is expected.
        batch_size: Mini-batch size for the training ``DataLoader``.
        run: Neptune-style run object supporting ``run[key] = value`` and
            ``run[key].log(value)``.

    Returns:
        None. ``model`` is updated in place.
    """
    # Honour the caller's loss instead of silently replacing it.
    if criterion is None:
        criterion = torch.nn.BCEWithLogitsLoss()

    # Record what is actually used rather than hard-coded labels.
    params = {
        'lr': lr,
        'optimizer': getattr(optimizer, '__name__', type(optimizer).__name__),
        'loss': type(criterion).__name__,
        'batch size': batch_size,
        'epochs': epochs,
    }
    run['parameters'] = params

    # NOTE(review): DataLoader defaults to shuffle=False, so batches are served
    # in the same order every epoch; consider shuffle=True.
    train_loader = DataLoader(
        [[X_train[i], y_train[i]] for i in range(len(X_train))],
        batch_size=batch_size,
    )
    # Exact batch count (the last batch may be smaller); used both for the
    # progress bar and for averaging the per-batch metrics.
    num_batches = len(train_loader)

    opt = optimizer(model.parameters(), lr=lr)

    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        epoch_acc = 0.0

        model.train()

        progress = tqdm(train_loader, total=num_batches, desc='Epoch - {}'.format(epoch))
        for X_batch, y_batch in progress:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            # Add a trailing dimension so targets line up with the model output.
            targets = y_batch.unsqueeze(1)

            opt.zero_grad()
            y_pred = model(X_batch)

            loss = criterion(y_pred, targets.float())
            acc = binary_accuracy(y_pred, targets)

            loss.backward()
            opt.step()

            epoch_loss += loss.item()
            epoch_acc += acc.item()

        # Unweighted mean over batches of the per-batch loss / accuracy (%).
        mean_loss = epoch_loss / num_batches
        mean_acc = epoch_acc / num_batches
        run['train/loss'].log(mean_loss)
        run['train/acc'].log(mean_acc)
        print(f'Epoch {epoch:03}: | Loss: {mean_loss:.5f} | Acc: {mean_acc:.3f}')

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# No api_token is passed on purpose: credentials must not live in the notebook.
# Neptune falls back to the NEPTUNE_API_TOKEN environment variable when the
# argument is omitted, so export that variable before running this cell.
run = neptune.init(
        project="mdt/TCAM",
)

# Guarantee the run is closed even if training or evaluation raises
# (or the cell is interrupted), so no tracking session is left dangling.
try:
    train(model, epochs=40, lr=0.01, optimizer=optim.Adam, criterion=torch.nn.BCEWithLogitsLoss(), batch_size=128, run=run)

    test_loader = DataLoader([[X_test[i], y_test[i]] for i in range(len(X_test))], batch_size=1)

    # Collect hard 0/1 predictions for the whole test split. Each batch is
    # flattened before being stored, so this works for any batch size.
    batch_predictions = []
    model.eval()
    with torch.no_grad():
        for X_batch, y_true in test_loader:
            X_batch = X_batch.to(device)
            y_test_pred = torch.sigmoid(model(X_batch))
            y_pred_tag = torch.round(y_test_pred)
            batch_predictions.append(y_pred_tag.cpu().numpy().reshape(-1))
    y_pred_list = np.concatenate(batch_predictions).astype(int).tolist()

    # Compute the test accuracy once, then both print and log it.
    test_acc = binary_accuracy_no_sig(y_pred_list, y_test)
    print(test_acc)
    run['test/acc'].log(test_acc)
finally:
    run.stop()
