In [1]:
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Load the Drive helper and mount
#from google.colab import drive
#drive.mount('/content/drive')

#cheaters = np.load("drive/MyDrive/Comp 451 Final Project/data/cheaters.npy")
#clean = np.load("drive/MyDrive/Comp 451 Final Project/data/legit.npy")

#Upsample cheaters
#cheaters = np.repeat(cheaters, 5, axis=0)

In [3]:
cheaters = np.load("data/cheaters.npy")
clean = np.load("data/legit.npy")

#Upsample cheaters
cheaters = np.repeat(cheaters, 5, axis=0)

In [4]:
#Create labels for both
cheaters_labels = np.ones(10000, dtype=np.float32)
clean_labels = np.zeros(10000, dtype=np.float32)

In [5]:
#Create combined data and labels arrays

x = np.concatenate((cheaters, clean))
y = np.concatenate((cheaters_labels, clean_labels))

del cheaters
del clean
del cheaters_labels
del clean_labels

In [6]:
#Create training, validation, and testing sets

#20% for test set
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size=0.2, stratify=y, random_state=17)

#20% for validation set, 60% for training
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, stratify=y_train, random_state=17)


del x
del y

In [7]:
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.tensor(x)
        self.y = torch.tensor(y)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, i):
        return self.x[i], self.y[i]


In [8]:
#Hyperparameters

num_epochs = 10
learning_rate = 0.0001
batch_size = 16

In [9]:
train_dataset = CustomDataset(x_train, y_train)
validation_dataset = CustomDataset(x_val, y_val)
test_dataset = CustomDataset(x_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [10]:
#Model - JUST TESTING RIGHT NOW

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        #Input = (1, 30, 192, 5)
        self.conv1 = nn.Conv3d(1, 32, kernel_size=3, padding='same')

        self.conv2 = nn.Conv3d(32, 64, kernel_size=3, padding='same')
        
        self.conv3 = nn.Conv3d(64, 128, kernel_size=3, padding='same')
        
        self.conv4 = nn.Conv3d(128, 256, kernel_size=3, padding='same')
        

        #Reduce in half
        self.pool = nn.MaxPool3d(kernel_size=2)

        #(256, 15, 96, 2)
        self.fc1 = nn.Linear(256*15*96*2, 256)
        self.fc2 = nn.Linear(256,32)
        self.fc3 = nn.Linear(32,1)


    def forward(self, x):
        #Add a dimension for channel
        x = x.unsqueeze(1)

        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))

        x = self.pool(x)

        x = torch.flatten(x, 1)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

In [11]:
model = Net()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)

In [12]:
#MODEL TRAINING

best_validation_loss = np.inf
best_model = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    training_loss = 0.0

    model.train()

    #TRAINING
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs).squeeze()

        #print(outputs)

        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        training_loss += loss.item()


    #VALIDATION
    model.eval()
    validation_loss = 0.0

    with torch.no_grad():
        for inputs, labels in validation_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs).squeeze()
            loss = loss_fn(outputs, labels)

            validation_loss += loss.item()

    validation_loss = validation_loss / len(validation_loader)

    scheduler.step(validation_loss)

    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_model = copy.deepcopy(model.state_dict())

    print(training_loss / len(train_loader))
    print(validation_loss)

# Load the best model
model.load_state_dict(best_model)
print(f'Best Validation Loss: {best_validation_loss:.4f}')

0.695533310731252
0.6698111934661866
0.5916887171268463
0.519038637638092
0.2863298390458027
0.32120705059170723
0.0620085277093652
0.35036474744416773
0.02646974816271298
0.3655677639814094
0.02849385210676701
0.4986023560301401
0.01074636545565833
0.4679183023109799
0.0048051050316959545
0.3654648528418038
0.004402330020886439
0.37595154805865605
0.0033837077766123306
0.3621768632570747
Best Validation Loss: 0.3212


In [24]:

def test_CNN(threshold):
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs).squeeze()

            #outputs = F.sigmoid(outputs).round()

            outputs = F.sigmoid(outputs)
            outputs = (outputs >= threshold).float()


            total += labels.size(0)
            correct += (outputs == labels).sum().item()


    print('Threshold: %.2f, Accuracy of the network: %.2f %%' % (threshold, 100 * correct / total))

In [25]:
for threshold in range(50, 100, 1):
    test_CNN(threshold/100)

Threshold: 0.50, Accuracy of the network: 88.10 %
Threshold: 0.51, Accuracy of the network: 88.08 %
Threshold: 0.52, Accuracy of the network: 88.12 %
Threshold: 0.53, Accuracy of the network: 88.12 %
Threshold: 0.54, Accuracy of the network: 88.15 %
Threshold: 0.55, Accuracy of the network: 88.28 %
Threshold: 0.56, Accuracy of the network: 88.15 %
Threshold: 0.57, Accuracy of the network: 87.90 %
Threshold: 0.58, Accuracy of the network: 87.90 %
Threshold: 0.59, Accuracy of the network: 87.88 %
Threshold: 0.60, Accuracy of the network: 87.78 %
Threshold: 0.61, Accuracy of the network: 87.90 %
Threshold: 0.62, Accuracy of the network: 87.90 %
Threshold: 0.63, Accuracy of the network: 87.70 %
Threshold: 0.64, Accuracy of the network: 87.62 %
Threshold: 0.65, Accuracy of the network: 87.42 %
Threshold: 0.66, Accuracy of the network: 87.20 %
Threshold: 0.67, Accuracy of the network: 87.08 %
Threshold: 0.68, Accuracy of the network: 86.72 %
Threshold: 0.69, Accuracy of the network: 86.47 %
