
# Training a neural network

=====================

In this notebook, we train a fully connected neural network on the dataset. <br>
The code is very computationally intensive: <br>
training the model takes more than 30 minutes on a GTX 970 <br>
if we want decent results.


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [None]:
# Pick the compute device once: use the GPU when CUDA is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

In [None]:
def confusion(prediction, truth):
    """Count the four confusion-matrix outcomes for binary predictions.

    The tensors are compared element-wise and are expected to contain only
    the values 0 (negative class) and 1 (positive class).

    Args:
        prediction: Tensor of predicted class labels (0 or 1).
        truth: Tensor of ground-truth class labels (0 or 1), aligned
            element-wise with `prediction`.

    Returns:
        A tuple `(true_positives, false_positives, true_negatives,
        false_negatives)` of Python ints, i.e. the number of positions where
        the values of `prediction` and `truth` are
        - 1 and 1 (True Positive)
        - 1 and 0 (False Positive)
        - 0 and 0 (True Negative)
        - 0 and 1 (False Negative)
    """
    # Explicit boolean masks replace the former `prediction / truth` trick,
    # which encoded the four cases as 1 / inf / nan / 0 and therefore relied
    # on division by zero yielding inf and nan for the given tensor dtype.
    predicted_positive = prediction == 1
    predicted_negative = prediction == 0
    actual_positive = truth == 1
    actual_negative = truth == 0

    true_positives = torch.sum(predicted_positive & actual_positive).item()
    false_positives = torch.sum(predicted_positive & actual_negative).item()
    true_negatives = torch.sum(predicted_negative & actual_negative).item()
    false_negatives = torch.sum(predicted_negative & actual_positive).item()

    return true_positives, false_positives, true_negatives, false_negatives

### 1. Load and inspect the data

In [None]:
from sklearn.model_selection import train_test_split

# Forward slashes are accepted on Windows as well as on Linux/macOS, whereas a
# backslash-separated relative path only resolves on Windows.
df = pd.read_csv('../Data/data_01_clean.csv', index_col='Unnamed: 0')

# 'Dev' is the target column; every remaining column is used as a feature.
X = df.drop(columns=['Dev'])
y = df['Dev']

X = X.values # to numpy-array
y = y.values # to numpy-array

# Hold out 20 % of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit the scaler on the training split only and reuse its statistics for the
# test split, so no information from the test rows enters the preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Image-style preprocessing pipeline: tensor conversion followed by a
# three-channel normalisation.
# NOTE(review): nothing in this cell applies `transform` to the tabular data.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

batch_size = 4

# Pair every feature row with its label so the DataLoader can batch them.
trainset = list(zip(X_train, y_train))
testset = list(zip(X_test, y_test))

# Training batches are reshuffled on every pass; test batches keep their order.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
)

# Human-readable names for the two class indices (0 and 1).
classes = ('normal','Anomaly')

The samples in this notebook are rows of tabular features rather than images, so there is nothing to display at this step.



### 2. Define a Fully Connected Neural Network

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class DeepNeuralNetwork(nn.Module):
    """Fully connected classifier with four sigmoid-activated hidden layers.

    The layers are stored as `hidden1` ... `hidden5` (the last one is the
    output layer), so state dicts saved from this class keep the same keys
    regardless of the chosen layer widths.

    Args:
        in_features: Number of input features per sample (default 39).
        hidden_sizes: Widths of the four hidden layers, in order
            (default `(4096, 8192, 1000, 100)`).
        num_classes: Number of output scores per sample (default 2).

    Raises:
        ValueError: If `hidden_sizes` does not contain exactly four entries.
    """

    def __init__(self, in_features=39, hidden_sizes=(4096, 8192, 1000, 100), num_classes=2):
        super().__init__()
        if len(hidden_sizes) != 4:
            raise ValueError(
                f"hidden_sizes must contain exactly 4 entries, got {len(hidden_sizes)}"
            )
        width1, width2, width3, width4 = hidden_sizes

        # Layers are created in a fixed order so that, with the default
        # arguments, parameter initialisation matches the original
        # hard-coded architecture.
        self.hidden1 = nn.Linear(in_features, width1)
        self.hidden2 = nn.Linear(width1, width2)
        self.hidden3 = nn.Linear(width2, width3)
        self.hidden4 = nn.Linear(width3, width4)
        self.hidden5 = nn.Linear(width4, num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for the batch `x`."""
        activation = x
        for layer in (self.hidden1, self.hidden2, self.hidden3, self.hidden4):
            activation = torch.sigmoid(layer(activation))
        # No activation on the output layer: the caller receives raw scores.
        return self.hidden5(activation)

### 4. Train the network

In [None]:
import torch.optim as optim
import time

# Build the model in float32 and move it to the selected device.
net = DeepNeuralNetwork()
net = net.float()
net.to(device)

# Per-run logs:
#   train_losses   - mean loss of each block of 2000 mini-batches
#   train_accuracy - (TP, FP, TN, FN, epoch) per training mini-batch
#   test_accuracy  - (TP, FP, TN, FN, epoch) per test mini-batch
train_losses = []
train_accuracy = []
test_accuracy  = []

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

start_time = time.time()

for epoch in range(10):

    running_loss = 0.0

    net.train()
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        inputs = inputs.to(device) #needed when using gpu
        labels = labels.to(device) #needed when using gpu

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs.float())
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            train_losses.append(running_loss / 2000)
            running_loss = 0.0

    # Checkpoint once per epoch. Saving inside the mini-batch loop rewrote the
    # full state dict after every batch, which dominated the training time.
    # Forward slashes keep the path valid on Windows, Linux and macOS.
    torch.save(net.state_dict(), f'../Models/net_e{epoch}.pt')

#---------------------------Test---Testing----------------------------------------------------------
    net.eval()

    correct = 0
    total = 0

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.to(device) #needed when using gpu
            labels = labels.to(device) #needed when using gpu

            # calculate outputs by running the feature batch through the network
            outputs = net(images.float())
            # the class with the highest score is what we choose as prediction
            predicted = torch.argmax(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # one confusion tuple per mini-batch, tagged with the 1-based epoch
            test_accuracy.append(confusion(predicted, labels)+(epoch+1,))


#---------------------------Train---Testing----------------------------------------------------------

    # The network is still in eval mode here; net.train() is called again at
    # the start of the next epoch.
    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in trainloader:
            images, labels = data
            images = images.to(device) #needed when using gpu
            labels = labels.to(device) #needed when using gpu

            # calculate outputs by running the feature batch through the network
            outputs = net(images.float())
            # the class with the highest score is what we choose as prediction
            predicted = torch.argmax(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # one confusion tuple per mini-batch, tagged with the 1-based epoch
            train_accuracy.append(confusion(predicted, labels)+(epoch+1,))


print('Finished Training')
print(f'Time: {time.time() - start_time:.1f}s')

Load model from file 

In [None]:
import matplotlib.pyplot as plt
# Each entry of train_losses is the mean loss over one block of 2000 mini-batches.
plt.plot(train_losses, label="Training losses")
plt.xlabel("Logging step (one per 2000 mini-batches)")
plt.ylabel("Mean cross-entropy loss")
# The label passed to plot() is only rendered once a legend is drawn.
plt.legend();

In [None]:
# Per-epoch metrics on the test split: sum the per-batch confusion counts
# within each epoch, then derive accuracy, recall and precision from the totals.
res_df = (
    pd.DataFrame(
        test_accuracy,
        columns=['true_positives', 'false_positives', 'true_negatives', 'false_negatives', 'epoch'],
    )
    .groupby('epoch')
    .agg({
        'true_positives': 'sum',
        'false_positives': 'sum',
        'true_negatives': 'sum',
        'false_negatives': 'sum',
    })
    .reset_index()
    .assign(
        accuracy=lambda d: (d['true_positives'] + d['true_negatives']) / X_test.shape[0],
        recall=lambda d: d['true_positives'] / (d['true_positives'] + d['false_negatives']),
        precission=lambda d: d['true_positives'] / (d['true_positives'] + d['false_positives']),
    )
)
res_df

In [None]:
# Per-epoch metrics on the training split: sum the per-batch confusion counts
# within each epoch, then derive accuracy, recall and precision from the totals.
res_df = (
    pd.DataFrame(
        train_accuracy,
        columns=['true_positives', 'false_positives', 'true_negatives', 'false_negatives', 'epoch'],
    )
    .groupby('epoch')
    .agg({
        'true_positives': 'sum',
        'false_positives': 'sum',
        'true_negatives': 'sum',
        'false_negatives': 'sum',
    })
    .reset_index()
    .assign(
        accuracy=lambda d: (d['true_positives'] + d['true_negatives']) / X_train.shape[0],
        recall=lambda d: d['true_positives'] / (d['true_positives'] + d['false_negatives']),
        precission=lambda d: d['true_positives'] / (d['true_positives'] + d['false_positives']),
    )
)
res_df

### 5. Test the network on the test data

In [None]:
# Overall accuracy of the current network on the batches of `testloader`.
correct, total = 0, 0

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    for images, labels in testloader:
        # move the batch to the selected device (needed when using gpu)
        images, labels = images.to(device), labels.to(device)

        # forward pass; the predicted class is the index of the largest output score
        outputs = net(images.float())
        predicted = outputs.data.argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test data: {100 * correct // total} %')

In [None]:
# Overall accuracy of the current network on the batches of `trainloader`.
correct, total = 0, 0

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    for images, labels in trainloader:
        # move the batch to the selected device (needed when using gpu)
        images, labels = images.to(device), labels.to(device)

        # forward pass; the predicted class is the index of the largest output score
        outputs = net(images.float())
        predicted = outputs.data.argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the train data: {100 * correct // total} %')