In [1]:
import torch
import torch.nn as nn
import torch.optim as opt
import torch.nn.functional as fun
import numpy as np

In [2]:
from lib.data import load_dataset

x_train, y_train, x_test, y_test = load_dataset('dts/train.csv')

In [3]:
from lib.data import dataset
training_set = dataset(x_train, y_train)
testing_set = dataset(x_test, y_test)

In [4]:
from torch.utils.data import DataLoader
from torch.utils.data import SubsetRandomSampler
n_samples = x_train.shape[0]
idx = [i for i in range(n_samples)]
np.random.shuffle(idx)

split = int(np.floor(n_samples * 0.2)) # 12 000
training_idx, valid_idx = idx[split:], idx[:split]

training_sampler = SubsetRandomSampler(training_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

training_loader = DataLoader(training_set, batch_size=32, sampler=training_sampler)
validation_loader = DataLoader(training_set, batch_size=32, sampler=valid_sampler)

testing_loader=DataLoader(testing_set, batch_size=32, shuffle=True)

In [5]:
class network(nn.Module):
    def __init__(self, n_features):
        super(network, self).__init__()

        self.linear1 = nn.Linear(in_features=n_features, out_features=int(n_features/2))
        self.linear2 = nn.Linear(in_features=int(n_features/2), out_features=n_features)
        self.dropout = nn.Dropout(p=0.5)
        self.classification = nn.Linear(in_features=n_features, out_features=2)
    def activation(self, f):
        return fun.relu(f)
    def forward(self, x):
        x = self.activation(x)
        x_flat = x.view(x.shape[0], -1)
        x_flat = self.activation(self.linear1(x_flat))
        x_flat = self.activation(self.linear2(x_flat))
        x_flat = self.dropout(x_flat)
        x_flat = self.activation(self.activation(x_flat))
        return x_flat

In [20]:
def train(number_of_epochs, net, training_loader):
    criterion = nn.CrossEntropyLoss()
    optimizer = opt.Adam(params=net.parameters(), lr=1e-6)
    running_loss = 0
    for epoch in range(number_of_epochs):
        for batch_id, data in enumerate(training_loader):
            imgs, labels = data
            y_pred = net(imgs)
            loss = criterion(y_pred, labels)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if batch_id % 1000 == 999: # prints every 1000 batchs
                print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_id + 1, running_loss / 1000))
            running_loss = 0.0


In [14]:
from sklearn.metrics import confusion_matrix
def test(testing_loader, net):
    correct = 0
    total = 0
    class_correct = list(0. for i in range(2))
    class_total = list(0. for i in range(2))
    net.eval()
    with torch.no_grad():
        for batch_id, data in enumerate(testing_loader):
            imgs, labels = data
            outputs = net(imgs)

            _, predicted = torch.max(outputs.data, 1)

            for p in range(len(predicted)):
                if labels[p] == predicted[p]:
                    if labels[p] == 1:
                        class_correct[0] += 1
                    else:
                        class_correct[1] += 1
                if labels[p] == 1:
                    class_total[0] += 1
                else:
                    class_total[1] += 1
                    
        return class_correct, class_total
        

In [21]:
model = network(x_train.shape[1])
train(number_of_epochs=5, net=model, training_loader=training_loader)

In [22]:
class_correct, class_total = test(testing_loader, model)
print("Acc (False): " + str(class_correct[0]/class_total[0]) + "\nAcc (True): " + str(class_correct[1]/class_total[1]))

Acc (False): 0.3405596168389211
Acc (True): 0.3788742871311679
