In [1]:
import torch
import torch.nn as nn
import torch.optim as opt
import torch.nn.functional as fun
import numpy as np

In [2]:
from lib.data import load_dataset

x_train, y_train, x_test, y_test = load_dataset('dts/train.csv')

In [3]:
from lib.data import dataset
training_set = dataset(x_train, y_train)
testing_set = dataset(x_test, y_test)

In [4]:
from torch.utils.data import DataLoader
from torch.utils.data import SubsetRandomSampler

training_loader = DataLoader(training_set, batch_size=4, shuffle=True)
testing_loader=DataLoader(testing_set, batch_size=4, shuffle=True)

In [26]:
class network(nn.Module):
    def __init__(self, n_features):
        super(network, self).__init__()
        self.linear1 = nn.Linear(in_features=n_features, out_features=int(n_features/2))
        self.linear2 = nn.Linear(in_features=int(n_features/2), out_features=n_features)
        self.classification = nn.Linear(in_features=n_features, out_features=2)
    def activation(self, f):
        return fun.relu(f)
    def forward(self, x):
        x = self.activation(self.linear1(x))
        x = self.activation(self.linear2(x))
        x = self.classification(x)
        return x

In [6]:
def train(number_of_epochs, net, training_loader):
    criterion = nn.CrossEntropyLoss()
    optimizer = opt.SGD(params=net.parameters(), lr=1e-6, momentum=0.65, weight_decay=0.9)
    running_loss = 0
    for epoch in range(number_of_epochs):
        for batch_id, data in enumerate(training_loader):
            imgs, labels = data
            y_pred = net(imgs)
            loss = criterion(y_pred, labels)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if batch_id % 1000 == 999: # prints every 1000 batchs
                print('[%d, %5d] loss: %.5f' % (epoch + 1, batch_id + 1, running_loss / 1000))
            running_loss = 0.0


In [7]:
from sklearn.metrics import confusion_matrix
def test(testing_loader, net):
    correct = 0
    total = 0
    class_correct = list(0. for i in range(2))
    class_total = list(0. for i in range(2))
    net.eval()
    with torch.no_grad():
        for batch_id, data in enumerate(testing_loader):
            imgs, labels = data
            outputs = net(imgs)

            _, predicted = torch.max(outputs.data, 1)

            for p in range(len(predicted)):
                if labels[p] == predicted[p]:
                    if labels[p] == 1:
                        class_correct[0] += 1
                    else:
                        class_correct[1] += 1
                if labels[p] == 1:
                    class_total[0] += 1
                else:
                    class_total[1] += 1
        return class_correct, class_total
        

In [27]:
model = network(x_train.shape[1])
train(number_of_epochs=15, net=model, training_loader=training_loader)

[1,  1000] loss: 0.00069
[1,  2000] loss: 0.00069
[1,  3000] loss: 0.00069
[1,  4000] loss: 0.00069


In [22]:
class_correct, class_total = test(testing_loader, model)
print("Acc: " + str((class_correct[0] + class_correct[1])/(class_total[0] + class_total[1])))

Acc: 0.899125


In [23]:
from lib.data import load_dataset

x_test = load_dataset('dts/test.csv', test_only=True)
testing_set_ = dataset(x_test, np.zeros(x_test.shape))
x_test_loader = DataLoader(testing_set_, batch_size=4)

y_pred = []
model.eval()
with torch.no_grad():
    for batch_id, data in enumerate(x_test_loader):
        imgs, labels = data
        outputs = model(imgs)

        _, predicted = torch.max(outputs.data, 1)
        for i in predicted:
            y_pred.append(i)
y_pred = np.array(y_pred)
# print(y_pred)

In [25]:
import pandas as pd
result = pd.DataFrame(data={
        "Id": range(y_pred.shape[0]),
        "Category": y_pred.astype(int)
    }, index=None)

result.to_csv("result.csv", index=None)