In [61]:
import torch
import torch.nn as nn
import torch.optim as opt
import torch.nn.functional as fun
import numpy as np
import pandas as pd

In [62]:
from lib.data import load_dataset

x_train, y_train, x_test, y_test = load_dataset('dts/train.csv')

In [63]:
# from sklearn.datasets import load_breast_cancer
# from sklearn.model_selection import train_test_split
# data = load_breast_cancer()
# x = data['data']
# y = data['target']

# x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=1)

# train, test = train_test_split(data, test_size=0.2, random_state=2)

In [64]:
from lib.data import dataset
training_set = dataset(x_train, y_train)
testing_set = dataset(x_test, y_test)

In [65]:
from torch.utils.data import DataLoader
from torch.utils.data import SubsetRandomSampler

training_loader = DataLoader(training_set, batch_size=32, shuffle=True)
testing_loader=DataLoader(testing_set, batch_size=32, shuffle=True)

In [66]:
class network(nn.Module):
    def __init__(self, n_features):
        super(network, self).__init__()
        self.linear1 = nn.Linear(in_features=n_features, out_features=int(n_features/2))
        self.linear2 = nn.Linear(in_features=int(n_features/2), out_features=n_features)
        self.classification = nn.Linear(in_features=n_features, out_features=1)
    def activation(self, f):
        return fun.relu(f)
    def forward(self, x):
        x = self.activation(self.linear1(x))
        x = self.activation(self.linear2(x))
        x = self.classification(x)
        x = fun.sigmoid(x)
        return x

In [67]:
def train(number_of_epochs, net, training_loader):
    criterion = nn.BCELoss()
    optimizer = opt.SGD(params=net.parameters(), lr=1.7e-2)# momentum=0.65, weight_decay=0.9)
    running_loss = 0
    net.train()
    for epoch in range(number_of_epochs):
        for batch_id, data in enumerate(training_loader):
            imgs, labels = data
            y_pred = net(imgs)

            # print(y_pred)
            loss = criterion(y_pred, labels.reshape(-1, 1))
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            if batch_id % 1000 == 99: # prints every 1000 batchs
                print('[%d, %5d] loss: %.5f' % (epoch + 1, batch_id + 1, running_loss / 1000))
            running_loss = 0.0


In [68]:
from sklearn.metrics import confusion_matrix
def test(testing_loader, net):
    correct = 0
    total = 0
    class_correct = list(0. for i in range(2))
    class_total = list(0. for i in range(2))
    net.eval()
    acc = 0
    with torch.no_grad():
        for batch_id, data in enumerate(testing_loader):
            imgs, labels = data
            predicted = net(imgs)
            
            # _, predicted = torch.max(outputs.data, 1)
            aux = []
            
            for i in range(len(predicted)):
                if predicted[i] > 0.5:
                    aux.append(1)
                else: aux.append(0)
            for p in range(len(aux)):
                if labels[p] == aux[    p]:
                    if labels[p] == 1:
                        class_correct[0] += 1
                    else:
                        class_correct[1] += 1
                if labels[p] == 1:
                    class_total[0] += 1
                else:
                    class_total[1] += 1
        
        return class_correct, class_total
        

In [74]:
model = network(x_train.shape[1])
train(number_of_epochs=20, net=model, training_loader=training_loader)

[1,   100] loss: 0.00070
[2,   100] loss: 0.00028
[3,   100] loss: 0.00025
[4,   100] loss: 0.00007
[5,   100] loss: 0.00007
[6,   100] loss: 0.00003
[7,   100] loss: 0.00006
[8,   100] loss: 0.00001
[9,   100] loss: 0.00002
[10,   100] loss: 0.00000
[11,   100] loss: 0.00002
[12,   100] loss: 0.00002
[13,   100] loss: 0.00005
[14,   100] loss: 0.00003
[15,   100] loss: 0.00000
[16,   100] loss: 0.00000
[17,   100] loss: 0.00000
[18,   100] loss: 0.00000
[19,   100] loss: 0.00000
[20,   100] loss: 0.00000


In [75]:
class_correct, class_total = test(testing_loader, model)
print("Acc: " + str((class_correct[0] + class_correct[1])/(class_total[0] + class_total[1])))

Acc: 0.98225


In [78]:
from lib.data import load_dataset

x_test = load_dataset('dts/test.csv', test_only=True)
testing_set_ = dataset(x_test, np.zeros(x_test.shape))
x_test_loader = DataLoader(testing_set_, batch_size=32)

y_pred = []
model.eval()
with torch.no_grad():
    for batch_id, data in enumerate(x_test_loader):
        imgs, labels = data
        outputs = model(imgs)

        _, predicted = torch.max(outputs.data, 1)

            
        for i in range(len(predicted)):
            if predicted[i] > 0.5:
                y_pred.append(1)
            else: y_pred.append(0)
y_pred = np.array(y_pred)
# print(y_pred)



In [79]:
import pandas as pd
result = pd.DataFrame(data={
        "Id": range(y_pred.shape[0]),
        "Category": y_pred.astype(int)
    }, index=None)

result.to_csv("result.csv", index=None)