In [96]:
import torch
import torch.nn as nn
import torch.optim as opt
import torch.nn.functional as fun
import numpy as np
import pandas as pd

In [97]:
# Load the training CSV; the helper's return value is unpacked into feature
# and label objects for a training part and a test part.
# NOTE(review): presumably load_dataset performs the train/test split
# internally — confirm in lib.data.
from lib.data import load_dataset

x_train, y_train, x_test, y_test = load_dataset('dts/train.csv')

In [None]:
# Optional feature selection: keep only the features that a logistic-regression
# model rates as important.
# NOTE(review): this cell carries no execution count, and `rs` (the random
# seed) is not defined anywhere in the visible notebook — define it before
# running. Later cells read `x_test.values`, while `transform` returns a NumPy
# array — confirm the downstream cells before enabling this step.
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression

# SelectFromModel.fit clones and fits the estimator itself (prefit=False), so
# the estimator is handed over unfitted instead of being trained twice.
sel = SelectFromModel(estimator=LogisticRegression(random_state=rs))
sel.fit(x_train, y_train)

x_train = sel.transform(x_train)
# Fixed: the selector must be applied to the test features, not to the labels.
x_test = sel.transform(x_test)

In [98]:
# Wrap both splits in the project's `dataset` class.
# NOTE(review): `training_set.sc` is presumably a scaler fitted on the training
# data and handed to the test split so both are preprocessed alike — confirm
# in lib.data.
from lib.data import dataset
training_set = dataset(x_train, y_train)
testing_set = dataset(x_test, y_test, training_set.sc)

In [99]:
from torch.utils.data import DataLoader
from torch.utils.data import SubsetRandomSampler

# Training batches are reshuffled on every pass over the loader.
training_loader = DataLoader(training_set, batch_size=8, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps per-batch
# results reproducible from one run to the next.
testing_loader = DataLoader(testing_set, batch_size=8, shuffle=False)

In [100]:
from math import sqrt
class network(nn.Module):
    """Three-layer fully connected binary classifier.

    The input is narrowed to ``int(sqrt(n_features))`` units, widened back to
    ``n_features`` units, and finally mapped to a single sigmoid output.
    """

    def __init__(self, n_features):
        """Create the three linear layers for inputs with ``n_features`` columns."""
        super().__init__()
        bottleneck = int(sqrt(n_features))
        self.linear1 = nn.Linear(in_features=n_features, out_features=bottleneck)
        self.linear2 = nn.Linear(in_features=bottleneck, out_features=n_features)
        self.classification = nn.Linear(in_features=n_features, out_features=1)

    def activation(self, f):
        """Apply the hidden-layer non-linearity (ReLU) to ``f``."""
        return fun.relu(f)

    def forward(self, x):
        """Return the sigmoid of the classification layer's output for ``x``."""
        narrowed = self.activation(self.linear1(x))
        widened = self.activation(self.linear2(narrowed))
        logits = self.classification(widened)
        return fun.sigmoid(logits)

In [101]:
from nn_lucy import nnModel

In [102]:
def train(number_of_epochs, net, training_loader, lr=0.5, momentum=0.2,
          weight_decay=0.9, log_every=1000):
    """Train ``net`` with SGD on binary cross-entropy and log the mean loss.

    Args:
        number_of_epochs: Number of full passes over ``training_loader``.
        net: Module whose forward pass returns one probability per sample
            (``nn.BCELoss`` requires predictions in [0, 1]).
        training_loader: Iterable yielding ``(features, labels)`` batches.
        lr: SGD learning rate.
        momentum: SGD momentum factor.
        weight_decay: L2 penalty coefficient handed to the optimizer.
        log_every: Print the mean loss once per this many batches.

    Returns:
        None. ``net`` is updated in place.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    criterion = nn.BCELoss()
    optimizer = opt.SGD(params=net.parameters(), lr=lr, momentum=momentum,
                        weight_decay=weight_decay)
    net.train()
    for epoch in range(number_of_epochs):
        # Start each epoch with an empty window so a partial window left over
        # from the previous epoch cannot skew the first report.
        running_loss = 0.0
        for batch_id, (features, labels) in enumerate(training_loader):
            y_pred = net(features)

            # BCELoss needs targets with the same shape and dtype as the
            # predictions; labels are reshaped to a column and cast.
            targets = labels.reshape(-1, 1).to(y_pred.dtype)
            loss = criterion(y_pred, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Report once per full window of `log_every` batches.
            if batch_id % log_every == log_every - 1:
                print('[%d, %5d] loss: %.5f' % (epoch + 1, batch_id + 1, running_loss / log_every))
                # Reset only after reporting, so the printed value really is
                # the mean over the whole window and not just the last batch.
                running_loss = 0.0


In [103]:
from sklearn.metrics import confusion_matrix
def test(testing_loader, net):
    correct = 0
    total = 0
    class_correct = list(0. for i in range(2))
    class_total = list(0. for i in range(2))
    net.eval()
    acc = 0
    with torch.no_grad():
        for batch_id, data in enumerate(testing_loader):
            imgs, labels = data
            predicted = net(imgs)
            
            # _, predicted = torch.max(outputs.data, 1)
            aux = []
            
            for i in range(len(predicted)):
                if predicted[i] > 0.5:
                    aux.append(1)
                else: aux.append(0)
            for p in range(len(aux)):
                if labels[p] == aux[    p]:
                    if labels[p] == 1:
                        class_correct[0] += 1
                    else:
                        class_correct[1] += 1
                if labels[p] == 1:
                    class_total[0] += 1
                else:
                    class_total[1] += 1
        
        return class_correct, class_total
        

In [114]:
# Build the project's nnModel, sized to the number of feature columns, and fit
# it on the training loader.
# NOTE(review): the cell output below is whatever `fit` returns; the first list
# presumably holds one loss value per epoch — confirm in nn_lucy.
model = nnModel(inFeatures = x_train.shape[1], random_state=2)
# train(number_of_epochs=10, net=model, training_loader=training_loader)
model.fit(trainloader = training_loader, numEpochs=10, learningRate=0.05, regularization=0.01)

([1673.6196217872202,
  1274.9052579579875,
  1262.336001103744,
  1260.5273319603875,
  1272.0955765731633,
  1269.1104264240712,
  1256.7407092731446,
  1255.3614376131445,
  1251.3021198706701,
  1249.9038459211588],
 [])

In [115]:
# Predict on the held-out features: `x_test.values` is converted to a float32
# tensor and passed to the model's `predict`.
# NOTE(review): the raw values are used here, not the `testing_set` wrapper —
# confirm whether `predict` expects preprocessed input.
y_pred = model.predict(torch.tensor(x_test.values, dtype=torch.float32))

In [116]:
# Overall accuracy: correctly classified samples (both classes) divided by all
# evaluated samples.
class_correct, class_total = test(testing_loader, model)
print(f"Acc: {sum(class_correct) / sum(class_total)}")

Acc: 0.84625


In [117]:
from lib.data import load_dataset

# Load the unlabeled submission set.
# NOTE: this rebinds `x_test`, replacing the held-out split loaded earlier.
x_test = load_dataset('dts/test.csv', test_only=True)
# Pass `training_set.sc` exactly as is done for the held-out split, so both
# sets go through `dataset` the same way (presumably the scaler fitted on the
# training data — confirm in lib.data). The labels are unknown here, so a 1-D
# vector of zeros, one per row, stands in as a placeholder.
testing_set_ = dataset(x_test, np.zeros(len(x_test)), training_set.sc)
x_test_loader = DataLoader(testing_set_, batch_size=32)

ypred = model.predict(torch.tensor(x_test.values, dtype=torch.float32))
# print(y_pred)

In [113]:
import pandas as pd
# Build the submission table: one row per prediction with a running Id, the
# prediction cast to int as the category.
result = pd.DataFrame(data={
        "Id": range(len(ypred)),
        "Category": ypred.astype(int)
    })

# `index` is a boolean flag; pass False explicitly so the row index is not
# written to the file.
result.to_csv("result.csv", index=False)