In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader

In [2]:
# Number of characters per input sequence; process_data allocates one
# feature column per character.
SEQ_LEN: int = 4
# Number of samples per mini-batch; train() multiplies the batch count by
# this value to report the total number of samples.
BATCH_SIZE: int = 64

In [3]:
# Load the train and test CSV files (train first, then test).
training_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ("task3_ks39mcp5/train.csv", "task3_ks39mcp5/test.csv")
)

In [4]:
def sequence(seq):
    """Encode a string as the list of Unicode code points of its characters.

    Args:
        seq: An iterable of single characters (typically a ``str``).

    Returns:
        A list with ``ord(c)`` for every character ``c`` of ``seq``, in order.
    """
    return list(map(ord, seq))

In [5]:
def process_data(data):
    """Encode the first column of ``data`` as a numeric feature matrix.

    Every entry of the first column is converted with ``sequence`` into its
    character codes and stored as one row of the result.

    Args:
        data: A pandas DataFrame whose first column holds the sequences.

    Returns:
        A float ``numpy.ndarray`` of shape ``(len(data), SEQ_LEN)``.

    Raises:
        ValueError: If an entry does not encode to exactly ``SEQ_LEN`` values.
    """
    # Pull the column out once instead of doing a scalar .iloc lookup per row.
    sequences = data.iloc[:, 0]
    X = np.zeros((len(sequences), SEQ_LEN))
    for i, seq in enumerate(sequences):
        encoded = sequence(seq)
        # Without this check a length-1 sequence would be silently broadcast
        # across the whole row by the slice assignment below.
        if len(encoded) != SEQ_LEN:
            raise ValueError(
                f"row {i}: expected a sequence of length {SEQ_LEN}, "
                f"got length {len(encoded)}"
            )
        X[i, :] = encoded
    return X

In [6]:
# Labels come from the 'Active' column; features are the encoded sequences.
y_train = training_data.loc[:, 'Active']
X_train = process_data(training_data)

In [7]:
# Group the samples into consecutive mini-batches of BATCH_SIZE rows each.
# NOTE: the number of samples must be a multiple of BATCH_SIZE; a shorter
# final batch cannot be stacked with the others and raises ValueError below.
X = []
y = []
for i in range(0, X_train.shape[0], BATCH_SIZE):
    X.append(X_train[i:i+BATCH_SIZE, :])
    # Convert each label slice to a plain float array so stacking does not
    # depend on the pandas index of the slice.
    y.append(np.asarray(y_train[i:i+BATCH_SIZE], dtype=np.float32))

# Stack into one ndarray before creating the tensor: building a tensor from a
# Python list of arrays is very slow. torch.tensor(..., dtype=float32) yields
# the same float32 tensors as the legacy torch.Tensor(...) constructor, and
# the deprecated Variable wrapper is no longer needed.
X_train = torch.tensor(np.stack(X), dtype=torch.float32)
y_train = torch.tensor(np.stack(y), dtype=torch.float32)

In [8]:
# Encode the test sequences with the same character-code featurisation that
# was applied to the training data.
X_test = process_data(test_data)

In [9]:
# Append a trailing singleton dimension so each label lines up with the
# model's single output value per sample.
y_train = y_train.reshape(y_train.shape[0], y_train.shape[1], 1)

In [21]:
# Define model
class NeuralNetwork(nn.Module):
    """Fully connected binary classifier with two hidden ReLU layers.

    The network maps ``input_size`` features to a single sigmoid output, so
    every output value lies strictly between 0 and 1.

    Args:
        input_size: Number of input features per sample. Defaults to 4, the
            value that was previously hard-coded.
        hidden_size: Width of both hidden layers. Defaults to 16, the value
            that was previously hard-coded.
    """

    def __init__(self, input_size=4, hidden_size=16):
        super().__init__()
        # The attribute name is kept so existing state_dict keys stay valid.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the sigmoid output for input ``x`` of shape ``(..., input_size)``."""
        return self.linear_relu_stack(x)


In [11]:
def train(X_train, y_train, model, loss_fn, optimizer):
    """Run one optimisation pass over pre-batched training data.

    Args:
        X_train: Tensor indexed as ``X_train[batch, sample, feature]``.
        y_train: Tensor whose first dimension indexes the same batches as
            ``X_train``; ``y_train[batch]`` is passed to ``loss_fn`` as target.
        model: The ``nn.Module`` to optimise.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar loss.
        optimizer: Optimiser that holds ``model``'s parameters.

    Returns:
        None. The loss of every 100th batch is printed as a progress report.
    """
    n_batches = X_train.shape[0]
    # Take the total sample count from the data itself rather than from a
    # notebook-level constant, so the report is right for any batch size.
    size = n_batches * X_train.shape[1]

    # test() switches the model to eval mode; make sure training always runs
    # in train mode regardless of what was called before.
    model.train()

    for batch in range(n_batches):
        X = X_train[batch, :, :]
        y = y_train[batch]

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # Use a separate name so the loss tensor is not rebound to a float.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")


In [12]:
def test(dataloader, model):
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [25]:
# Fresh network, binary cross-entropy on its sigmoid output, and plain SGD.
model = NeuralNetwork()
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)

In [26]:
# Number of full passes over the pre-batched training tensors.
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    # One pass over every mini-batch; train() prints the loss of every
    # 100th batch.
    train(X_train, y_train, model, loss_fn, optimizer)
print("Done!")

Epoch 1
-------------------------------
loss: 1.224924  [    0/112000]
loss: 0.279283  [ 6400/112000]
loss: 0.246594  [12800/112000]
loss: 0.036671  [19200/112000]
loss: 0.171770  [25600/112000]
loss: 0.278371  [32000/112000]
loss: 0.171179  [38400/112000]
loss: 0.142478  [44800/112000]
loss: 0.236338  [51200/112000]
loss: 0.120944  [57600/112000]
loss: 0.079787  [64000/112000]
loss: 0.177671  [70400/112000]
loss: 0.041399  [76800/112000]
loss: 0.331096  [83200/112000]
loss: 0.044973  [89600/112000]
loss: 0.031400  [96000/112000]
loss: 0.173785  [102400/112000]
loss: 0.270141  [108800/112000]
Epoch 2
-------------------------------
loss: 0.032979  [    0/112000]
loss: 0.261760  [ 6400/112000]
loss: 0.237740  [12800/112000]
loss: 0.036744  [19200/112000]
loss: 0.169968  [25600/112000]
loss: 0.272517  [32000/112000]
loss: 0.166356  [38400/112000]
loss: 0.143586  [44800/112000]
loss: 0.236761  [51200/112000]
loss: 0.115380  [57600/112000]
loss: 0.081095  [64000/112000]
loss: 0.175972  [70

In [27]:
# Forward pass on the first training mini-batch only.
pred = model(X_train[0])

In [28]:
# Display the model's sigmoid outputs for the first training mini-batch.
pred

tensor([[0.0101],
        [0.0185],
        [0.0111],
        [0.0140],
        [0.0175],
        [0.0832],
        [0.0159],
        [0.0079],
        [0.0207],
        [0.0315],
        [0.0142],
        [0.0173],
        [0.0527],
        [0.0490],
        [0.0158],
        [0.0123],
        [0.0119],
        [0.0183],
        [0.0231],
        [0.0601],
        [0.0382],
        [0.0134],
        [0.0072],
        [0.0107],
        [0.0284],
        [0.0440],
        [0.0239],
        [0.0268],
        [0.0297],
        [0.0304],
        [0.0188],
        [0.0072],
        [0.0627],
        [0.0097],
        [0.0100],
        [0.0268],
        [0.0796],
        [0.0418],
        [0.0732],
        [0.0312],
        [0.0571],
        [0.0225],
        [0.0140],
        [0.0634],
        [0.0365],
        [0.0516],
        [0.0368],
        [0.0126],
        [0.0633],
        [0.0491],
        [0.0185],
        [0.0285],
        [0.0146],
        [0.0391],
        [0.0207],
        [0