In [13]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
import numpy as np
import pandas as pd
from rnn import StochasticLSTM

In [14]:
# Load the "without noise" Hill-Valley training and testing splits. Both files
# are comma-separated and every column is parsed as float64.
training_table = pd.read_csv(
    "./data/hill-valley/Hill_Valley_without_noise_Training.data",
    sep=",",
    dtype=np.float64,
)
testing_table = pd.read_csv(
    "./data/hill-valley/Hill_Valley_without_noise_Testing.data",
    sep=",",
    dtype=np.float64,
)

In [3]:
# Preview the first five rows of the training table as a quick sanity check.
training_table.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X92,X93,X94,X95,X96,X97,X98,X99,X100,class
0,1317.265789,1315.220951,1312.770581,1309.834252,1306.315588,1302.099102,1297.046401,1290.991646,1283.736109,1275.041652,...,1327.575109,1327.57535,1327.575552,1327.575719,1327.575859,1327.575976,1327.576074,1327.576155,1327.576223,0.0
1,7329.967624,7379.907443,7441.799231,7518.503422,7613.565031,7731.377492,7877.385707,8058.337694,8282.596458,8560.526497,...,7121.300474,7121.300438,7121.30041,7121.300387,7121.300368,7121.300353,7121.300341,7121.300331,7121.300323,1.0
2,809.42141,809.780119,810.207191,810.715653,811.321016,812.041748,812.899834,813.921452,815.137768,816.585886,...,807.545134,807.544181,807.543381,807.542709,807.542144,807.54167,807.541272,807.540937,807.540656,1.0
3,45334.20888,45334.21356,45334.21906,45334.2255,45334.23305,45334.24191,45334.2523,45334.26448,45334.27876,45334.29552,...,47550.92171,47224.45771,46946.07276,46708.68615,46506.25997,46333.64552,46186.45237,46060.93667,45953.90593,1.0
4,1.810359,1.810359,1.810359,1.810359,1.810359,1.810359,1.810359,1.810359,1.810359,1.810359,...,1.790275,1.794794,1.798296,1.80101,1.803114,1.804744,1.806008,1.806987,1.807746,0.0


In [15]:
# Shared hyper-parameters, assigned in a single statement:
#   batch_size - mini-batch size
#   dim        - presumably the number of features per time step (TODO confirm)
#   seq_length - points per series; the tables expose columns X1..X100
batch_size, dim, seq_length = 5, 1, 100

In [16]:
# Names of the 100 feature columns (X1..X100) and of the label column.
input_columns = [f"X{i}" for i in range(1, 101)]
label_column = "class"


def get_raw_data(table):
    """Convert a table into a list of ``(features, label)`` samples.

    Args:
        table: DataFrame containing the ``input_columns`` feature columns and
            the ``label_column`` column.

    Returns:
        A list with one ``(features, label)`` tuple per row, in row order.
        ``features`` is a 1-D NumPy array holding that row's ``input_columns``
        values and ``label`` is that row's ``label_column`` value. An empty
        table yields an empty list.
    """
    # Extract both column groups in one vectorised step. DataFrame.iterrows()
    # would build a fresh Series for every row, which is needlessly slow.
    features = table[input_columns].to_numpy()
    labels = table[label_column].to_numpy()
    return list(zip(features, labels))

def get_data_loader(table, batch_size=5, shuffle=False, num_workers=2):
    """Wrap a table's samples in a ``DataLoader``.

    Args:
        table: Table accepted by ``get_raw_data``.
        batch_size: Number of samples per mini-batch.
        shuffle: Passed through to the ``DataLoader``'s ``shuffle`` argument.
        num_workers: Number of loader worker processes. Defaults to 2, the
            value that used to be hard-coded; pass 0 to load in the main
            process.

    Returns:
        A ``torch.utils.data.DataLoader`` over the ``(features, label)``
        samples produced by ``get_raw_data(table)``.
    """
    return torch.utils.data.DataLoader(
        get_raw_data(table),
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
    )

def format_input(input_batch):
    """Rearrange a batch-major batch into a time-major, single-feature layout.

    Args:
        input_batch: 2-D tensor of shape ``(batch, seq_len)``.

    Returns:
        Tensor of shape ``(seq_len, batch, 1)`` in which ``out[t, b, 0]``
        equals ``input_batch[b, t]``.
    """
    # The sequence length comes from the tensor itself rather than from a
    # notebook-level constant, so a batch whose length differs from that
    # constant is laid out correctly instead of being silently mis-shaped.
    return input_batch.transpose(1, 0).unsqueeze(-1)

In [17]:
class Net(nn.Module):
    """Two-class sequence classifier: a ``StochasticLSTM`` plus a linear head."""

    def __init__(self):
        super().__init__()
        # Arguments (1, 10, 0.75) are presumably input size, hidden size and
        # dropout rate -- TODO confirm against rnn.StochasticLSTM.
        self.rnn = StochasticLSTM(1, 10, 0.75)
        # Maps the 10 recurrent features onto the two class scores.
        self.fc = nn.Linear(10, 2)

    def forward(self, x):
        """Return softmax-normalised class scores for a batch.

        Args:
            x: Input passed straight to the recurrent layer; presumably shaped
                ``(seq_len, batch, 1)`` -- TODO confirm.

        Returns:
            Tensor whose rows (dim 1) are softmax-normalised over the two
            outputs of the linear head.
        """
        sequence_out, _state = self.rnn(x)
        # Only the last entry along the first axis feeds the classifier head.
        last_step = sequence_out[-1]
        logits = self.fc(last_step)
        # NOTE(review): if the training criterion applies its own (log-)softmax,
        # as torch's cross-entropy loss does, these outputs are normalised
        # twice -- confirm what DropoutCELoss expects.
        return F.softmax(logits, dim=1)

In [18]:
# Instantiate the model and cast it to double precision; the tables are loaded
# with dtype=np.float64.
# NOTE(review): no random seed is set anywhere in the visible cells, so the
# initial weights (and the shuffled batch order) differ between runs.
net = Net().double()

In [19]:
# One sample per mini-batch for both splits; only the training loader shuffles.
train_dl = get_data_loader(
    training_table,
    batch_size=1,
    shuffle=True,
)
test_dl = get_data_loader(
    testing_table,
    batch_size=1,
    shuffle=False,
)

In [None]:
# NOTE(review): DropoutCELoss is not imported in any visible cell; on a fresh
# kernel this line raises NameError unless it is imported or defined elsewhere.
# NOTE(review): 0.75 matches the last argument given to StochasticLSTM in Net,
# presumably the dropout rate; the meaning of 1e-1 and 0.6 cannot be told from
# this notebook -- confirm against the DropoutCELoss definition.
#
# Module.parameters() returns a one-shot iterator. It is materialised into a
# list so the criterion still sees the recurrent parameters if it walks over
# them on every call rather than only once.
criterion = DropoutCELoss(1e-1, 0.6, 0.75, list(net.rnn.parameters()))
# Adam over every model parameter (recurrent layer and linear head).
optimizer = optim.Adam(net.parameters(), lr=0.0001)

In [25]:
# Train for five passes over the training loader, printing the mean loss of
# each consecutive group of 50 mini-batches.
for epoch in range(5):
    running_loss = 0.0

    for i, (inputs, labels) in enumerate(train_dl):
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        inputs = format_input(inputs)
        outputs = net(inputs)
        # Labels are cast to integer class indices for the criterion.
        # NOTE(review): 606 is presumably the number of training samples
        # expected by the criterion -- confirm and derive it from the data.
        loss = criterion(outputs, labels.long(), 606)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 50 != 0:
            continue

        # Report every 50 mini-batches, then restart the running sum.
        print('[%d, %5d] loss: %.6f' %
              (epoch + 1, i + 1, running_loss / 50))
        running_loss = 0.0

print("Finish training")

[1,    50] loss: 0.653141
[1,   100] loss: 0.696216
[1,   150] loss: 0.691544
[1,   200] loss: 0.742371
[1,   250] loss: 0.736022
[1,   300] loss: 0.687860
[1,   350] loss: 0.740323
[1,   400] loss: 0.694245
[1,   450] loss: 0.730551
[1,   500] loss: 0.700554
[1,   550] loss: 0.669152
[1,   600] loss: 0.725907
[2,    50] loss: 0.694977
[2,   100] loss: 0.707384
[2,   150] loss: 0.695005
[2,   200] loss: 0.674142
[2,   250] loss: 0.717182
[2,   300] loss: 0.744442
[2,   350] loss: 0.684122
[2,   400] loss: 0.700369
[2,   450] loss: 0.692869
[2,   500] loss: 0.704508
[2,   550] loss: 0.693576
[2,   600] loss: 0.708912
[3,    50] loss: 0.704991
[3,   100] loss: 0.709955
[3,   150] loss: 0.674131
[3,   200] loss: 0.700422
[3,   250] loss: 0.700586


KeyboardInterrupt: 

In [12]:
# Accuracy over the test loader: a prediction is the index of the largest
# network output and is compared directly with the label.
# NOTE(review): net.eval() is never called in the visible cells -- confirm
# whether StochasticLSTM is meant to stay stochastic during evaluation.
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_dl:
        inputs = format_input(inputs)
        outputs = net(inputs)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network: %d %%' % (100 * correct / total))

Accuracy of the network: 48 %


In [None]:
import matplotlib.pyplot as plt
%matplotlib inline 

In [None]:
# Plot the first series of the first training batch, titled with its label.
for inputs, labels in train_dl:
    if len(inputs) > 0:
        plt.figure()
        plt.plot(inputs[0])
        plt.title(labels[0])
    break  # a single batch is enough for this visual check