In [1]:
# loading packages
import pickle
import numpy as np
import os
import torch
import torch.optim as optim
import torch.nn as nn

In [2]:
# load data
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding = 'bytes')
    return dict

## function to load a batch
def load_CIFAR_batch(filename):
    with open(filename, 'rb') as f:
        datadict = pickle.load(f, encoding = 'latin1')
        X = datadict['data']
        Y = datadict['labels']
        X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype('float')
        Y = np.array(Y)
    return X, Y

## function to load the whole dataset
def load_CIFAR10():
    xs = []
    ys = []
    for b in range(1,6):
        location = 'data_batch_' + str(b)
        X, Y = load_CIFAR_batch(location)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    del X, Y
    Xte, Yte = load_CIFAR_batch('test_batch')
    return Xtr, Ytr, Xte, Yte

## load dataset
x_train, y_train, x_test, y_test = load_CIFAR10()

In [3]:
# flatten the dataset
x_train = x_train.reshape(50000, 96, 32)
x_test = x_test.reshape(10000, 96, 32)
y_train = y_train.reshape(50000, 1)
y_test = y_test.reshape(10000, 1)

In [4]:
# set up device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [5]:
# Transform np.array into tensor
x_train_tensor = torch.from_numpy(x_train).float().to(device)
y_train_tensor = torch.from_numpy(y_train).long().to(device)
x_test_tensor = torch.from_numpy(x_test).float().to(device)
y_test_tensor = torch.from_numpy(y_test).long().to(device)

In [38]:
# numbers of input
n_step = 96    # the number of time steps in each input stream. 
               # correspond to seq_len of the input dimensions (seq_len, batch, input_size)
# dimension of input
## n_input = 32
input_dim = 32    #  The number of expected features in the input
# hyperparameters
n_hidden_units = 128    # The number of features in the hidden state h
n_layers = 1

In [39]:
# Build RNN model
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Define LSTM layer
        self.lstm_1 = nn.LSTM(input_size = input_dim, hidden_size = n_hidden_units, num_layers = 1)
        # Define dense output layer, 10 units - one for each category
        self.dense_1 = nn.Linear(n_hidden_units, 10) # num_directions = 1 by default, 
                                                         # due to bidirectional set to False by default
        # Define activation output
        self.softmax = nn.Softmax(dim = 1)
        
    def forward(self, x):
        # Pass the input tensor through each of our operations
        x, states = self.lstm_1(x)
        x = self.dense_1(x)
        x = self.softmax(x)
        
        return x
    
#    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        # We'll send the tensor holding the hidden state to the device we specified earlier as well
#        hidden = torch.zeros(n_layers, batch_size, hidden_dim)
#        return hidden


In [40]:
(batch_x.shape)

torch.Size([1024, 96, 32])

In [41]:
model = Network()
print(model)

Network(
  (lstm_1): LSTM(32, 128)
  (dense_1): Linear(in_features=128, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)


In [43]:
# Training

n_epochs = 10
batch_size = 1024    # minibatches of size 1024
n_batch = 10    # number of minibatches

# Define optimizer
learning_rate = 0.001
optimizer = optim.SGD(model.parameters(), lr = learning_rate)
## optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# Define loss
criterion = nn.CTCLoss()
# criterion = nn.CrossEntropyLoss()
# criterion = nn.NLLLoss()

# Define accuracy = correct / total
train_total = 0
train_correct = 0

# Define steps & print_every
steps = 0
print_every = 100

model.train()
for epoch in range(n_epochs):
    # set initial running loss
    running_loss = 0
    # get a permutation for batch sampling
    permutation = torch.randperm(y_train_tensor.size()[0])
    
    # for i in range(0, y_train_tensor.size()[0], batch_size):
    for i in range(0, batch_size*n_batch, batch_size):    # batch_size * n_batch = total number of observations
        steps += 1
        # sample a batch
        indices = permutation[i:i+batch_size]
        batch_x, batch_y = x_train_tensor[indices,:], y_train_tensor[indices]
        batch_y = batch_y.squeeze()
        # zero grad for each batch to start with
        optimizer.zero_grad()
        # run model
        outputs = model.forward(batch_x)
        input_lengths = torch.full(size=(batch_size,), fill_value=n_step, dtype=torch.long)
        target_lengths = torch.full(size=(batch_size,), fill_value = 1, dtype=torch.long)
        loss = criterion(outputs.transpose(0,1), batch_y, input_lengths, target_lengths)
        loss.backward()
        optimizer.step()
        # accessing training result information
        running_loss += loss.item()
        train_total += batch_y.size()[0]
        _, train_predicted = torch.max(outputs, 1)
        train_correct += (train_predicted.transpose(0,1) == batch_y.unsqueeze(1).transpose(0,1)).sum().item()
        # testing
        # if steps % print_every == 0:
            

In [44]:
# Model validation
test_loss = 0
test_accuracy = 0
test_correct = 0
test_total = 0
model.eval()
with torch.no_grad():
    test_outputs = model.forward(x_test_tensor)
    input_lengths = torch.full(size=(x_test_tensor.shape[0],), fill_value=n_step, dtype=torch.long)
    target_lengths = torch.full(size=(x_test_tensor.shape[0],), fill_value = 1, dtype=torch.long)
    batch_loss = criterion(test_outputs.transpose(0,1), y_test_tensor, input_lengths, target_lengths)
    test_loss += batch_loss.item()
                
    test_total += y_test_tensor.shape[0]
    _, test_predicted = torch.max(test_outputs, 1)
    test_correct += (test_predicted.transpose(0,1) == y_test_tensor.transpose(0,1)).sum().item()

In [45]:
print(running_loss)
print('train_accuracy', train_correct/train_total)
print('test_accuracy', test_correct/test_total)
print('train_loss', running_loss/batch_size/n_batch)
print('test_loss', test_loss/len(y_test_tensor))

-94.54211044311523
train_accuracy 0.074755859375
test_accuracy 0.0729
train_loss -0.009232627972960473
test_loss -0.000945539665222168


In [None]:
#(batch_y.shape, outputs.shape, batch_x.shape)

In [None]:
#(outputs.squeeze().shape)

In [None]:
#(outputs.transpose(0,1).shape)

In [None]:
#(train_predicted.transpose(0,1).shape, batch_y.unsqueeze(1).transpose(0,1).shape)

In [None]:
#(train_predicted.transpose(0,1) == batch_y.unsqueeze(1).transpose(0,1))

In [None]:
#(y_test_tensor.shape)

In [None]:
#(train_correct/train_total)

In [None]:
#(x_test_tensor.shape[0])

In [None]:
#(batch_y.shape)