In [1]:
# loading packages
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import numpy as np
import scipy.io as sio

In [2]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
# Read the three MATLAB files (train / test / extra) from the working directory
train_data, test_data, extra_data = map(
    sio.loadmat,
    ('train_32x32.mat', 'test_32x32.mat', 'extra_32x32.mat'),
)

# Every file keeps its images under 'X' and its labels under 'y'
x_train, y_train = train_data['X'], train_data['y']
x_test, y_test = test_data['X'], test_data['y']
x_extra, y_extra = extra_data['X'], extra_data['y']

# Append the "extra" split to the training split.
# Images are stacked along axis 3 (the sample axis); labels along axis 0.
x_train = np.concatenate((x_train, x_extra), axis=3)
y_train = np.concatenate((y_train, y_extra), axis=0)

# Show the array shapes and the distinct test labels
array_shapes = [arr.shape for arr in (x_train, y_train, x_test, y_test)]
print(*array_shapes, np.unique(y_test))

# Flatten every image into one row vector of 32*32*3 values.
# The arrays are stored as (32, 32, 3, n_samples) with the sample index LAST.
# Reshaping that layout directly to (n_samples, 3072) does not give one image
# per row: it mixes pixels from different images into each row. The sample
# axis therefore has to be moved to the front before flattening.
n_train, n_test = x_train.shape[-1], x_test.shape[-1]
x_train = np.transpose(x_train, (3, 0, 1, 2)).reshape(n_train, -1)
x_test = np.transpose(x_test, (3, 0, 1, 2)).reshape(n_test, -1)
print(x_train.shape, x_test.shape)

# Scale pixel values by 1/255.
# Cast to float32 first: if the images are stored as integers (presumably
# uint8 -- confirm), dividing them directly promotes the result to float64,
# doubling the memory of arrays that are converted to float32 tensors anyway.
x_train_norm = x_train.astype(np.float32) / 255
x_test_norm = x_test.astype(np.float32) / 255
# Labels are class indices and are used unscaled.
# NOTE: these names alias y_train / y_test (no copy is made), so an in-place
# edit through one name is visible through the other.
y_train_norm = y_train
y_test_norm = y_test

(32, 32, 3, 604388) (604388, 1) (32, 32, 3, 26032) (26032, 1) [ 1  2  3  4  5  6  7  8  9 10]
(604388, 3072) (26032, 3072)


In [4]:
# Relabel class 10 as class 0 (in place) in both label arrays
for labels in (y_train_norm, y_test_norm):
    labels[labels == 10] = 0

In [5]:

# Turn the NumPy arrays into tensors on the selected device:
# float32 for the features, int64 for the class labels
def _to_device_tensor(array, dtype):
    """Wrap a NumPy array as a tensor, cast it to `dtype`, then move it to `device`."""
    return torch.from_numpy(array).to(dtype).to(device)


x_train_tensor = _to_device_tensor(x_train_norm, torch.float32)
y_train_tensor = _to_device_tensor(y_train_norm, torch.int64)
x_test_tensor = _to_device_tensor(x_test_norm, torch.float32)
y_test_tensor = _to_device_tensor(y_test_norm, torch.int64)

In [6]:
# Layer sizes used when building the network
D_in = 32 * 32 * 3   # input dimension
H = 1024             # hidden dimension
D_out = 10           # output dimension

In [7]:
# Fully connected classifier with two hidden ReLU layers
class Network(nn.Module):
    """Multi-layer perceptron: Linear -> ReLU -> Linear -> ReLU -> Linear.

    ``forward`` returns raw class scores (logits). ``nn.CrossEntropyLoss``
    applies log-softmax itself, so it must be given logits rather than
    probabilities. Use ``predict_proba`` when probabilities are needed.

    Args:
        d_in: Length of each flattened input vector. Defaults to the
            notebook-level constant ``D_in``.
        hidden: Width of both hidden layers. Defaults to the notebook-level
            constant ``H``.
        d_out: Number of output classes. Defaults to the notebook-level
            constant ``D_out``.
    """

    def __init__(self, d_in=None, hidden=None, d_out=None):
        super().__init__()
        # Fall back to the notebook constants so that `Network()` keeps working
        d_in = D_in if d_in is None else d_in
        hidden = H if hidden is None else hidden
        d_out = D_out if d_out is None else d_out

        # ReLU activations applied after each hidden linear layer
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        # Input to 1st hidden layer linear transformation
        self.linear1 = nn.Linear(d_in, hidden)
        # 1st hidden layer to 2nd hidden layer linear transformation
        self.linear2 = nn.Linear(hidden, hidden)
        # Output layer, one unit per class
        self.output = nn.Linear(hidden, d_out)
        # Softmax over the class dimension; used by predict_proba only,
        # deliberately NOT applied inside forward()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the logits for a batch ``x`` with one input vector per row."""
        # Each activation must FOLLOW its linear layer; applying ReLU to the
        # raw input first would leave the three linear layers stacked with
        # no non-linearity between them.
        x = self.relu1(self.linear1(x))
        x = self.relu2(self.linear2(x))
        return self.output(x)

    def predict_proba(self, x):
        """Return class probabilities (softmax of the logits) for a batch ``x``."""
        return self.softmax(self(x))


In [8]:
# Build the network and move its parameters to the same device as the data
# tensors; a model left on the CPU cannot process inputs that live on CUDA.
model = Network().to(device)
print(model)

Network(
  (relu1): ReLU()
  (relu2): ReLU()
  (linear1): Linear(in_features=3072, out_features=1024, bias=True)
  (linear2): Linear(in_features=1024, out_features=1024, bias=True)
  (output): Linear(in_features=1024, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)


In [9]:
# Training

# Every epoch draws a fresh random permutation of the training set and trains
# on its first `n_train_samples` entries:
# 100 minibatches of size 128 for a total of 12,800 observations.
n_epochs = 1
batch_size = 128
# Clamp to the dataset size so no empty batch is ever produced.
# Set this to y_train_tensor.size(0) for a full pass over the data.
n_train_samples = min(12800, y_train_tensor.size(0))

# Define optimizer
optimizer = optim.SGD(model.parameters(), lr=0.003)
# Define loss
criterion = nn.CrossEntropyLoss()

# Running totals for accuracy = correct / total (accumulated over all epochs)
train_total = 0
train_correct = 0

# Number of optimizer steps taken so far
steps = 0
# Intended reporting interval; the loop does not use it yet
print_every = 100

model.train()
for epoch in range(n_epochs):
    # Sum of the per-batch mean losses of the current epoch
    running_loss = 0
    # Random sample order for this epoch, created on the same device as the data
    permutation = torch.randperm(y_train_tensor.size(0),
                                 device=y_train_tensor.device)

    for i in range(0, n_train_samples, batch_size):
        steps += 1
        # sample a batch
        indices = permutation[i:i + batch_size]
        batch_x = x_train_tensor[indices]
        # Labels are stored as a column (n, 1); the loss expects shape (n,)
        batch_y = y_train_tensor[indices].squeeze(1)
        # zero grad for each batch to start with
        optimizer.zero_grad()
        # Call the module itself rather than .forward() so registered hooks run
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        # accumulate training statistics
        running_loss += loss.item()
        train_total += batch_y.size(0)
        train_predicted = outputs.argmax(dim=1)
        train_correct += (train_predicted == batch_y).sum().item()

In [None]:
# Model validation: a single forward pass over the whole test set, no gradients
test_loss = 0
test_accuracy = 0
test_correct = 0
test_total = 0
model.eval()
with torch.no_grad():
    # Flatten the labels from (n, 1) to (n,) WITHOUT modifying y_test_tensor.
    # An in-place squeeze_(1) would make this cell fail when it is run a
    # second time, because the tensor would by then be 1-D.
    test_labels = y_test_tensor.reshape(-1)
    test_outputs = model(x_test_tensor)
    # The criterion averages over samples, so this is a per-sample mean loss
    batch_loss = criterion(test_outputs, test_labels)
    test_loss += batch_loss.item()

    test_total += test_labels.size(0)
    test_predicted = test_outputs.argmax(dim=1)
    test_correct += (test_predicted == test_labels).sum().item()

# Fraction of test samples classified correctly
test_accuracy = test_correct / test_total


In [None]:
# Summary metrics.
# running_loss is the SUM of per-batch mean losses over the last epoch, so the
# average training loss is that sum divided by the number of batches in the
# epoch -- not by a sample count.
batches_per_epoch = max(steps // max(n_epochs, 1), 1)
print(running_loss)
print('train_accuracy', train_correct/train_total)
print('test_accuracy', test_correct/test_total)
print('train_loss', running_loss/batches_per_epoch)
# nn.CrossEntropyLoss averages over the samples by default and the test set
# was evaluated as one batch, so test_loss is already a per-sample mean.
print('test_loss', test_loss)

In [None]:
# Show the accumulated test loss on its own
print(f"{test_loss}")

In [None]:
# Size of the training-label tensor
y_train_tensor.shape

In [11]:
# Shapes of the last training batch: labels, model outputs, inputs
batch_y.size(), outputs.size(), batch_x.size()

(torch.Size([128]), torch.Size([128, 10]), torch.Size([128, 3072]))

In [None]:
# Display the training-label array
y_train

In [None]:
# Shape of the training-label array
np.shape(y_train)