In [223]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import autograd
import torch.nn.functional as F

# Directory that holds the project's .npy arrays.
DATA_DIR = "D:/work/JHUschoolStuff/machinelearning/project1/cs475_project_data"

images = np.load(DATA_DIR + "/images.npy")
labels = np.load(DATA_DIR + "/labels.npy")
test = np.load(DATA_DIR + "/test_images.npy")

# Axes 1 and 2 of the image array are flattened into one feature vector per row.
height = images.shape[1]
width = images.shape[2]
size = height * width

# Capture the statistics before `images` is overwritten so that the very same
# transform can be applied to the held-out test images below.
# NOTE(review): the statistics cover every row of images.npy, i.e. both the
# training and the validation part of the split made at the end of this cell.
image_mean = images.mean()
image_std = images.std()
images = (images - image_mean) / image_std

data = images.reshape(images.shape[0], size)
data = torch.from_numpy(data).float().cuda()
# Labels are kept as float tensors here; the training code casts them to long
# before they are handed to the loss.
labels = torch.from_numpy(labels).float().cuda()

# Standardise the test images with the statistics of images.npy rather than
# with their own mean/std, so train and test features share one scale.
test_data = test.reshape(test.shape[0], size)
test_data = (test_data - image_mean) / image_std

batch_size = 1
NUM_OPT_STEPS = 5000

# First 45000 rows are used for training, the remaining rows for validation.
train_seqs, train_labels = data[0:45000,:], labels[0:45000]
val_seqs, val_labels = data[45000:,:], labels[45000:]

In [213]:
class TwoLayerNN(torch.nn.Module):
    """Fully connected classifier with one ReLU hidden layer.

    Parameters
    ----------
    input_size : int, optional
        Number of input features per example. When omitted, the module-level
        ``height * width`` is used, as before.
    hidden_size : int, optional
        Width of the hidden layer (default 100).
    num_classes : int, optional
        Number of output scores per example (default 5).
    """

    def __init__(self, input_size=None, hidden_size=100, num_classes=5):
        super().__init__()
        if input_size is None:
            # Fall back to the flattened image size defined at module level.
            input_size = height * width
        self.layer_1 = torch.nn.Linear(input_size, hidden_size)
        self.layer_2 = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw (pre-softmax) class scores for the batch ``x``."""
        hidden = F.relu(self.layer_1(x))
        return self.layer_2(hidden)

In [214]:
class ThreeLayerNN(torch.nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    Parameters
    ----------
    layer_1 : int
        Width of the first hidden layer.
    layer_2 : int
        Width of the second hidden layer.
    input_size : int, optional
        Number of input features per example. When omitted, the module-level
        ``height * width`` is used, as before.
    num_classes : int, optional
        Number of output scores per example (default 5).
    """

    def __init__(self, layer_1, layer_2, input_size=None, num_classes=5):
        super().__init__()
        if input_size is None:
            # Fall back to the flattened image size defined at module level.
            input_size = height * width
        self.layer_1 = torch.nn.Linear(input_size, layer_1)
        self.layer_2 = torch.nn.Linear(layer_1, layer_2)
        self.layer_3 = torch.nn.Linear(layer_2, num_classes)

    def forward(self, x):
        """Return the raw (pre-softmax) class scores for the batch ``x``."""
        hidden_1 = F.relu(self.layer_1(x))
        hidden_2 = F.relu(self.layer_2(hidden_1))
        return self.layer_3(hidden_2)

In [215]:
def train(model, optimizer, batch_size):
    """Run one optimisation step on a random mini-batch and return its loss.

    The batch is drawn without replacement from the module-level
    ``train_seqs`` / ``train_labels`` tensors.

    Parameters
    ----------
    model : torch.nn.Module
        Network to update; it is switched to train mode.
    optimizer : torch.optim.Optimizer
        Optimiser built over ``model``'s parameters.
    batch_size : int
        Number of training rows to sample for this step.

    Returns
    -------
    float
        Cross-entropy loss of the sampled batch, as a Python number.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` when ``batch_size`` exceeds the number
        of training rows (sampling is without replacement).
    """
    # model.train() puts our model in train mode, which can require different
    # behavior than eval mode (for example in the case of dropout).
    model.train()

    # batch_idx is a 1-D index tensor with shape [batch_size], placed on the
    # same device as the data it indexes.
    batch_idx = np.random.choice(train_seqs.shape[0], size=batch_size, replace=False)
    batch_idx = torch.from_numpy(batch_idx).long().to(train_seqs.device)
    x = train_seqs[batch_idx, :]
    # Labels are stored as floats; cross_entropy needs integer class indices.
    y = train_labels[batch_idx].long()

    optimizer.zero_grad()
    logits = model(x)
    loss = F.cross_entropy(logits, y)
    loss.backward()
    optimizer.step()
    # The loss is a 0-dim tensor: indexing it with [0] raises on PyTorch >= 0.4,
    # .item() is the supported way to read the scalar.
    return loss.item()


In [216]:
def accuracy(y, y_hat):
    """Return the fraction of positions at which ``y`` and ``y_hat`` agree.

    Parameters
    ----------
    y, y_hat : array-like
        Sequences of labels (lists or NumPy arrays) to compare element-wise.

    Returns
    -------
    numpy.float64
        Mean of the element-wise equality, i.e. a value in [0, 1].
    """
    # np.asarray makes the comparison element-wise even when both arguments are
    # plain lists; the builtin float replaces the np.float alias that was
    # removed in NumPy 1.24.
    matches = np.asarray(y) == np.asarray(y_hat)
    return matches.astype(float).mean()

In [217]:
def approx_train_accuracy(model, sample_size=1000):
    """Estimate training accuracy on a random sample of the training rows.

    Rows are drawn without replacement from the module-level ``train_seqs`` /
    ``train_labels`` tensors.

    Parameters
    ----------
    model : torch.nn.Module
        Network to evaluate. It is evaluated in eval mode and its previous
        train/eval mode is restored afterwards.
    sample_size : int, optional
        Number of rows to sample (default 1000); capped at the number of
        available training rows.

    Returns
    -------
    numpy.float64
        Fraction of sampled rows whose arg-max prediction equals the label.
    """
    n_rows = train_seqs.shape[0]
    # Cap the request so sampling without replacement cannot fail on small sets.
    idx = np.random.choice(n_rows, size=min(sample_size, n_rows), replace=False)
    idx = torch.from_numpy(idx).long().to(train_seqs.device)
    x = train_seqs[idx, :]
    y = train_labels[idx].long()

    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for evaluation; skipping the graph saves memory.
        with torch.no_grad():
            logits = model(x)
    finally:
        model.train(was_training)

    # One device-to-host copy and one vectorised arg-max instead of one per row.
    pred = np.argmax(logits.cpu().numpy(), axis=1)
    return accuracy(pred, y.cpu().numpy())

In [218]:
def val_accuracy(model):
    """Compute accuracy on the whole validation split.

    Uses the module-level ``val_seqs`` / ``val_labels`` tensors and feeds the
    entire split through the model in a single batch.

    Parameters
    ----------
    model : torch.nn.Module
        Network to evaluate. It is evaluated in eval mode and its previous
        train/eval mode is restored afterwards.

    Returns
    -------
    numpy.float64
        Fraction of validation rows whose arg-max prediction equals the label.
    """
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for evaluation; skipping the graph saves memory.
        with torch.no_grad():
            logits = model(val_seqs)
    finally:
        model.train(was_training)

    # One device-to-host copy and one vectorised arg-max instead of one per row.
    pred = np.argmax(logits.cpu().numpy(), axis=1)
    return accuracy(pred, val_labels.cpu().numpy())

In [219]:
def plot_accuracies(tr, v):
    """Show two figures in turn: training accuracy, then validation accuracy.

    Parameters
    ----------
    tr : sequence of float
        Training-accuracy values; its length defines the x positions used for
        both figures.
    v : sequence of float
        Validation-accuracy values, plotted against the same x positions.
    """
    positions = list(range(len(tr)))
    # (values, line format, figure title, y-axis label) for each figure.
    figures = (
        (tr, '-ro', 'Training accuracy as a function of iteration', 'training accuracy'),
        (v, '-go', 'Validation accuracy as a function of iteration', 'validation accuracy'),
    )
    for values, line_format, heading, y_label in figures:
        plt.plot(positions, values, line_format)
        plt.title(heading)
        plt.xlabel('iteration')
        plt.ylabel(y_label)
        plt.show()

In [220]:
def tune_hyperparameters(layer_1, layer_2, batch, rate, step):
    """Grid-search ThreeLayerNN settings and return the best combination.

    Every combination of the five candidate lists is trained from scratch with
    Adam on the GPU. Validation accuracy is recorded every 100 optimisation
    steps, and a combination is scored by the mean of those readings.

    Parameters
    ----------
    layer_1, layer_2 : iterable of int
        Candidate widths for the first and second hidden layers.
    batch : iterable of int
        Candidate mini-batch sizes.
    rate : iterable of float
        Candidate Adam learning rates.
    step : iterable of int
        Candidate numbers of optimisation steps.

    Returns
    -------
    tuple
        ``(layer_1, layer_2, batch, rate, step)`` of the best-scoring
        combination, or ``(0, 0, 0, 0, 0)`` if no combination scored above 0.
    """
    from itertools import product

    check_every = 100
    best_avg = 0
    best_config = (0, 0, 0, 0, 0)

    # product() iterates with the right-most list varying fastest, i.e. in the
    # same order as five nested loops over the lists.
    for width_1, width_2, batch_sz, lr, num_steps in product(layer_1, layer_2, batch, rate, step):
        model = ThreeLayerNN(width_1, width_2)
        model.cuda()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # Only validation accuracy feeds the score, so the sampled training
        # accuracy that was previously computed and discarded is not evaluated.
        val_accs = []
        for n in range(num_steps):
            train(model, optimizer, batch_sz)
            if n % check_every == 0:
                val_accs.append(val_accuracy(model))

        avg = np.mean(val_accs)
        print(avg ," ", batch_sz)
        if avg > best_avg:
            best_avg = avg
            best_config = (width_1, width_2, batch_sz, lr, num_steps)

    return best_config

In [221]:
# Candidate values for each axis of the hyper-parameter grid search; every
# list holds a single value, so the search evaluates exactly one combination.
layer_1 = [700]    # width of the first hidden layer
layer_2 = [50]     # width of the second hidden layer
batch = [32]       # mini-batch size
rate = [1e-5]      # Adam learning rate
step = [10000]     # number of optimisation steps

In [222]:
# Run the grid search; `tuned` receives (layer_1, layer_2, batch, rate, step).
tuned = tune_hyperparameters(
    layer_1=layer_1,
    layer_2=layer_2,
    batch=batch,
    rate=rate,
    step=step,
)

KeyboardInterrupt: 

In [None]:
# Train a fresh network with the selected configuration.
# `tuned` is indexed as (layer_1, layer_2, batch, rate, step).
# NOTE(review): this cell needs `tuned` from the tuning cell, whose exported
# output shows a KeyboardInterrupt; confirm that cell completes before running.
model = ThreeLayerNN(*tuned[:2])
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=tuned[3])
train_accs = []
val_accs = []
for i in range(tuned[4]):
    train(model, optimizer, tuned[2])
    if i % 100 != 0:
        continue
    # Every 100th step: record and report sampled-train and validation accuracy.
    train_accs.append(approx_train_accuracy(model))
    val_accs.append(val_accuracy(model))
    print("%6d %5.2f %5.2f" % (i, train_accs[-1], val_accs[-1]))
print(np.mean(val_accs))
plot_accuracies(train_accs, val_accs)
print(tuned)