In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import autograd
import torch.nn.functional as F

# --- Data loading and preprocessing -------------------------------------
# NOTE(review): DATA_DIR is an absolute, machine-specific location; change it
# here (one place) to run the notebook elsewhere.
DATA_DIR = "D:/work/JHUschoolStuff/machinelearning/project1/cs475_project_data"
images = np.load(DATA_DIR + "/images.npy")
labels = np.load(DATA_DIR + "/labels.npy")
test = np.load(DATA_DIR + "/test_images.npy")

# Axis 0 indexes the image; axes 1 and 2 are taken as height and width.
height = images.shape[1]
width = images.shape[2]
size = height * width

# Standardise with the statistics of the whole labelled image array (training
# and validation rows together). The statistics are kept so that the test
# images can be transformed with exactly the same mean/std the model is
# trained on, instead of with statistics of the test set itself.
pixel_mean = images.mean()
pixel_std = images.std()
images = (images - pixel_mean) / pixel_std

# Flatten every image to one row of `size` values and move everything to the GPU.
data = images.reshape(images.shape[0], size)
data = torch.from_numpy(data).float().cuda()
labels = torch.from_numpy(labels).float().cuda()

# Test images stay a NumPy array here; they are only flattened and standardised.
test_data = test.reshape(test.shape[0], size)
test_data = (test_data - pixel_mean) / pixel_std

batch_size = 1
NUM_OPT_STEPS = 5000

# The first NUM_TRAIN rows are used for training, every remaining row for validation.
NUM_TRAIN = 45000
train_seqs, train_labels = data[0:NUM_TRAIN, :], labels[0:NUM_TRAIN]
val_seqs, val_labels = data[NUM_TRAIN:, :], labels[NUM_TRAIN:]
NUM_CLASSES = 5

class TooSimpleConvNN(torch.nn.Module):
    """Small fully-convolutional classifier built from three 3x3 convolutions.

    Pipeline: three unpadded, stride-1 3x3 convolutions (each followed by a
    ReLU), an average over the whole remaining spatial extent, and a 1x1
    convolution that turns the pooled features into one score per class.

    NOTE(review): a second class with this same name (a two-convolution variant
    that takes two channel arguments) is defined later in this file; the
    definition that was executed last is the one the name refers to.

    Args:
        chan_1: number of output channels of the first convolution.
        chan_2: number of output channels of the second convolution.
        chan_3: number of output channels of the third convolution.
        num_classes: number of class scores produced per image (default 5).
    """

    def __init__(self, chan_1, chan_2, chan_3, num_classes=5):
        super().__init__()
        # Stored so that forward() reshapes to exactly as many columns as
        # final_conv produces.
        self.num_classes = num_classes
        # 3x3 convolution: one input channel -> chan_1 channels.
        self.conv1 = torch.nn.Conv2d(1, chan_1, kernel_size=3)
        # 3x3 convolutions with stride 1 and no padding, so each one trims
        # two pixels from the height and from the width.
        self.conv2 = torch.nn.Conv2d(chan_1, chan_2, kernel_size=3, stride=1)
        self.conv3 = torch.nn.Conv2d(chan_2, chan_3, kernel_size=3, stride=1)
        # 1x1 convolution: chan_3 channels -> num_classes channels, which are
        # used as the class scores.
        self.final_conv = torch.nn.Conv2d(chan_3, num_classes, kernel_size=1)

    def forward(self, x):
        """Return class scores of shape [batch, num_classes].

        `x` is reshaped to [batch, 1, height, width] using the module-level
        `height` and `width`, so it may be passed flattened.
        """
        # Convolutions work with images of shape
        # [batch_size, num_channels, height, width]
        x = x.view(-1, height, width).unsqueeze(1)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        # Average over the full feature map, leaving a 1x1 map per channel.
        _, _, h, w = x.size()
        x = F.avg_pool2d(x, kernel_size=[h, w])
        return self.final_conv(x).view(-1, self.num_classes)


In [None]:
class TooSimpleConvNN(torch.nn.Module):
    """Minimal classifier built only from 1x1 convolutions.

    Pipeline: two stride-1 1x1 convolutions (each followed by a ReLU), an
    average over the whole spatial extent, and a final 1x1 convolution that
    turns the pooled features into one score per class. Because every kernel
    is 1x1, the spatial size is unchanged until the pooling step.

    NOTE(review): another class with this same name (a three-convolution
    variant that takes three channel arguments) is defined earlier in this
    file; the definition that was executed last is the one the name refers to.

    Args:
        chan_1: number of output channels of the first convolution.
        chan_2: number of output channels of the second convolution.
        num_classes: number of class scores produced per image (default 5).
    """

    def __init__(self, chan_1, chan_2, num_classes=5):
        super().__init__()
        # Stored so that forward() reshapes to exactly as many columns as
        # final_conv produces.
        self.num_classes = num_classes
        # 1x1 convolution: one input channel -> chan_1 channels.
        self.conv1 = torch.nn.Conv2d(1, chan_1, kernel_size=1)
        # 1x1 convolution with stride 1: chan_1 -> chan_2 channels.
        self.conv2 = torch.nn.Conv2d(chan_1, chan_2, kernel_size=1, stride=1)
        # 1x1 convolution: chan_2 channels -> num_classes channels, which are
        # used as the class scores.
        self.final_conv = torch.nn.Conv2d(chan_2, num_classes, kernel_size=1)

    def forward(self, x):
        """Return class scores of shape [batch, num_classes].

        `x` is reshaped to [batch, 1, height, width] using the module-level
        `height` and `width`, so it may be passed flattened.
        """
        # Convolutions work with images of shape
        # [batch_size, num_channels, height, width]
        x = x.view(-1, height, width).unsqueeze(1)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        # Average over the full feature map, leaving a 1x1 map per channel.
        _, _, h, w = x.size()
        x = F.avg_pool2d(x, kernel_size=[h, w])
        return self.final_conv(x).view(-1, self.num_classes)


# Default model and optimizer (16 and 32 channels, Adam with lr=0.003).
model = TooSimpleConvNN(16, 32)
# The data tensors in this notebook live on the GPU, and every other place that
# builds a model calls .cuda(); do the same here, and do it before the
# optimizer is created so the optimizer holds the GPU parameters.
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

In [None]:
def train(model, optimizer, batch_size):
    """Run one optimisation step on a random mini-batch and return its loss.

    The mini-batch is drawn without replacement from the module-level
    `train_seqs` / `train_labels` tensors.

    Args:
        model: network to update; it is switched to train mode.
        optimizer: optimizer that holds `model`'s parameters.
        batch_size: number of training rows to sample for this step.

    Returns:
        The cross-entropy loss of the mini-batch as a Python float.

    Raises:
        ValueError: from `np.random.choice` if `batch_size` exceeds the number
            of training rows (sampling is without replacement).
    """
    # model.train() puts our model in train mode, which can require different
    # behavior than eval mode (for example in the case of dropout).
    model.train()
    # idx is a 1-D index tensor with shape [batch_size], placed on the same
    # device as the data it indexes.
    idx = np.random.choice(train_seqs.shape[0], size=batch_size, replace=False)
    idx = torch.from_numpy(idx).long().to(train_seqs.device)
    x = train_seqs[idx, :]
    # Labels are stored as floats; cross_entropy needs integer class indices.
    y = train_labels[idx].long()
    optimizer.zero_grad()
    logits = model(x)
    loss = F.cross_entropy(logits, y)
    loss.backward()
    optimizer.step()
    # The loss is a 0-dim tensor; .item() extracts the scalar (indexing it
    # with [0] is an error on current PyTorch).
    return loss.item()


In [None]:
def accuracy(y, y_hat):
    """Return the fraction of positions at which `y` and `y_hat` are equal.

    Args:
        y: sequence or array of labels.
        y_hat: sequence or array of labels with the same shape as `y`.

    Returns:
        Mean of the element-wise equality, a float in [0, 1].
    """
    # Convert first so that plain Python lists are compared element-wise
    # rather than as whole objects.
    y = np.asarray(y)
    y_hat = np.asarray(y_hat)
    # Builtin float replaces the np.float alias, which NumPy has removed.
    return (y == y_hat).astype(float).mean()

In [None]:
def approx_train_accuracy(model):
    """Estimate training accuracy from a random sample of 1000 training rows.

    Rows are drawn without replacement from the module-level `train_seqs` /
    `train_labels`; the predicted class of a row is the index of its largest
    score. The comparison itself is delegated to `accuracy`.

    Args:
        model: network to evaluate. It is put in eval mode for the forward
            pass and returned to whatever mode it was in before.

    Returns:
        Fraction of the sampled rows that were classified correctly.
    """
    idx = np.random.choice(train_seqs.shape[0], size=1000, replace=False)
    idx = torch.from_numpy(idx).long().to(train_seqs.device)
    x = train_seqs[idx, :]
    y = train_labels[idx].long()

    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for evaluation, so skip building the graph.
        with torch.no_grad():
            logits = model(x)
    finally:
        model.train(was_training)

    # One device-to-host copy and one vectorised argmax over the class axis,
    # instead of copying and reducing each row separately.
    pred = np.argmax(logits.cpu().numpy(), axis=1)
    return accuracy(pred, y.cpu().numpy())

In [None]:
def val_accuracy(model):
    """Compute accuracy on the whole validation split.

    Uses the module-level `val_seqs` / `val_labels`; the predicted class of a
    row is the index of its largest score. The comparison itself is delegated
    to `accuracy`.

    Args:
        model: network to evaluate. It is put in eval mode for the forward
            pass and returned to whatever mode it was in before.

    Returns:
        Fraction of validation rows that were classified correctly.
    """
    was_training = model.training
    model.eval()
    try:
        # The full split goes through in one forward pass; disabling gradient
        # tracking avoids keeping the autograd graph for all of it.
        with torch.no_grad():
            logits = model(val_seqs)
    finally:
        model.train(was_training)

    # One device-to-host copy and one vectorised argmax over the class axis,
    # instead of copying and reducing each row separately.
    pred = np.argmax(logits.cpu().numpy(), axis=1)
    return accuracy(pred, val_labels.cpu().numpy())

In [None]:
def plot_accuracies(tr, v):
    """Show two figures in turn: training accuracy, then validation accuracy.

    Args:
        tr: sequence of training-accuracy values.
        v: sequence of validation-accuracy values, plotted against the same
            x positions as `tr`.

    The x axis is the position of each value in its list (0, 1, 2, ...), with
    the range taken from the length of `tr`.
    """
    positions = list(range(len(tr)))
    # (values, line format, figure title, y-axis label) for each figure.
    figures = (
        (tr, '-ro', 'Training accuracy as a function of iteration', 'training accuracy'),
        (v, '-go', 'Validation accuracy as a function of iteration', 'validation accuracy'),
    )
    for values, line_format, title, y_label in figures:
        plt.plot(positions, values, line_format)
        plt.title(title)
        plt.xlabel('iteration')
        plt.ylabel(y_label)
        plt.show()

def tune_hyperparameters(layer_1, layer_2, layer_3, batch, rate, step):
    """Grid-search the three-convolution network and return the best setting.

    Every combination of the candidate values is trained from scratch. While
    training, training and validation accuracy are measured every 100 steps;
    a combination is scored by the mean of its validation measurements.

    Args:
        layer_1, layer_2, layer_3: candidate channel counts for the three
            convolutions.
        batch: candidate mini-batch sizes.
        rate: candidate Adam learning rates.
        step: candidate numbers of optimisation steps.

    Returns:
        Tuple (chan_1, chan_2, chan_3, batch size, learning rate, steps) of
        the highest-scoring combination, or six zeros if no combination
        scored above 0.
    """
    top_score = 0
    winner = (0, 0, 0, 0, 0, 0)
    # Lazily enumerates the grid in the same order as six nested loops.
    grid = (
        (chan_1, chan_2, chan_3, batch_size, lr, num_steps)
        for chan_1 in layer_1
        for chan_2 in layer_2
        for chan_3 in layer_3
        for batch_size in batch
        for lr in rate
        for num_steps in step
    )
    for config in grid:
        chan_1, chan_2, chan_3, batch_size, lr, num_steps = config
        train_accs, val_accs = [], []
        net = TooSimpleConvNN(chan_1, chan_2, chan_3).cuda()
        opt = torch.optim.Adam(net.parameters(), lr=lr)
        for iteration in range(num_steps):
            train(net, opt, batch_size)
            if iteration % 100 != 0:
                continue
            # train_accs is not used for scoring, but the measurement is kept:
            # it samples from NumPy's random generator just like training does.
            train_accs.append(approx_train_accuracy(net))
            val_accs.append(val_accuracy(net))
        score = np.mean(val_accs)
        print(score, " ", batch_size)
        if score > top_score:
            top_score = score
            winner = config

    return winner

In [None]:
def tune_hyperparameters(layer_1, layer_2, batch, rate, step):
    """Grid-search the two-convolution network and return the best setting.

    Every combination of the candidate values is trained from scratch. While
    training, training and validation accuracy are measured every 100 steps;
    a combination is scored by the mean of its validation measurements.

    Args:
        layer_1, layer_2: candidate channel counts for the two convolutions.
        batch: candidate mini-batch sizes.
        rate: candidate Adam learning rates.
        step: candidate numbers of optimisation steps.

    Returns:
        Tuple (chan_1, chan_2, batch size, learning rate, steps) of the
        highest-scoring combination, or five zeros if no combination scored
        above 0.
    """
    top_score = 0
    winner = (0, 0, 0, 0, 0)
    # Lazily enumerates the grid in the same order as five nested loops.
    grid = (
        (chan_1, chan_2, batch_size, lr, num_steps)
        for chan_1 in layer_1
        for chan_2 in layer_2
        for batch_size in batch
        for lr in rate
        for num_steps in step
    )
    for config in grid:
        chan_1, chan_2, batch_size, lr, num_steps = config
        train_accs, val_accs = [], []
        net = TooSimpleConvNN(chan_1, chan_2).cuda()
        opt = torch.optim.Adam(net.parameters(), lr=lr)
        for iteration in range(num_steps):
            train(net, opt, batch_size)
            if iteration % 100 != 0:
                continue
            # train_accs is not used for scoring, but the measurement is kept:
            # it samples from NumPy's random generator just like training does.
            train_accs.append(approx_train_accuracy(net))
            val_accs.append(val_accuracy(net))
        score = np.mean(val_accs)
        print(score, " ", batch_size)
        if score > top_score:
            top_score = score
            winner = config

    return winner

# Search grid: each list holds the candidate values for one hyperparameter.
# Channel counts for the three convolutions.
layer_1, layer_2, layer_3 = [32], [32], [8]
# Mini-batch sizes, learning rates and numbers of optimisation steps.
batch, rate, step = [1024], [0.001], [5000]

In [None]:
# Search grid: each list holds the candidate values for one hyperparameter.
# Channel counts for the first two convolutions.
layer_1, layer_2 = [1], [1]
# Mini-batch sizes, learning rates and numbers of optimisation steps.
batch, rate, step = [1], [0.001], [10000]

# Grid search using the six-argument form of tune_hyperparameters (three
# channel lists, then batch sizes, learning rates and step counts).
# NOTE(review): this file defines tune_hyperparameters twice, and the later
# definition takes only five parameters. If that later definition is the active
# one, this call raises TypeError; confirm which variant is meant to be live.
tuned = tune_hyperparameters(layer_1, layer_2, layer_3, batch, rate, step)

In [None]:
# Grid search using the five-argument form of tune_hyperparameters (two channel
# lists, then batch sizes, learning rates and step counts); that form returns
# the 5-tuple (chan_1, chan_2, batch size, learning rate, steps).
# NOTE(review): this file defines tune_hyperparameters twice; this call only
# works when the five-parameter definition is the active one.
tuned = tune_hyperparameters(layer_1, layer_2, batch, rate, step)

# Retrain a fresh three-convolution model with the selected hyperparameters.
# The indices below assume `tuned` has the 6-tuple layout returned by the
# six-parameter tune_hyperparameters:
#   (chan_1, chan_2, chan_3, batch size, learning rate, steps).
# NOTE(review): with a 5-tuple from the five-parameter tune_hyperparameters,
# tuned[5] does not exist and the other indices mean something else; this also
# needs the three-argument TooSimpleConvNN to be the active class definition.
model = TooSimpleConvNN(tuned[0], tuned[1], tuned[2])
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=tuned[4])                  # tuned[4]: learning rate
train_accs, val_accs = [], []
for i in range(tuned[5]):  # tuned[5]: number of optimisation steps
    train(model, optimizer, tuned[3])  # tuned[3]: mini-batch size
    # Every 100 steps: record both accuracies and print step, train, validation.
    if i % 100 == 0:
        train_accs.append(approx_train_accuracy(model))
        val_accs.append(val_accuracy(model))
        print("%6d %5.2f %5.2f" % (i, train_accs[-1], val_accs[-1]))
# Mean validation accuracy over all recorded checkpoints (the same score the
# grid search uses), followed by the two accuracy plots and the chosen tuple.
print(np.mean(val_accs))
plot_accuracies(train_accs, val_accs)
print(tuned)

In [None]:
# Retrain a fresh two-convolution model with the selected hyperparameters.
# The indices below assume `tuned` has the 5-tuple layout returned by the
# five-parameter tune_hyperparameters:
#   (chan_1, chan_2, batch size, learning rate, steps).
# NOTE(review): with a 6-tuple from the six-parameter tune_hyperparameters the
# same indices select different fields; this also needs the two-argument
# TooSimpleConvNN to be the active class definition.
model = TooSimpleConvNN(tuned[0], tuned[1])
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=tuned[3])                  # tuned[3]: learning rate
train_accs, val_accs = [], []
for i in range(tuned[4]):  # tuned[4]: number of optimisation steps
    train(model, optimizer, tuned[2])  # tuned[2]: mini-batch size
    # Every 100 steps: record both accuracies and print step, train, validation.
    if i % 100 == 0:
        train_accs.append(approx_train_accuracy(model))
        val_accs.append(val_accuracy(model))
        print("%6d %5.2f %5.2f" % (i, train_accs[-1], val_accs[-1]))
# Mean validation accuracy over all recorded checkpoints (the same score the
# grid search uses), followed by the two accuracy plots and the chosen tuple.
print(np.mean(val_accs))
plot_accuracies(train_accs, val_accs)
print(tuned)

(32, 64, 128, 0.001, 5000) 70 to 80
(32, 16, 8, 512, 0.001, 5000) 86

# Baseline run: train the current module-level `model` with its `optimizer`
# for NUM_OPT_STEPS steps using mini-batches of `batch_size` rows.
train_accs, val_accs = [], []
for i in range(NUM_OPT_STEPS):
    # train() takes the model and optimizer explicitly; calling it with only
    # the batch size raises TypeError.
    train(model, optimizer, batch_size)
    # Every 100 steps: record both accuracies and print step, train, validation.
    if i % 100 == 0:
        train_accs.append(approx_train_accuracy(model))
        val_accs.append(val_accuracy(model))
        print("%6d %5.2f %5.2f" % (i, train_accs[-1], val_accs[-1]))
# Mean validation accuracy over all recorded checkpoints, then the plots and
# the most recent tuning result.
print(np.mean(val_accs))
plot_accuracies(train_accs, val_accs)
print(tuned)