### SET ENVIRONMENT

In [None]:
# import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# note filepaths (relative to the notebook's working directory)
PATH_TRAIN_LABELS = '../datasets/train_labels.csv'
PATH_TRAIN_RAW = '../datasets/train_images.npy'
PATH_TEST_RAW = '../datasets/test_images.npy'
PATH_TRAIN = '../datasets/train_images_preprocessed.npy'
# Fixed: this used to repeat the training path, so the "test" images that get
# loaded from PATH_TEST were just a second copy of the training images.
PATH_TEST = '../datasets/test_images_preprocessed.npy'

In [None]:
# global constants
K = 4  # number of cross-validation folds
IMG_SIZE = 35  # presumably the image side length in pixels -- confirm
np.random.seed(19)  # seed NumPy's global RNG so shuffles and weight inits repeat

### IMPORT DATASETS

In [None]:
# Labels: parse the CSV (row 0 is the header, column 0 the index) and keep the
# remaining columns as a plain array.
train_labels = np.array(pd.read_csv(PATH_TRAIN_LABELS, delimiter=",", header=0, index_col=0))
# The image files are indexed as [:, 1] further down, so they are presumably
# pickled object arrays; NumPy >= 1.16.3 refuses to load those unless
# allow_pickle=True is passed explicitly.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
train_images = np.load(PATH_TRAIN, allow_pickle=True, encoding="latin1")
test_images = np.load(PATH_TEST, allow_pickle=True, encoding="latin1")

### PROCESS DATA TO DESIRED FORMAT

In [None]:
# label encoding/decoding
# cat0: class names in alphabetical order; a name's position is its integer id
cat0 = sorted([
    'sink', 'pear', 'moustache', 'nose', 'skateboard', 'penguin', 'peanut',
    'skull', 'panda', 'paintbrush', 'nail', 'apple', 'rifle', 'mug',
    'sailboat', 'pineapple', 'spoon', 'rabbit', 'shovel', 'rollerskates',
    'screwdriver', 'scorpion', 'rhinoceros', 'pool', 'octagon', 'pillow',
    'parrot', 'squiggle', 'mouth', 'empty', 'pencil',
])
cat1 = dict(enumerate(cat0))  # decoder: id -> name
cat2 = {name: idx for idx, name in enumerate(cat0)}  # encoder: name -> id
print(cat1)

In [None]:
# format data
# integer class id per training sample (column 0 of each label row is the name)
input_y = np.array([cat2[row[0]] for row in train_labels])
# column 1 of every image record holds the pixel values; stack them as floats
input_x = np.array(list(train_images[:, 1]), dtype=float)
output_x = np.array(list(test_images[:, 1]), dtype=float)
# rescale by 255 (assumes 8-bit pixel intensities -- confirm against preprocessing)
input_x /= 255.0
output_x /= 255.0
# Pair every sample with its label as a [features, label] row. The rows are
# ragged (a list next to a scalar), so dtype=object is required: without it
# NumPy >= 1.24 raises ValueError instead of building the (n, 2) object array.
input_data = np.array([[x.tolist(), y.tolist()] for x, y in zip(input_x, input_y)], dtype=object)
# the test images have no labels; 0 is stored as a placeholder
output_data = np.array([[x.tolist(), int(0)] for x in output_x], dtype=object)

In [None]:
# sanity check: show the shape of the scaled training feature array
print(input_x.shape)

In [None]:
# create a held-out test split plus k train/validation folds
def create_splits(data, k):
    """Shuffle ``data`` and split it into a test set and ``k`` cross-validation folds.

    The last ``1 / (k + 1)`` of the shuffled rows becomes the test set (20 %
    for ``k = 4``). The remaining rows are cut into ``k`` contiguous folds; for
    fold ``i`` that fold is the validation set and the other ``k - 1`` folds
    together form the training set.

    Args:
        data: 2-D array whose column 0 holds each sample's feature vector and
            whose column 1 holds its label.
        k: number of cross-validation folds.

    Returns:
        ``(train_x, train_y, valid_x, valid_y, test_x, test_y)``. The first
        four are lists with one float array per fold (labels are 1-D).
        ``test_x`` is a float array and ``test_y`` a float column vector.
    """
    def _features(rows):
        # stack column 0 (one feature vector per row) into a float array
        return np.array(list(rows[:, 0]), dtype=float)

    def _labels(rows):
        # column 1 (one label per row) as a 1-D float array
        return np.array(list(rows[:, 1]), dtype=float)

    data = data[np.random.permutation(len(data))]

    # create test: everything from the k/(k+1) mark to the end
    n_total = data.shape[0]
    test = data[int(np.ceil(k * n_total / (k + 1))):]
    test_x = _features(test)
    # kept as a column vector (unlike the fold labels) so the return shape
    # stays what existing callers receive
    test_y = np.array([[label] for label in test[:, 1]], dtype=float)

    # create train and valid from the rows that are not in the test set
    n_rest = n_total - test.shape[0]
    fold_edges = [int(np.ceil(i * n_rest / k)) for i in range(k + 1)]
    rest_idx = np.arange(n_rest)
    train_x, train_y, valid_x, valid_y = [], [], [], []
    for lo, hi in zip(fold_edges[:-1], fold_edges[1:]):
        # Fixed: the held-out fold is the validation set and the remaining
        # k - 1 folds are the training set (these were swapped before, so the
        # model trained on a single fold and validated on all the others).
        held_out = data[lo:hi]
        kept = data[np.delete(rest_idx, np.arange(lo, hi))]
        train_x.append(_features(kept))
        train_y.append(_labels(kept))
        valid_x.append(_features(held_out))
        valid_y.append(_labels(held_out))
    return train_x, train_y, valid_x, valid_y, test_x, test_y

In [None]:
# prepare for cross validation
# train_*/valid_* are lists with one entry per fold; test_* is the single
# held-out split returned by create_splits
train_x, train_y, valid_x, valid_y, test_x, test_y = create_splits(input_data, k=K)

In [None]:
# sanity check: feature-array shapes of the first fold's train/valid parts and of the test split
print(train_x[0].shape)
print(valid_x[0].shape)
print(test_x.shape)

### NEURAL NETWORK

In [None]:
# hyper-parameter grids
list_rates = [5e-1]  # learning rate
list_nodes = list(range(250, 951, 100))  # number of nodes per layer: 250, 350, ..., 950
list_layers = list(range(1, 5))  # number of hidden layers: 1..4
batch_size = [1 / 100, 1 / 10, 1]
activation = ['sigmoid', 'tanh', 'relu']

In [None]:
# sigmoid function
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)``, element-wise for arrays."""
    decay = np.exp(-1 * x)
    return 1 / (1 + decay)

# derivative of our sigmoid function, in terms of the output (i.e. y)
def dsigmoid(x):
    """Return the derivative of the logistic sigmoid, given the sigmoid's OUTPUT.

    For ``y = sigmoid(z)`` the derivative ``dy/dz`` equals ``y * (1 - y)``.
    The previous body returned ``1 - y**2``, which is the derivative of tanh
    and does not match ``sigmoid`` as defined in this file.

    Args:
        x: sigmoid output value(s), scalar or array.

    Returns:
        ``x * (1 - x)``, element-wise for arrays.
    """
    return x * (1.0 - x)

# make a matrix
def matrix(m, n, fill=0.0):
    """Return an ``(m, n)`` array in which every entry equals ``fill``."""
    blank = np.zeros((m, n))
    return blank + fill

# make a random matrix
def rand_matrix(m, n, a=0, b=1):
    """Return an ``(m, n)`` array of ``np.random.rand`` draws rescaled from [0, 1) to [a, b)."""
    width = b - a
    unit = np.random.rand(m, n)
    return a + width * unit

# use logistic regression loss function
def loss_fn(predict, truth, eps=1e-12):
    """Return the binary cross-entropy between ``predict`` and ``truth``.

    The element-wise cross-entropy is summed over all entries and divided by
    ``len(truth)`` (the number of rows).

    Args:
        predict: predicted probabilities, same shape as ``truth``.
        truth: array of 0/1 targets.
        eps: predictions are clipped to ``[eps, 1 - eps]`` before taking logs.
            Without this, a saturated prediction of exactly 0 or 1 produces
            ``log(0)`` and the loss becomes NaN or infinite.

    Returns:
        The loss as a squeezed NumPy scalar.
    """
    n = len(truth)
    safe = np.clip(predict, eps, 1 - eps)
    loss = (- 1 / n) * np.sum(truth * np.log(safe) + (1 - truth) * (np.log(1 - safe)))
    loss = np.squeeze(loss)
    return loss

In [None]:
class NN:
    """Fully connected network with one tanh hidden layer and a sigmoid output layer.

    ``train`` runs full-batch gradient descent against one-hot targets.
    """

    def __init__(self, ni, nh, no, lr):
        """Create the network with random weights and unit biases.

        Args:
            ni: number of input nodes.
            nh: number of hidden nodes.
            no: number of output nodes (one per class).
            lr: learning rate used by ``train``.
        """
        self.lr = lr
        # number of input, hidden, and output nodes
        self.ni = ni
        self.nh = nh
        self.no = no

        # bias vectors, one entry per hidden / output node, initialised to 1
        self.bh = np.ones(self.nh)
        self.bo = np.ones(self.no)

        # create weights
        # default to range (-0.5, 0.5)
        self.wh = rand_matrix(self.ni, self.nh, -0.5, 0.5)
        self.wo = rand_matrix(self.nh, self.no, -0.5, 0.5)

    def _forward(self, x):
        """Run one forward pass over ``x`` and return ``(ah, oh, ao, oo)``."""
        # hidden layer: pre-activation, then tanh
        ah = np.dot(x, self.wh) + self.bh
        oh = np.tanh(ah)
        # Output layer is fed the hidden OUTPUT ``oh``. It used to be fed the
        # pre-activation ``ah``, which bypassed the tanh non-linearity and
        # contradicted backPropagate, which differentiates through ``oh``.
        ao = np.dot(oh, self.wo) + self.bo
        oo = sigmoid(ao)
        return ah, oh, ao, oo

    # training feed forward, obtain output from weight matrices and bias vectors
    def propagate(self, inputs):
        """Forward pass that caches every intermediate on ``self`` for backPropagate.

        Args:
            inputs: array of shape (n_samples, ni).

        Returns:
            The output-layer activations, also stored as ``self.oo``.
        """
        self.ai = inputs
        # ---- can add loop here for more hidden layers ----
        self.ah, self.oh, self.ao, self.oo = self._forward(inputs)
        return self.oo

    # training back propagation, updates neural network's weight matrices and bias vectors
    def backPropagate(self, x, y, eta):
        """Take one gradient-descent step using the activations cached by ``propagate``.

        Args:
            x: the inputs passed to the preceding ``propagate`` call.
            y: one-hot targets of shape (n_samples, no).
            eta: learning rate for this step.
        """
        n = x.shape[0]
        # output-layer error term (sigmoid output with cross-entropy loss)
        self.dao = self.oo - y
        self.dwo = np.dot(self.oh.T, self.dao) / n
        # per-node bias gradient; summing over every axis collapsed it to one
        # scalar that was then applied to all output biases alike
        self.dbo = np.sum(self.dao, axis=0) / n
        # d tanh(a) / da = 1 - tanh(a)**2 (the square was missing)
        self.dah = np.dot(self.dao, self.wo.T) * (1 - self.oh ** 2)
        self.dwh = np.dot(x.T, self.dah) / n
        self.dbh = np.sum(self.dah, axis=0) / n
        # update weights using gradient descent method. learning rate = eta
        self.wo = self.wo - eta * self.dwo
        self.wh = self.wh - eta * self.dwh
        self.bo = self.bo - eta * self.dbo
        self.bh = self.bh - eta * self.dbh

    def predict(self, x):
        """Return the output-layer activations for ``x`` without touching cached state."""
        return self._forward(x)[3]

    # takes in Y
    def train(self, X, Y, iterations = 1000):
        """Train on ``(X, Y)`` for ``iterations`` full-batch steps.

        Args:
            X: array of shape (n_samples, ni).
            Y: integer class labels, one per sample (float-typed integers are
                accepted; a column vector is flattened).
            iterations: number of gradient-descent steps.

        Returns:
            A list of ``[accuracy, loss]`` pairs, recorded whenever
            ``i % (iterations / 100) == 0``. Both values are computed from the
            forward pass made before that step's weight update.
        """
        trend = []
        eta = self.lr
        labels = np.asarray(Y).reshape(-1).astype(int)
        # create one-hot output matrix, sized by the network's own output
        # count instead of a hard-coded 31
        Y_m = np.zeros((X.shape[0], self.no))
        Y_m[np.arange(len(labels)), labels] = 1
        log_every = iterations / 100
        for i in range(iterations):
            self.propagate(X)
            self.backPropagate(X, Y_m, eta)
            pred = np.argmax(self.oo, axis=1)
            loss = loss_fn(self.oo, Y_m)
            acc = (pred == labels).sum() / len(labels)
            if i % log_every == 0:
                trend.append([acc, loss])
                print("iteration", i, ":\t", "train acc:", acc)
        return trend

In [None]:
# Build one candidate network per (learning rate, hidden width) pair.
# The layer sizes are derived from the notebook's constants rather than
# repeating the literals 1225 and 31, so they cannot drift apart.
n_inputs = IMG_SIZE * IMG_SIZE  # 35 * 35 = 1225; assumes flattened square images
n_classes = len(cat0)  # one output node per label category (31)
list_models = []
for r in list_rates:
    for n in list_nodes:
        list_models.append(NN(ni=n_inputs, nh=n, no=n_classes, lr=r))

In [None]:
# best result so far: (mean validation accuracy, nn, training trend)
best_pred = (0, 0, [])
# training trend of every model, in list_models order
trends = []

for nn in list_models:
    fold_accs = []
    for i in range(K):
        t_x, t_y, v_x, v_y = train_x[i], train_y[i], valid_x[i], valid_y[i]
        # NOTE(review): the same nn instance is trained on every fold and
        # train() never resets the weights, so later folds start from weights
        # already fitted on earlier folds. Decide whether each fold should get
        # a fresh network.
        res = nn.train(t_x, t_y, 2000)
        # validate with validation set after the training
        v_o = nn.predict(v_x)
        pred = np.argmax(v_o, axis=1)
        diff = v_y - pred
        acc = (diff == 0).sum() / len(v_y)
        fold_accs.append(acc)
        print(f"MODEL: lr ({nn.lr}), nh ({nn.nh})")
        print("valid fold:", i, "|", "valid acc:", acc)
    # Rank models by their mean accuracy over all K folds; previously only
    # the accuracy of the last fold took part in the comparison.
    mean_acc = np.mean(fold_accs)
    if mean_acc > best_pred[0]:
        best_pred = (mean_acc, nn, res)
    # Record this model's own trend (from its last fold); previously the
    # best-so-far trend was appended again for every model.
    trends.append(res)

# [accuracy, loss] history of the best model, plus its x-axis
res = np.array(best_pred[2])
epoch = np.arange(len(res))