# CSCI 6390: Assignment 7

## Due November 15th

### By: Nicholas Lutrzykowski

The goal of this assignment is to implement the RNN algorithm


In [None]:
# Import Statements 
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import softmax
from scipy.special import logsumexp
import os

### Import and setup data


In [3]:
def read_data(filename): 
    res = ''
    with open(filename, 'r') as f:
        str = f.read()
        
        res = str.split(' \n')[1:]
    
    for i in range(len(res)): 
        #res[i] = np.array(res[i].split())
        res[i] = res[i].split('\n')[:-1]
        
        for j in range(len(res[i])):
            res[i][j] = np.array(res[i][j].split(), dtype=float)
    
    return res    

In [4]:
def get_y(num):
    result = np.array([], dtype=np.int8)
    for i in range(10):
        result = np.concatenate((result, np.ones((num,), dtype=np.int8)*i))
    
    one_hot = np.zeros((result.size, result.max()+1))
    one_hot[np.arange(result.size), result] = 1
    
    return one_hot

In [5]:
test = read_data('Test_Arabic_Digit.txt')
train = read_data('Train_Arabic_Digit.txt')

y_test = get_y(int(len(test)/10))
y_train = get_y(int(len(train)/10))

print("Test data set size:", len(test))
print("Train data set size:", len(train))

'''
The data is in the following format: 
data[i] is a single sequence (tao x 13 shape)
data[i][j] is a single sample in time of a sequence (1 x 13 shape)
'''
#y_test = np.reshape(y_test, (y_test.shape[0], 1))
#y_train = np.reshape(y_train, (y_train.shape[0], 1))
# y shape is (# points, 10)

train = np.reshape(np.array(train), (len(train), 1))
train = np.concatenate((train, y_train), axis=1)

test = np.reshape(np.array(test), (len(test), 1))
test = np.concatenate((test, y_test), axis=1)



Test data set size: 2200
Train data set size: 6600


### RNN-Training


In [56]:
def RNN_training(D, eta, maxiter, d, m, p):

    bh = np.random.uniform(low=-0.01, high=0.01, size=m)
    bo = np.random.uniform(low=-0.01, high=0.01, size=p)
    
    Wi = np.random.uniform(low=-0.01, high=0.01, size=(d, m))
    Wh = np.random.uniform(low=-0.01, high=0.01, size=(m, m))
    Wo = np.random.uniform(low=-0.01, high=0.01, size=(m, p))
    
    
    r = 0
    avg_cross_entropy = 0
    while r < maxiter:
        np.random.shuffle(D)
        cross_entropy = 0 
        i = 0
        for seq in D:
            
            i += 1
            # Initialize the training sequence
            X = np.array(seq[0])
            Y = seq[1:]
            tao = len(X)

            # Forward propogation
            h = np.zeros((tao+1, m)) # Initialize the hidden state
            o = np.zeros((tao, p)) # Initialize the output state
            h[0, :] = np.maximum(0, np.matmul(Wi.T, X[0, :]) + np.matmul(Wh.T, h[0, :]) + bh)
            for t in range(tao):    
                
                h[t+1, :] = np.maximum(0, np.matmul(Wi.T, X[t, :]) + np.matmul(Wh.T, h[t, :]) + bh)
                
                o[t, :] = softmax(np.matmul(Wo.T, h[t+1, :]) + bo)
                #o[t, :] = (np.matmul(Wo.T, h[t, :]) + bo) - logsumexp(np.matmul(h, Wo) + bo)
            
            # Backward propogation
            deltao = np.zeros((tao,p)) # Net gradients at output
            
            for t in range(tao-1, -1, -1):
                #deltaf = np.where(o[t, :] > 0, 1, 0) # CHANGE THIS TO SOFTMAX DERIVATIVE
                deltaf = (o[t,:]) - logsumexp(o)
                #o[t,:] = np.where(o[t,:] == 0, 1e-8, o[t,:])
                #Error = np.multiply(Y, np.log(1/o[t,:])) + np.multiply((1-Y), np.log(1/(1-o[t,:])))
                Error = o[t, :] - Y
                deltao[t, :] = np.multiply(deltaf, Error)
            
            
            deltah = np.zeros((tao,m)) # Net gradients at ht
            deltah[-1, :] = np.multiply(np.where(h[-1, :] > 0, 1, 0), np.matmul(Wo, deltao[-1,:]))
            
            deltaf = np.where(h > 0, 1, 0)
            for t in range(tao-1, 0, -1): 
                deltah[t, :] = np.multiply(deltaf[t, :], np.matmul(Wo, deltao[t, :]) + np.matmul(Wo, deltao[t, :]))
            
            # Gradients 
            bo_dif = np.sum(deltao, axis=0) 
            wo_dif = np.matmul(h[1:,:].T, deltao)
            bh_dif = np.sum(deltah, axis=0)
            wh_dif = np.matmul(h[:-1,:].T, deltah)
            wi_dif = np.matmul(X.T, deltah)
            
            # Gradient Descent Step
            bo = bo - eta*bo_dif
            Wo = Wo - eta*wo_dif
            bh = bh - eta*bh_dif
            Wh = Wh - eta*wh_dif
            Wi = Wi - eta*wi_dif
            
            o = np.where(o == 0, 1e-9, o)
            cross_entropy += np.sum(np.multiply(Y, np.log(1/o)) + np.multiply((1-Y), np.log(1/(1-o))))
            
        avg_cross_entropy += cross_entropy/D.shape[0]
        r += 1
    
    print("The average cross entropy on the training data set is:", avg_cross_entropy/maxiter)
    
    return bo, Wo, bh, Wh, Wi
    

In [61]:
bo, Wo, bh, Wh, Wi = RNN_training(train, 1e-7, 20, 13, 16, 10)


The average cross entropy on the training data set is: 129.8458505281867


In [58]:
def training_accuracy(D, bo, Wo, bh, Wh, Wi):
    accuracy = 0
    cross_entropy = 0
    for seq in D:
        
        # Initialize the training sequence
        X = np.array(seq[0])
        Y = seq[1:]
        tao = len(X)

        # Forward propogation
        h = np.zeros((tao+1, bh.shape[0])) # Initialize the hidden state
        o = np.zeros((tao, bo.shape[0])) # Initialize the output state
        h[0, :] = np.maximum(0, np.matmul(Wi.T, X[0, :]) + np.matmul(Wh.T, h[0, :]) + bh)
        for t in range(tao):    
            
            h[t+1, :] = np.maximum(0, np.matmul(Wi.T, X[t, :]) + np.matmul(Wh.T, h[t, :]) + bh)
            
            o[t, :] = softmax(np.matmul(Wo.T, h[t+1, :]) + bo)
        
        if np.argmax(o[-1,:]) == np.argmax(Y):
            accuracy += 1 
            
        cross_entropy += np.sum(np.multiply(Y, np.log(1/o)) + np.multiply((1-Y), np.log(1/(1-o))))
    
    return (accuracy/D.shape[0]), cross_entropy/D.shape[0]
       
            

In [59]:
train_accuracy, loss_train = training_accuracy(train, bo, Wo, bh, Wh, Wi)
test_accuracy, loss_test = training_accuracy(test, bo, Wo, bh, Wh, Wi)

print("The training accuracy is:", train_accuracy)
print("The training loss is:", loss_train)
print("The test accuracy is:", test_accuracy)
print("The test loss is:", loss_test)

The training accuracy is: 0.1
The training loss is: 129.73645807625448
The test accuracy is: 0.1
The test loss is: 128.72277763245887


Based on the training and test accuracy, it appears that my model is not actually learning. I implemented the correct RNN structure, but was unable to figure out which calculation I have as incorrect in the backpropogation. 