# In [1]:
# importing necessary packages

import random
import numpy as np

# Alphabet and automaton dimensions.
DICTIONARY = ['a', 'b', 'c']           # input alphabet
NUMBER_OF_CHARS = len(DICTIONARY)      # size of the alphabet
NUMBER_OF_POSITIONS = 4                # number of states

# Start state: all weight on state 0, i.e. [1 0 0 0].
START_POSITION = np.array([1.0] + [0.0] * (NUMBER_OF_POSITIONS - 1))

# Numeric settings. Their use is outside this part of the file, so the notes
# below are assumptions to confirm against the code that reads them.
PERCENT_OF_TESTS = 0.1   # NOTE(review): presumably the fraction of samples held out for testing
TenzToAdd = 2.0          # NOTE(review): purpose not visible here - confirm
EPS = 0.01               # NOTE(review): presumably a tolerance / stopping threshold
NU = 0.7                 # NOTE(review): presumably the learning rate for the tensor
NU_ADDER = 0.3           # NOTE(review): presumably the learning rate for the adder

# In [4]:
def cost_function(exp, res):
    """Return the squared error between the produced and expected values.

    Args:
        exp: expected (target) value.
        res: value actually produced.

    Returns:
        (res - exp) ** 2.
    """
    error = res - exp
    return error ** 2

def cost_function_derivative(exp, res):
    """Return the signed error used as the gradient of the squared-error cost.

    The value is res - exp (produced minus expected). Note that this is the
    derivative of (res - exp) ** 2 with respect to res *without* the constant
    factor 2.

    Args:
        exp: expected (target) value.
        res: value actually produced.
    """
    signed_error = res - exp
    return signed_error


def match(nn, ch, pos):
    """Perform one state transition: next state = tensor x character x state.

    Computes, for every next-state index k,
        new_pos[k] = sum over i, j of nn.tensor[k][i][j] * ch[i] * pos[j]
    as a single NumPy contraction instead of three nested Python loops. The
    sizes are taken from the arguments themselves rather than from the
    module-level dimension constants.

    Args:
        nn: object exposing ``tensor``, indexed as
            [next state][character][current state].
        ch: vector for the next input character.
        pos: vector for the current state.

    Returns:
        1-D float array holding the next state.
    """
    # Force float so the result dtype matches the np.zeros accumulator that
    # the loop-based version filled in.
    tensor = np.asarray(nn.tensor, dtype=float)
    return np.einsum('kij,i,j->k', tensor, np.asarray(ch), np.asarray(pos))

def match_derivative(nn, dz, ch):
    """Back-propagate a gradient through ``match`` to the current state.

    Builds the matrix
        jacobian[k][i] = sum over j of nn.tensor[k][j][i] * ch[j]
    (row k = entry of the new state, column i = entry of the current state)
    and returns np.dot(dz, jacobian). The contraction is done with NumPy and
    sized from the arguments rather than from module-level constants.

    Args:
        nn: object exposing ``tensor``, indexed as
            [next state][character][current state].
        dz: incoming gradient with respect to the new state.
        ch: vector for the input character used in the forward step.

    Returns:
        np.dot(dz, jacobian): the gradient with respect to the current state.
    """
    tensor = np.asarray(nn.tensor, dtype=float)
    jacobian = np.einsum('kji,j->ki', tensor, np.asarray(ch))
    return np.dot(dz, jacobian)

def match_derivative_tensor(dz, ch, pos):
    """Back-propagate a gradient through ``match`` to the tensor itself.

    The result is the 3-D array
        derivative[k][i][j] = dz[k] * ch[i] * pos[j]
    computed with an outer product and broadcasting instead of explicit
    loops; its shape follows the lengths of dz, ch and pos rather than the
    module-level dimension constants.

    Args:
        dz: incoming gradient, one entry per new-state index k.
        ch: vector for the input character used in the forward step.
        pos: vector for the current state used in the forward step.

    Returns:
        Float array of shape (len(dz), len(ch), len(pos)).
    """
    # 2-D table of ch[i] * pos[j], shared by every slice k.
    sample_matrix = np.outer(ch, pos)
    # Scale that table by dz[k] for each k via broadcasting.
    scale = np.asarray(dz, dtype=float)[:, np.newaxis, np.newaxis]
    return scale * sample_matrix

def normalize(t):
    """Scale t so that its entries add up to 1 (divide t by np.sum(t)).

    No guard is applied when the entries of t sum to zero.
    """
    total = np.sum(t)
    return t / total

def normalize_derivative(dz, inp):
    """Back-propagate a gradient through ``normalize``.

    With S = sum(inp), normalize gives out[i] = inp[i] / S, so
        jacobian[i][j] = (S - inp[i]) / S**2   if i == j
                         -inp[i] / S**2        otherwise
    The matrix is built in one vectorized expression, sized from ``inp``
    itself, and no longer uses a local named ``sum`` that hid the builtin.

    Args:
        dz: incoming gradient with respect to the normalized vector.
        inp: the 1-D vector that was normalized.

    Returns:
        np.dot(dz, jacobian): the gradient with respect to ``inp``.
        As before, no guard is applied when the entries of ``inp`` sum to zero.
    """
    values = np.asarray(inp, dtype=float)
    total = values.sum()
    # total * I puts S on the diagonal; subtracting the column vector of
    # values yields (S - inp[i]) on the diagonal and -inp[i] elsewhere in row i.
    jacobian = (total * np.eye(values.size) - values[:, np.newaxis]) / total ** 2
    return np.dot(dz, jacobian)

def lastsum(nn, x):
    """Return np.dot(nn.adder, x): x combined with the ``adder`` weights of nn."""
    weights = nn.adder
    return np.dot(weights, x)

def lastsum_derivative(nn, dz):
    """Back-propagate dz through ``lastsum`` to its input: np.dot(dz, nn.adder)."""
    weights = nn.adder
    return np.dot(dz, weights)

def lastsum_derivative_adder(nn, dz, inp):
    """Back-propagate dz through ``lastsum`` to the ``adder`` weights.

    ``lastsum(nn, x)`` is np.dot(nn.adder, x), so its partial derivative with
    respect to adder[i] is x[i]; the gradient for the adder is therefore
    dz combined with the input only. The previous version multiplied the
    input element-wise by nn.adder first, which scaled every gradient entry
    by the current weight and is not the derivative of the dot product.

    Args:
        nn: unused; kept so existing callers keep working unchanged.
        dz: incoming gradient with respect to the output of ``lastsum``.
        inp: presumably the vector that was passed to ``lastsum`` as x -
            confirm against the caller.

    Returns:
        np.dot(dz, inp).
    """
    return np.dot(dz, inp)

def char_to_vector(ch):
    """Return the one-hot vector for character ch.

    The vector has NUMBER_OF_CHARS entries, all 0.0 except a 1.0 at the
    position of ch in DICTIONARY. ``DICTIONARY.index`` raises ValueError when
    ch is not in DICTIONARY.
    """
    one_hot = np.zeros(NUMBER_OF_CHARS)
    one_hot[DICTIONARY.index(ch)] = 1.0
    return one_hot

def cut(x):
    """Clamp x to the interval [0.0, 1.0].

    Values below 0.0 become 0.0, values above 1.0 become 1.0, and anything
    else is returned unchanged.
    """
    if x < 0.0:
        return 0.0
    return 1.0 if x > 1.0 else x