In [63]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
#import project modules
from modules import *

In [65]:
def normalize_dataset(stock_market_dataset):
    normalizer = preprocessing.MinMaxScaler((0,1))
    smd_exclude_strings = stock_market_dataset.select_dtypes(include = [np.number])
    normalized = normalizer.fit_transform(smd_exclude_strings)
    normalized_df = pd.DataFrame(normalized) 
    return [normalized, normalized_df, normalizer]

def denormalize_data(df, normalizer, time_step, predictions, close_value_col_index):
    df_copy = df.copy(deep = True)
    rows_to_drop = [i for i in range(0,time_step)]
    df_copy.drop(rows_to_drop,inplace = True)     
    #deleting first #time_step rows of normalized_df. Because output prediction for the first 30 days (first time slice) doesn't exist.

    #converting prediction array to dataframe for replacing actual row with prediction row
    predicted_normalised_values_df = pd.DataFrame(predictions) 

    #replacing
    df_copy[close_value_col_index] = predicted_normalised_values_df[0].values
    
    temp = normalizer.inverse_transform(df_copy)
    df_copy = pd.DataFrame(temp)
    return df_copy

def make_train_test_val_sets(dataarray, num_features, close_value_col_index, time_step, train_per, val_per, test_per):
    n = dataarray.shape[0] 

    #slices of data into time_steps 
    X_slice=[]
    y_slice=[]
    #normalized df dim 3125, 13
    # i = 30 - 3125

    for i in range(time_step, n):
        X_slice.append(dataarray[ i-time_step:i ,  0:num_features ])      #1 example having data from 30 days dim 30x13
        y_slice.append(dataarray[i,close_value_col_index])                  #close value data of the 31st day 

    #splitting percentage

    #splitting slices for test,val,train and converting into np array
    X_train=np.array( X_slice[ 0:int(n*train_per) ])
    y_train=np.array( y_slice[ 0:int(n*train_per) ])

    X_val=np.array( X_slice[ int(n*train_per):int(n*(train_per+val_per)) ])
    y_val=np.array( y_slice[ int(n*train_per):int(n*(train_per+val_per)) ])

    X_test=np.array( X_slice[ int(n*(train_per+val_per)): ])
    y_test=np.array( y_slice[ int(n*(train_per+val_per)): ])
    
    print(np.array(X_train).shape)
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], num_features))    
    #dimension of X_train 3095, 30, 13
    #dimension of y_train  
    print(np.array(X_train).shape)
    return [X_train, y_train, X_val, y_val, X_test, y_test]

In [67]:
data_set_file = 'Google.csv'
stock_market_dataset = pd.read_csv(data_set_file)
stock_market_dataset.shape
stock_market_dataset.head(3125)
stock_market_dataset.columns.tolist()

###normalize data
normalized_array, normalized_df, normalizer = normalize_dataset(stock_market_dataset)
normalized_df.head()
n = normalized_df.shape[0]

###split data set into train, cross validation and test set

num_features = 6 
close_value_col_index = 3
#creating training set with time steps.

time_step=30    #1 month time step
train_per = 0.7
val_per = 0.2
test_per = 1-(train_per+val_per)  #0.1
    
X_train, y_train, X_val, y_val, X_test, y_test =  make_train_test_val_sets(normalized_array, num_features, close_value_col_index, time_step, train_per, val_per, test_per)

print(str(X_train.shape)+ " training dataset shape")
print(str(X_val.shape)+ "  val dataset shape")
print(str(X_test.shape)+ "  test dataset shape")

(2187, 30, 6)
(2187, 30, 6)
(2187, 30, 6) training dataset shape
(625, 30, 6)  val dataset shape
(283, 30, 6)  test dataset shape
(2187,)


In [44]:
def rand_arr(a, b, *args): 
    np.random.seed(0)
    return np.random.rand(*args) * (b - a) + a

In [45]:
class LstmParam:
    def __init__(self, ct_dim, x_dim):
        """
        
        initialize all weights and biases 
        ct_dim - the dimension of current cell state (Ct) matrix
        x_dim - the dimension of Tth input
        
        Weight and bias matrices are initialized with random values instead of zeroes to add noise. 
        Their derivatives will thus be zero.
        
        Terminology:
        
        Prefixes
        w - a weight matrix
        b - a bias matrix
        d - a derivative matrix
        
        Suffixes
        c - cell state gate. Represents data held in current cell. 
            It is the c't (c bar t) matrix
        i - input gate
        f - forget gate
        o - output gate        
        
        """
        self.ct_dim = ct_dim  
        self.x_dim = x_dim 
        combined_dim = x_dim + ct_dim
        
        # Initialize weights
        self.wc = rand_arr(-0.1, 0.1, ct_dim, combined_dim)
        self.wi = rand_arr(-0.1, 0.1, ct_dim, combined_dim) 
        self.wf = rand_arr(-0.1, 0.1, ct_dim, combined_dim)
        self.wo = rand_arr(-0.1, 0.1, ct_dim, combined_dim)
        
        #Initialize biases 
        self.bc = rand_arr(-0.1, 0.1, ct_dim) 
        self.bi = rand_arr(-0.1, 0.1, ct_dim) 
        self.bf = rand_arr(-0.1, 0.1, ct_dim) 
        self.bo = rand_arr(-0.1, 0.1, ct_dim) 
        
        # Initialize derivatives
        self.dwc = np.zeros((ct_dim, combined_dim)) 
        self.dwi = np.zeros((ct_dim, combined_dim)) 
        self.dwf = np.zeros((ct_dim, combined_dim)) 
        self.dwo = np.zeros((ct_dim, combined_dim)) 
        self.dbc = np.zeros(ct_dim) 
        self.dbi = np.zeros(ct_dim) 
        self.dbf = np.zeros(ct_dim) 
        self.dbo = np.zeros(ct_dim)
        
    def apply_derivatives(self, alpha = 1):
        """
        Update parameters in each iteration to reach optimal value 
        alpha is the learning rate.
        """
        self.wc -= alpha * self.dwc
        self.wi -= alpha * self.dwi
        self.wf -= alpha * self.dwf
        self.wo -= alpha * self.dwo
        self.bc -= alpha * self.dbc
        self.bi -= alpha * self.dbi
        self.bf -= alpha * self.dbf
        self.bo -= alpha * self.dbo
        
        # reset all derivatives to zero
        self.dwc = np.zeros_like(self.wc)
        self.dwi = np.zeros_like(self.wi) 
        self.dwf = np.zeros_like(self.wf) 
        self.dwo = np.zeros_like(self.wo) 
        self.dbc = np.zeros_like(self.bc)
        self.dbi = np.zeros_like(self.bi) 
        self.dbf = np.zeros_like(self.bf) 
        self.dbo = np.zeros_like(self.bo) 

In [46]:
class LstmCellState:
    def __init__(self, ct_dim, x_dim):
        """
        Initialize all gate matrices. 
        All gate matrices have the same dimension as ct matrix
        c - current hidden cell state. The gate corresponding to this determines how much 
            data of previous cell should be read in current cell.
        i - input gate. Determines how much data should be read into the cell from current input.
        f - forget gate. Determines how much data should be forgotten, i.e discarded
        o - output gate. How much data to output from current cell to next cell
        s - The present state of gate. 
            Equation to calculate present state : forget_gate*previous_state(s<t-1>) + c_gate*input_gate
        h - output state of the cell. It is the prediction value of Tth output in series.
        dh - derivative of h
        ds - derivative of s
        """
        self.c = np.zeros(ct_dim)
        self.i = np.zeros(ct_dim)
        self.f = np.zeros(ct_dim)
        self.o = np.zeros(ct_dim)
        self.s = np.zeros(ct_dim)
        self.h = np.zeros(ct_dim)
        self.dh = np.zeros_like(self.h)
        self.ds = np.zeros_like(self.s)

In [47]:
class LstmLayer:
    def __init__(self, lstm_param, lstm_cell_state):
        self.state = lstm_cell_state
        self.param = lstm_param
        #xh is the concatenation ofprevious layer's output with current input.
        self.xh = None

    def calculate_gates(self, x, s_prev = None, h_prev = None):
        """
        if this is the first lstm layer in the network then 
        s_prev and h_prev will be initialized to zero as cell state 
        and output states are not present.
        
        s_prev = cell state of previous layer's cells.
        h_prev = output state from previous layer
        
        """
        if s_prev is None: 
            s_prev = np.zeros_like(self.state.s)
        if h_prev is None: 
            h_prev = np.zeros_like(self.state.h)
        
        # save previous states for use in backprop
        self.s_prev = s_prev
        self.h_prev = h_prev

        # concatenate x(T) and h(T-1)
        xh = np.hstack((x,  h_prev))
        #Calculate gate values
        self.state.c = tanh(np.dot(self.param.wc, xh) + self.param.bc)
        self.state.i = sigmoid(np.dot(self.param.wi, xh) + self.param.bi)
        self.state.f = sigmoid(np.dot(self.param.wf, xh) + self.param.bf)
        self.state.o = sigmoid(np.dot(self.param.wo, xh) + self.param.bo)
        self.state.s = self.state.c * self.state.i + s_prev * self.state.f
        self.state.h = self.state.s * self.state.o
        self.xh = xh
    
    def calculate_derivatives(self, dh, ds):
        ds = self.state.o * dh + ds
        do = self.state.s * dh
        di = self.state.c * ds
        dc = self.state.i * ds
        df = self.s_prev * ds

        # calculate derivatives w.r.t. gate inside sigma / tanh function
        di_input = dsigmoid(self.state.i) * di 
        df_input = dsigmoid(self.state.f) * df 
        do_input = dsigmoid(self.state.o) * do 
        dc_input = dtanh(self.state.c) * dc

        # derivatives w.r.t. inputs
        self.param.dwc += np.outer(dc_input, self.xh)
        self.param.dwi += np.outer(di_input, self.xh)
        self.param.dwf += np.outer(df_input, self.xh)
        self.param.dwo += np.outer(do_input, self.xh)
        self.param.dbc += dc_input       
        self.param.dbi += di_input
        self.param.dbf += df_input       
        self.param.dbo += do_input
        
        # calculate derivative for xh
        dxh = np.zeros_like(self.xh)
        dxh += np.dot(self.param.wc.T, dc_input)
        dxh += np.dot(self.param.wi.T, di_input)
        dxh += np.dot(self.param.wf.T, df_input)
        dxh += np.dot(self.param.wo.T, do_input)
        
        # save derivatives
        self.state.ds = ds * self.state.f
        self.state.dh = dxh[self.param.x_dim:]

In [52]:
class LstmNetwork():
    def __init__(self, lstm_param):
        """
        x_list - the sequence that'll be iput to lstm
        lstm_layer_list - the ouput from layer that will be input to next layer
        """
        self.lstm_param = lstm_param
        self.lstm_layer_list = []
        self.x_list = []

    def get_loss(self, y_list, loss_layer):
        """
        Updates derivatives w.r.t corresponding loss layer. 
        To update parameters, we will call self.lstm_param.apply_derivatives()
        """
        assert len(y_list) == len(self.x_list)
        index = len(self.x_list) - 1
        # Calculate loss for the last layer 
        loss = loss_layer.loss(self.lstm_layer_list[index].state.h, y_list[index])
        diff_h = loss_layer.derivative(self.lstm_layer_list[index].state.h, y_list[index])
        # For the last layer of the network, diff_s will be
        diff_s = np.zeros(self.lstm_param.ct_dim)
        self.lstm_layer_list[index].calculate_derivatives(diff_h, diff_s)
        index -= 1

        while index >= 0:
            loss += loss_layer.loss(self.lstm_layer_list[index].state.h, y_list[index])
            diff_h = loss_layer.derivative(self.lstm_layer_list[index].state.h, y_list[index])
            diff_h += self.lstm_layer_list[index + 1].state.dh
            diff_s = self.lstm_layer_list[index + 1].state.ds
            self.lstm_layer_list[index].calculate_derivatives(diff_h, diff_s)
            index -= 1 

        return loss

    def clear_x_list(self):
        self.x_list = []

    def add_x_list(self, x):
        self.x_list.append(x)
        if len(self.x_list) > len(self.lstm_layer_list):
            lstm_state = LstmCellState(self.lstm_param.ct_dim, self.lstm_param.x_dim)
            self.lstm_layer_list.append(LstmLayer(self.lstm_param, lstm_state))

        # get index of most recent x input
        index = len(self.x_list) - 1
        if index == 0:
            self.lstm_layer_list[index].calculate_gates(x)
        else:
            s_prev = self.lstm_layer_list[index - 1].state.s
            h_prev = self.lstm_layer_list[index - 1].state.h
            self.lstm_layer_list[index].calculate_gates(x, s_prev, h_prev)

In [68]:
class LossLayer:
    """
    Computes square loss with first element of hidden layer array.
    """
    @classmethod
    def loss(self, pred, label):
        return (pred[0] - label) ** 2

    @classmethod
    def derivative(self, pred, label):
        diff = np.zeros_like(pred)
        diff[0] = 2 * (pred[0] - label)
        return diff


def example_0():
    # learns to repeat simple sequence from random inputs
    np.random.seed(0)

    # parameters for input data dimension and lstm cell count
    ct_dim = 100
    x_dim = 50
    lstm_param = LstmParam(ct_dim, x_dim)
    lstm_net = LstmNetwork(lstm_param)
    y_list = [-0.7, 0.2, 0.1, -0.9]
    input_val_arr = [np.random.random(x_dim) for _ in y_list]
    
    for cur_iter in range(100):
        print("iter", "%2s" % str(cur_iter), end=": ")
        for ind in range(len(y_list)):
            lstm_net.add_x_list(input_val_arr[ind])

        print("y_pred = [" +
              ", ".join(["% 2.5f" % lstm_net.lstm_layer_list[ind].state.h[0] for ind in range(len(y_list))]) +
              "]", end=", ")

        loss = lstm_net.get_loss(y_list, LossLayer)
        print("loss:", "%.3e" % loss)
        lstm_param.apply_derivatives(0.1)
        lstm_net.clear_x_list()



example_0()

iter  0: y_pred = [ 0.04135,  0.06930,  0.11699,  0.16562], loss: 1.703e+00
iter  1: y_pred = [-0.21158, -0.30790, -0.36603, -0.37193], loss: 9.926e-01
iter  2: y_pred = [-0.22250, -0.29354, -0.36030, -0.41698], loss: 9.168e-01
iter  3: y_pred = [-0.23755, -0.28552, -0.36098, -0.47106], loss: 8.461e-01
iter  4: y_pred = [-0.25128, -0.27542, -0.35710, -0.51905], loss: 7.814e-01
iter  5: y_pred = [-0.26383, -0.26362, -0.34902, -0.55952], loss: 7.227e-01
iter  6: y_pred = [-0.27620, -0.25156, -0.33880, -0.59443], loss: 6.694e-01
iter  7: y_pred = [-0.28887, -0.23983, -0.32745, -0.62491], loss: 6.209e-01
iter  8: y_pred = [-0.30201, -0.22865, -0.31557, -0.65164], loss: 5.765e-01
iter  9: y_pred = [-0.31565, -0.21809, -0.30353, -0.67514], loss: 5.359e-01
iter 10: y_pred = [-0.32974, -0.20817, -0.29158, -0.69580], loss: 4.987e-01
iter 11: y_pred = [-0.34416, -0.19887, -0.27989, -0.71394], loss: 4.647e-01
iter 12: y_pred = [-0.35879, -0.19014, -0.26858, -0.72984], loss: 4.334e-01
iter 13: y_p