In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
stock = pd.read_csv("stock.txt", sep=',')

In [3]:
stock.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt
0,1962-01-02,0.6277,0.6362,0.6201,0.6201,2575579,0
1,1962-01-03,0.6201,0.6201,0.6122,0.6201,1764749,0
2,1962-01-04,0.6201,0.6201,0.6037,0.6122,2194010,0
3,1962-01-05,0.6122,0.6122,0.5798,0.5957,3255244,0
4,1962-01-08,0.5957,0.5957,0.5716,0.5957,3696430,0
5,1962-01-09,0.5957,0.6037,0.5878,0.5957,2778285,0
6,1962-01-10,0.5957,0.6037,0.5957,0.5957,2337096,0
7,1962-01-11,0.5957,0.5957,0.5878,0.5957,1943605,0
8,1962-01-12,0.5957,0.6037,0.5878,0.5878,2015151,0
9,1962-01-15,0.5957,0.5957,0.5957,0.5957,2527879,0


In [4]:
stock.drop("Date", axis=1, inplace=True)
stock.drop("OpenInt", axis=1, inplace=True)
stock.drop("Volume", axis=1, inplace=True)

In [5]:
X = stock.drop("Close", axis=1)
Y = stock["Close"] 
X.head()

Unnamed: 0,Open,High,Low
0,0.6277,0.6362,0.6201
1,0.6201,0.6201,0.6122
2,0.6201,0.6201,0.6037
3,0.6122,0.6122,0.5798
4,0.5957,0.5957,0.5716


In [6]:
X.shape

(14058, 3)

In [7]:
X.shape

(14058, 3)

In [8]:
Y.shape 

(14058,)

In [9]:
# here we take reshape of y bz without it y.shape gives (14058,)
Y = np.array(Y) 
Y=Y.reshape(-1,1)
Y.shape 

(14058, 1)

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3,random_state=42)

In [11]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [12]:
def initialize_parameters(n_a,n_x,n_y):
    Wf = np.random.randn(n_a, n_a+n_x)* np.sqrt(2/n_a+n_x)
    bf = np.random.randn(n_a,1)
    Wi = np.random.randn(n_a, n_a+n_x)* np.sqrt(2/n_a+n_x)
    bi = np.random.randn(n_a,1)
    Wo = np.random.randn(n_a, n_a+n_x)* np.sqrt(2/n_a+n_x)
    bo = np.random.randn(n_a,1)
    Wc = np.random.randn(n_a, n_a+n_x)* np.sqrt(2/n_a+n_x)
    bc = np.random.randn(n_a,1)
    Wy = np.random.randn(n_y,n_a)* np.sqrt(2/n_a)
    by = np.random.randn(n_y,1)
    
#   parameters -- python dictionary containing weights and bias
    parameters = {"Wf": Wf, "Wi": Wi, "Wo": Wo, "Wc": Wc, "Wy": Wy, "bf": bf, "bi": bi, "bo": bo, "bc": bc, "by": by}
    
    return parameters

In [13]:
# we will start by implementing the LSTM cell for a single time-step. Then you can iteratively call 
# it from inside a for-loop tohave it process an input with T_x time-steps.

In [14]:
def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    
#   xt -- your input data at timestep "t", numpy array of shape (n_x, m).
#   a_prev -- Hidden state at timestep "t-1", numpy array of shape (n_a, m)
#   c_prev -- Memory state at timestep "t-1", numpy array of shape (n_a, m)
    n_x,m = xt.shape
    n_a,_ = a_prev.shape
    
    # Retrieve parameters from "parameters"
    Wf = parameters["Wf"]
    bf = parameters["bf"]
    Wi = parameters["Wi"]
    bi = parameters["bi"]
    Wc = parameters["Wc"]
    bc = parameters["bc"]
    Wo = parameters["Wo"]
    bo = parameters["bo"]
    Wy = parameters["Wy"]
    by = parameters["by"]
    
    # Concatenate a_prev and xt (≈3 lines)
    concat = np.zeros((n_a + n_x, m))
    concat[: n_a, :] = a_prev
    concat[n_a :, :] = xt
    
    ft = sigmoid(np.dot(Wf, concat) + bf)   
    it = sigmoid(np.dot(Wi, concat) + bi)
    cct = np.tanh(np.dot(Wc, concat) + bc)
# * means element wise multiplication if 4*3 matrix multiply by 4*3 then resultant matrix also be of 4*3
    c_next = ft * c_prev + it * cct
    ot = sigmoid(np.dot(Wo, concat) + bo)
    a_next = ot * np.tanh(c_next)
#  ft,it,cct,c_next,ot,a_next will be of na,m 

    yt_pred = np.dot(Wy, a_next) + by
#   yt will be of ny,nm     
       
    # store values needed for backward propagation in cache
    cache = (ft,it,ot,cct,c_next,a_next,yt_pred,xt,a_prev,c_prev,parameters)
    
    return a_next, c_next, yt_pred, cache

In [15]:
def lstm_forward(X, a0, parameters):
# x -- Input data for every time-step, of shape (n_x, m, T_x).
# a0 -- Initial hidden state, of shape (n_a, m)
    
# Initialize "caches", which will track the list of all the caches
    caches = []
    
    n_y,n_a = parameters["Wy"].shape
    n_x, m, T_x = X.shape 
            
# Initialize a_next and c_next 
    a_next = a0
    c_next = np.zeros(a_next.shape)
    
# initialize "a", "c" and "y" with zeros 
    a = np.zeros((n_a, m, T_x))
    c = np.zeros((n_a, m, T_x))
    y = np.zeros((n_y, m, T_x))

        
# loop over all time-steps
    for t in range(T_x):
        a_next, c_next, yt, cache = lstm_cell_forward(X[:, :, t], a_next, c_next, parameters)
        
        a[:,:,t] = a_next
    
        c[:,:,t]  = c_next
        
        y[:,:,t] = yt
        
        caches.append(cache)
     
    
# store values needed for backward propagation in cache
    caches = (caches, X)

    return y,a,c,caches

In [16]:
def compute_cost(y, Y):
    _,m,_ = Y.shape
    
    cost = np.sqrt((1/m)*(np.sum((y-Y)**2)))
    
    cost = np.squeeze(cost)
    
    return cost

In [17]:
def lstm_cell_backward(da_next, dc_next, Y, caches):
# da_next -- Gradient of loss with respect to next hidden state
# cache -- python dictionary containing useful values (output of rnn_cell_forward())

# Retrieve values from cache
    (ft,it,ot,cct,c_next,a_next,yt_pred,xt,a_prev,c_prev,parameters) = caches
    
# Retrieve dimensions from xt's and a_next's shape (≈2 lines)
    n_x, m = xt.shape
    n_a, m = a_prev.shape
    
# calculating da_next and dc_next used for updating gates
    dy = Y-yt_pred
    da_next = np.dot(parameters['Wy'].T, dy) + da_next
    dc_next = (da_next * ot * (1-np.tanh(c_next**2))) + dc_next
       
    
# updating gates which is used for weights and bias
    dot = da_next * np.tanh(c_next) * ot * (1 - ot)
    dcct = (da_next * ot * (1 - np.tanh(c_next) ** 2) + dc_next) * it * (1 - cct ** 2)
    dit = (da_next * ot * (1 - np.tanh(c_next) ** 2) + dc_next) * cct * (1 - it) * it
    dft = (da_next * ot * (1 - np.tanh(c_next) ** 2) + dc_next) * c_prev * ft * (1 - ft)   


    concat = np.zeros((n_a+n_x,m))
    concat[: n_a, :] = a_prev
    concat[n_a:,:] = xt
    
# updating weights and bias 
    dWf = np.dot(dft, concat.T)
    dWi = np.dot(dit, concat.T)
    dWo = np.dot(dot, concat.T)
    dWc = np.dot(dcct, concat.T)
    dbf = np.sum(dft, axis = 1, keepdims=True)
    dbi = np.sum(dit, axis = 1, keepdims=True)
    dbo = np.sum(dot, axis = 1, keepdims=True)
    dbc = np.sum(dcct, axis = 1, keepdims=True)
    
    dWy = np.dot(dy, a_next.T)
    dby = np.sum(dy, axis = 1, keepdims = True)
    
#     da_prev = np.dot(Wf[:, :n_a].T, df_gate) + np.dot(Wc[:, :n_a].T, dc_tilda) + np.dot(Wu[:, :n_a].T, du_gate) + np.dot(Wo[:, :n_a].T, do_gate)
#     dc_prev = (da_next * o_gate * (1 - np.tanh(c_next) ** 2) + dc_next) * f_gate
#     dxt = np.dot(Wf[:, n_a:].T, df_gate) + np.dot(Wc[:, n_a:].T, dc_tilda) + np.dot(Wu[:, n_a:].T, du_gate) + np.dot(Wo[:, n_a:].T, do_gate)
    
    dAX = np.dot(parameters["Wf"].T, dft) + np.dot(parameters['Wi'].T, dit) + np.dot(parameters['Wo'].T, dot) + np.dot(parameters['Wc'].T, dcct)
     # dAX = derivative of both da_prev and dx calculated combined
    
    da_prev = dAX[:n_a,:]
    dxt = dAX[n_a:,:]
    
    dc_prev = (da_next * ot * (1 - np.tanh(c_next) ** 2) + dc_next) * ft


    
    gradients = {
        'da_prev': da_prev, 'dxt':dxt, 'dc_prev':dc_prev, 'dWf':dWf, 'dbf':dbf, 'dWi':dWi,'dbi':dbi, "dWo":dWo,'dbo':dbo, 'dWc':dWc, 'dbc':dbc, "dWy":dWy,'dby':dby
    }
    
    return gradients
    
# Returns:
# gradients -- python dictionary containing:
#              dx -- Gradients of input data, of shape (n_x, m)
#              da_prev -- Gradients of previous hidden state, of shape (n_a, m)
#              dWax -- Gradients of input-to-hidden weights, of shape (n_a, n_x)
#              dWaa -- Gradients of hidden-to-hidden weights, of shape (n_a, n_a)
#              dba -- Gradients of bias vector, of shape (n_a, 1)

In [18]:
def lstm_backward(Y, caches):
    (caches, X) = caches
    (ft, it, ot, cct, c_next, a_next, yt_pred, xt, a_prev, c_prev, parameters) = caches[0]
   
    n_x, m = xt.shape
    n_y, m = yt_pred.shape 
    n_a,m = a_prev.shape
    T_x = len(caches)
    
# initialize the gradients with the right sizes 
    dx = np.zeros((n_x, m, T_x))  
# dx is not needed here it is used in the LSTM model where output of one is input for other timestep
    da0 = np.zeros((n_a, m))
# da0 is updated in backpropgation beacuse intially we define a0 with a matrix of zero but we will use this da0 matrix in forward propogation for 
    da_prevt = np.zeros((n_a, m))
    dc_prevt = np.zeros((n_a, m))
    
    dWf = np.zeros((n_a, n_a + n_x))
    dWi = np.zeros((n_a, n_a + n_x))
    dWc = np.zeros((n_a, n_a + n_x))
    dWo = np.zeros((n_a, n_a + n_x))
    dbf = np.zeros((n_a, 1))
    dbi = np.zeros((n_a, 1))
    dbc = np.zeros((n_a, 1))
    dbo = np.zeros((n_a, 1))
    
 # loop back over the whole sequence
    for t in reversed(range(T_x)):
        gradients = lstm_cell_backward(da_prevt, dc_prevt, Y[:,:,t], caches[t])
        
        dWf = gradients['dWf']
        dWo = gradients['dWo']
        dWi = gradients['dWi']
        dWc = gradients['dWc']
        dWy = gradients['dWy']
        dbf = gradients['dbf']
        dbo = gradients['dbo']
        dbi = gradients['dbi']
        dbc = gradients['dbc']
        dby = gradients['dby']
        
        da_prevt = gradients['da_prev']
        dc_prevt = gradients['dc_prev']
    
    da0 = da_prevt  
#   last da_prevt during backpropogation is set to da0 which will act a0 during forward proppgation bz we set ao as a matrix of zero 

    gradients = {'da0':da0, 'dWf':dWf, 'dbf':dbf, 'dWo':dWo, 'dbo':dbo, 'dWi':dWi, 'dbi':dbi, 
                'dWc':dWc, 'dbc':dbc, 'dWy':dWy, 'dby':dby}
    return gradients

In [None]:
def update_parameters(parameters, gradients, learning_rate):
    
    parameters['Wf'] += learning_rate * gradients['dWf']
    parameters['Wo'] += learning_rate * gradients['dWo']
    parameters['Wi'] += learning_rate * gradients['dWi']
    parameters['Wc'] += learning_rate * gradients['dWc']
    parameters['Wy'] += learning_rate * gradients['dWy']
    
    parameters['bf'] += learning_rate * gradients['dbf']
    parameters['bo'] += learning_rate * gradients['dbo']
    parameters['bi'] += learning_rate * gradients['dbi']
    parameters['bc'] += learning_rate * gradients['dbc']
    parameters['by'] += learning_rate * gradients['dby']
    
    return parameters

In [None]:
def model(X, Y, num_iterations=2000, learning_rate = 0.1, print_cost = True):
    costs = []
    
    n_x = X_train.shape[0]
    n_y = Y_train.shape[0]
    n_a = 10
    parameters = initialize_parameters(n_a,n_x,n_y)
    a0 = np.random.randn(n_a,1)
    
    for i in range(0, num_iterations):
        y, a, c,caches = lstm_forward(X, a0, parameters)
        
        cost = compute_cost(y,Y)
        
        grads = lstm_backward(Y, caches)

        parameters = update_parameters(parameters, grads, learning_rate)
        
        if print_cost and i%100 == 0:
            print('Cost after iteration %i: %f' %(i, cost))
        if print_cost and i%100 == 0:
            costs.append(cost)
            
    plt.figure()
    plt.plot(costs)
    plt.xlabel('Iterations (per hundered)')
    plt.ylabel('Cost')
    plt.title('Learning rate = ' +str(learning_rate))
    plt.show()
    
    return parameters

In [None]:
type(Y_train)

numpy.ndarray

In [None]:

type(X_train)

pandas.core.frame.DataFrame

In [None]:
type(Y_test)

numpy.ndarray

In [None]:
type(X_test)

pandas.core.frame.DataFrame

In [None]:
X_train = X_train.values
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1]).T
X_test = X_test.values
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1]).T
Y_train = Y_train.reshape(Y_train.shape[0], 1, Y_train.shape[1]).T
Y_test = Y_test.reshape(Y_test.shape[0], 1, Y_test.shape[1]).T

In [None]:
 X_train.shape

(3, 1, 9840)

In [None]:
 Y_test.shape

(1, 1, 4218)

In [None]:
parameters = model(X_train, Y_train)

Cost after iteration 0: 1503.408225


  


Cost after iteration 100: 1566.428662
Cost after iteration 200: 1566.428662
Cost after iteration 300: 1566.428662
Cost after iteration 400: 1566.428662
Cost after iteration 500: 1566.428662
Cost after iteration 600: 1566.428662
Cost after iteration 700: 1566.428662
Cost after iteration 800: 1566.428662
Cost after iteration 900: 1566.428662
Cost after iteration 1000: 1566.428662
Cost after iteration 1100: 1566.428662
Cost after iteration 1200: 1566.428662
Cost after iteration 1300: 1566.428662
Cost after iteration 1400: 1566.428662
Cost after iteration 1500: 1566.428662
Cost after iteration 1600: 1566.428662
Cost after iteration 1700: 1566.428662
Cost after iteration 1800: 1566.428662
