### Activation Function

In [1]:
import numpy as np
import pandas as pd

In [2]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    The textbook form calls ``np.exp(-z)``, which overflows to ``inf`` (and
    emits a RuntimeWarning) once ``z`` is a large negative number. Here the
    same quantity is computed as ``exp(-log(1 + exp(-z)))``, with the inner
    logarithm delegated to ``np.logaddexp`` so that no intermediate value
    overflows.

    Parameters
    ----------
    z : float or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    float or numpy.ndarray
        Element-wise sigmoid of ``z``, same shape as ``z``.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0.0, -z))
def sigmoid_derivative(z):
    """Derivative of the logistic sigmoid with respect to its input ``z``.

    Uses the identity ``s'(z) = s(z) * (1 - s(z))`` where ``s`` is ``sigmoid``.
    """
    activation = sigmoid(z)
    return activation * (1.0 - activation)

### Building a Neural Network from scratch

In [3]:
def train(X, y, n_hidden, learning_rate, n_iter, seed=None):
    """Train a one-hidden-layer regression network by full-batch gradient descent.

    Architecture: a sigmoid hidden layer followed by a single linear output
    unit. The gradients are those of the half mean squared error
    (``dZ3 = A3 - y``, averaged over the ``m`` samples at update time); the
    progress line printed every 100 iterations reports the plain mean
    squared error of the forward pass made before that iteration's update.

    Parameters
    ----------
    X : numpy.ndarray of shape (m, n_input)
        Training features.
    y : array-like holding m target values
        May be 1-D or an (m, 1) column; it is reshaped to a column here.
        Without that, a 1-D ``y`` would broadcast ``A3 - y`` to an (m, m)
        matrix and silently corrupt every gradient.
    n_hidden : int
        Number of hidden units.
    learning_rate : float
        Step size of each gradient-descent update.
    n_iter : int
        Number of full-batch iterations.
    seed : int or None, optional
        When given, the initial weights are drawn from
        ``np.random.RandomState(seed)`` so the run is reproducible. The
        default ``None`` keeps drawing from the global ``np.random`` state.

    Returns
    -------
    dict
        Learned parameters under the keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.

    Raises
    ------
    ValueError
        If ``y`` does not hold exactly one target per row of ``X``.
    """
    m, n_input = X.shape

    # Force a column vector so that A3 - y is (m, 1), never (m, m).
    y = np.asarray(y).reshape(-1, 1)
    if y.shape[0] != m:
        raise ValueError(
            'y must hold one target per row of X: got %d targets for %d rows'
            % (y.shape[0], m))

    # The global np.random module and a RandomState both expose randn().
    rng = np.random if seed is None else np.random.RandomState(seed)
    W1 = rng.randn(n_input, n_hidden)
    b1 = np.zeros((1, n_hidden))
    W2 = rng.randn(n_hidden, 1)
    b2 = np.zeros((1, 1))

    for i in range(1, n_iter + 1):
        # Forward pass: sigmoid hidden layer, identity (linear) output.
        Z2 = np.matmul(X, W1) + b1
        A2 = sigmoid(Z2)
        Z3 = np.matmul(A2, W2) + b2
        A3 = Z3

        # Backward pass (sums over samples; the division by m happens below).
        dZ3 = A3 - y
        dW2 = np.matmul(A2.T, dZ3)
        db2 = np.sum(dZ3, axis=0, keepdims=True)
        dZ2 = np.matmul(dZ3, W2.T) * sigmoid_derivative(Z2)
        dW1 = np.matmul(X.T, dZ2)
        # keepdims keeps db1 at (1, n_hidden), the same layout as b1 and db2.
        db1 = np.sum(dZ2, axis=0, keepdims=True)

        # Gradient-descent step on the sample-averaged gradients.
        W2 = W2 - learning_rate * dW2 / m
        b2 = b2 - learning_rate * db2 / m
        W1 = W1 - learning_rate * dW1 / m
        b1 = b1 - learning_rate * db1 / m

        if i % 100 == 0:
            cost = np.mean((y - A3) ** 2)
            print('Iteration %i, training loss: %f' %(i, cost))

    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    return model

### Reading the dataset

In [4]:
# Load the price history from a CSV expected in the working directory; the
# 'Date' column becomes the index. parse_dates is not passed, so the index is
# not explicitly converted to datetimes here.
mydata = pd.read_csv("19880101_20191231.csv", index_col='Date')
# Bare last expression: renders the frame as the cell output.
mydata

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1988-01-04,1950.80,2030.00,1950.80,2015.30,20877900
1988-01-05,2021.40,2075.30,2021.40,2031.50,27195361
1988-01-06,2031.50,2058.20,2012.80,2037.80,18790681
1988-01-07,2037.80,2061.50,2004.60,2051.90,21377283
1988-01-08,2051.90,2058.70,1898.00,1911.30,27445517
...,...,...,...,...,...
2019-12-24,28572.57,28576.80,28503.21,28515.45,86151979
2019-12-26,28539.46,28624.10,28535.15,28621.39,156025977
2019-12-27,28675.34,28701.66,28608.98,28645.26,182181663
2019-12-30,28654.76,28664.69,28428.98,28462.14,181507192


### Feature Generation

In [5]:
def add_original_feature(df, df_new):
    """Copy the raw price/volume columns of ``df`` into ``df_new`` (in place).

    'open' is taken from the same row; every other column is the value from
    the previous row (``shift(1)``). Nothing is returned.

    NOTE(review): three keys use a space ('close 1', 'low 1', 'volume 1')
    while the others use an underscore; they are kept exactly as they were.
    """
    df_new['open'] = df['Open']

    # (column written to df_new, column read from df), lagged by one row.
    lagged_columns = (
        ('open_1', 'Open'),
        ('close 1', 'Close'),
        ('high_1', 'High'),
        ('low 1', 'Low'),
        ('volume 1', 'Volume'),
    )
    for target, source in lagged_columns:
        df_new[target] = df[source].shift(1)

In [6]:
def add_avg_price(df, df_new):
    """Add lagged rolling means of 'Close', and their ratios, to ``df_new``.

    Windows of 5, 21 and 252 rows feed the columns suffixed 5, 30 and 365.
    Each rolling mean is shifted by one row, so a row only sees windows that
    ended on the previous row. ``df_new`` is modified in place.
    """
    close = df['Close']

    # (output column, rolling window length in rows)
    mean_specs = (
        ('avg_price_5', 5),
        ('avg_price_30', 21),
        ('avg_price_365', 252),
    )
    for column, window in mean_specs:
        df_new[column] = close.rolling(window).mean().shift(1)

    # (output column, numerator column, denominator column)
    ratio_specs = (
        ('ratio_avg_price_5_30', 'avg_price_5', 'avg_price_30'),
        ('ratio_avg_price_5_365', 'avg_price_5', 'avg_price_365'),
        ('ratio_avg_price_30_365', 'avg_price_30', 'avg_price_365'),
    )
    for column, numerator, denominator in ratio_specs:
        df_new[column] = df_new[numerator] / df_new[denominator]

In [7]:
def add_avg_volume(df, df_new):
    """Add lagged rolling means of 'Volume', and their ratios, to ``df_new``.

    Windows of 5, 21 and 252 rows feed the columns suffixed 5, 30 and 365.
    Each rolling mean is shifted by one row before being stored.
    ``df_new`` is modified in place.
    """
    volume = df['Volume']

    # (output column, rolling window length in rows)
    mean_specs = (
        ('avg_volume_5', 5),
        ('avg_volume_30', 21),
        ('avg_volume_365', 252),
    )
    for column, window in mean_specs:
        df_new[column] = volume.rolling(window).mean().shift(1)

    # (output column, numerator column, denominator column)
    ratio_specs = (
        ('ratio_avg_volume_5_30', 'avg_volume_5', 'avg_volume_30'),
        ('ratio_avg_volume_5_365', 'avg_volume_5', 'avg_volume_365'),
        ('ratio_avg_volume_30_365', 'avg_volume_30', 'avg_volume_365'),
    )
    for column, numerator, denominator in ratio_specs:
        df_new[column] = df_new[numerator] / df_new[denominator]

In [8]:
def add_std_price(df, df_new):
    """Add lagged rolling standard deviations of 'Close', and their ratios.

    Windows of 5, 21 and 252 rows feed the columns suffixed 5, 30 and 365.
    Each rolling standard deviation is shifted by one row before being
    stored. ``df_new`` is modified in place.
    """
    close = df['Close']

    # (output column, rolling window length in rows)
    std_specs = (
        ('std_price_5', 5),
        ('std_price_30', 21),
        ('std_price_365', 252),
    )
    for column, window in std_specs:
        df_new[column] = close.rolling(window).std().shift(1)

    # (output column, numerator column, denominator column)
    ratio_specs = (
        ('ratio_std_price_5_30', 'std_price_5', 'std_price_30'),
        ('ratio_std_price_5_365', 'std_price_5', 'std_price_365'),
        ('ratio_std_price_30_365', 'std_price_30', 'std_price_365'),
    )
    for column, numerator, denominator in ratio_specs:
        df_new[column] = df_new[numerator] / df_new[denominator]

In [9]:
def add_std_volume(df, df_new):
    """Add lagged rolling standard deviations of 'Volume', and their ratios.

    Windows of 5, 21 and 252 rows feed the columns suffixed 5, 30 and 365.
    Each rolling standard deviation is shifted by one row before being
    stored. ``df_new`` is modified in place.
    """
    volume = df['Volume']

    # (output column, rolling window length in rows)
    std_specs = (
        ('std_volume_5', 5),
        ('std_volume_30', 21),
        ('std_volume_365', 252),
    )
    for column, window in std_specs:
        df_new[column] = volume.rolling(window).std().shift(1)

    # (output column, numerator column, denominator column)
    ratio_specs = (
        ('ratio_std_volume_5_30', 'std_volume_5', 'std_volume_30'),
        ('ratio_std_volume_5_365', 'std_volume_5', 'std_volume_365'),
        ('ratio_std_volume_30_365', 'std_volume_30', 'std_volume_365'),
    )
    for column, numerator, denominator in ratio_specs:
        df_new[column] = df_new[numerator] / df_new[denominator]

In [10]:
def add_return_feature(df, df_new):
    """Add lagged 'Close' returns and moving averages of the 1-row return.

    ``return_N`` is the relative change of 'Close' over a horizon of 1, 5, 21
    or 252 rows (suffixes 1, 5, 30, 365), shifted by one row. The
    ``moving_avg_*`` columns are rolling means (5, 21, 252 rows) of the
    already-shifted ``return_1`` column, shifted by one further row.
    ``df_new`` is modified in place.
    """
    close = df['Close']

    def lagged_return(horizon):
        # Relative change versus `horizon` rows earlier, then lagged one row.
        earlier = close.shift(horizon)
        return ((close - earlier) / earlier).shift(1)

    # (output column, return horizon in rows)
    for column, horizon in (
        ('return_1', 1),
        ('return_5', 5),
        ('return_30', 21),
        ('return_365', 252),
    ):
        df_new[column] = lagged_return(horizon)

    # (output column, rolling window in rows) over the stored return_1 column.
    for column, window in (
        ('moving_avg_5', 5),
        ('moving_avg_30', 21),
        ('moving_avg_365', 252),
    ):
        df_new[column] = df_new['return_1'].rolling(window).mean().shift(1)

In [17]:
def generate_features(df):
    """Build the feature table, plus the 'close' target, from a raw price frame.

    Runs every ``add_*`` builder against a fresh DataFrame, appends the
    same-row 'Close' as the target column 'close', and drops every row that
    contains a NaN (the leading rows whose rolling/shifted values are not
    defined yet).

    Returns the resulting DataFrame; ``df`` itself is not modified.
    """
    features = pd.DataFrame()

    builders = (
        # 6 original features
        add_original_feature,
        # 31 generated features
        add_avg_price,
        add_avg_volume,
        add_std_price,
        add_std_volume,
        add_return_feature,
    )
    for build in builders:
        build(df, features)

    # the target
    features['close'] = df['Close']
    return features.dropna(axis=0)

In [18]:
# Build the model-ready table (features plus the 'close' target) from the raw frame.
data = generate_features(mydata)
# Preview the first rows as the cell output.
data.head()

Unnamed: 0_level_0,open,open_1,close 1,high_1,low 1,volume 1,avg_price_5,avg_price_30,avg_price_365,ratio_avg_price_5_30,...,ratio_std_volume_5_365,ratio_std_volume_30_365,return_1,return_5,return_30,return_365,moving_avg_5,moving_avg_30,moving_avg_365,close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1989-01-04,2146.6,2168.4,2144.6,2168.4,2127.1,17302883.0,2165.0,2150.62381,2062.113095,1.006685,...,0.562543,0.722978,-0.011067,-0.011204,0.020315,0.055673,0.00077,0.001222,0.000354,2177.7
1989-01-05,2177.7,2146.6,2177.7,2183.4,2146.6,15714720.0,2168.0,2154.690476,2062.668254,1.006177,...,0.473516,0.723715,0.015434,0.006936,0.040816,0.068652,-0.00223,0.000979,0.000278,2190.5
1989-01-06,2190.5,2177.7,2190.5,2205.2,2173.0,20303094.0,2172.82,2157.866667,2063.218254,1.00693,...,0.579903,0.74763,0.005878,0.011124,0.031406,0.067547,0.001428,0.001931,0.000327,2194.3
1989-01-09,2194.3,2190.5,2194.3,2213.8,2182.3,16494441.0,2175.14,2160.004762,2064.34127,1.007007,...,0.515648,0.745619,0.001735,0.005315,0.02089,0.148067,0.002262,0.001494,0.000323,2199.5
1989-01-10,2199.5,2194.3,2199.5,2209.1,2185.0,18410324.0,2181.32,2162.190476,2065.350794,1.008847,...,0.278728,0.741686,0.00237,0.014249,0.021313,0.13079,0.001104,0.001003,0.000602,2193.2


### Splitting the Dataset

In [19]:
# Chronological split: rows up to the end of 2018 train the model and 2019 is
# held out for testing. The bounds are used as label slices on the 'Date' index.
start_train ='1988-01-01'
end_train='2018-12-31'
start_test = '2019-01-01'
end_test='2019-12-31'

# Features are every column except the 'close' target.
data_train=data.loc[start_train:end_train]
X_train = data_train.drop('close', axis=1).values
# Column vector, matching the (m, 1) network output that train() subtracts it from.
y_train = data_train['close'].values.reshape(-1,1)
data_test = data.loc[start_test:end_test]
X_test = data_test.drop('close',axis=1).values
# NOTE(review): unlike y_train this stays 1-D, while predict() returns an
# (n, 1) column. Reshape one of them before any element-wise comparison,
# otherwise NumPy broadcasts the pair to an (n, n) matrix.
y_test = data_test['close'].values

In [20]:
X_train

array([[ 2.14660000e+03,  2.16840000e+03,  2.14460000e+03, ...,
         7.70160586e-04,  1.22198334e-03,  3.53781858e-04],
       [ 2.17770000e+03,  2.14660000e+03,  2.17770000e+03, ...,
        -2.23014030e-03,  9.78736087e-04,  2.77966171e-04],
       [ 2.19050000e+03,  2.17770000e+03,  2.19050000e+03, ...,
         1.42840080e-03,  1.93118417e-03,  3.26906475e-04],
       ...,
       [ 2.26290600e+04,  2.18577300e+04,  2.28784500e+04, ...,
        -1.56961511e-02, -5.38147761e-03, -4.52098686e-04],
       [ 2.32136100e+04,  2.26290600e+04,  2.31388200e+04, ...,
        -6.42770472e-03, -2.65996065e-03, -2.49777510e-04],
       [ 2.31539400e+04,  2.32136100e+04,  2.30624000e+04, ...,
        -1.17823747e-03, -2.81270786e-03, -2.03358072e-04]])

In [21]:
y_train

array([[ 2177.7 ],
       [ 2190.5 ],
       [ 2194.3 ],
       ...,
       [23138.82],
       [23062.4 ],
       [23327.46]])

### Scaling the Features

In [23]:
X_test

array([[ 2.30586100e+04,  2.31539400e+04,  2.33274600e+04, ...,
         2.14053440e-03, -3.17964288e-03, -2.20968383e-04],
       [ 2.31763900e+04,  2.30586100e+04,  2.33462400e+04, ...,
         8.06329058e-03, -3.82086621e-03, -1.85485319e-04],
       [ 2.28949200e+04,  2.31763900e+04,  2.26862200e+04, ...,
         1.40443893e-02, -3.73073680e-03, -1.63391611e-04],
       ...,
       [ 2.86753400e+04,  2.85394600e+04,  2.86213900e+04, ...,
         1.75359702e-03,  1.09440696e-03,  1.10313668e-03],
       [ 2.86547600e+04,  2.86753400e+04,  2.86452600e+04, ...,
         2.69389352e-03,  9.45297177e-04,  9.20078594e-04],
       [ 2.84146400e+04,  2.86547600e+04,  2.84621400e+04, ...,
         1.88559616e-03,  8.91338877e-04,  8.78227059e-04]])

In [24]:
y_test

array([23346.24, 22686.22, 23433.16, 23531.35, 23787.45, 23879.12,
       24001.92, 23995.95, 23909.84, 24065.59, 24207.16, 24370.1 ,
       24706.35, 24404.48, 24575.62, 24553.24, 24737.2 , 24528.22,
       24579.96, 25014.86, 24999.67, 25063.89, 25239.37, 25411.52,
       25390.3 , 25169.53, 25106.33, 25053.11, 25425.76, 25543.27,
       25439.39, 25883.25, 25891.32, 25954.44, 25850.63, 26031.81,
       26091.95, 26057.98, 25985.16, 25916.  , 26026.32, 25819.65,
       25806.63, 25673.46, 25473.23, 25450.24, 25650.88, 25554.66,
       25702.89, 25709.94, 25848.87, 25914.1 , 25887.38, 25745.67,
       25962.51, 25502.32, 25516.83, 25657.73, 25625.59, 25717.46,
       25928.68, 26258.42, 26179.13, 26218.13, 26384.63, 26424.99,
       26341.02, 26150.58, 26157.16, 26143.05, 26412.3 , 26384.77,
       26452.66, 26449.54, 26559.54, 26511.05, 26656.39, 26597.05,
       26462.08, 26543.33, 26554.39, 26592.91, 26430.14, 26307.79,
       26504.95, 26438.48, 25965.09, 25967.33, 25828.36, 25942

In [25]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so all dependencies are visible in one place.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# Fit the scaler on the training features only, then apply that same fitted
# scaler to the test features. Only X is scaled; y_train and y_test are not
# passed through the scaler.
X_Scaled_train = scaler.fit_transform(X_train)
X_Scaled_test= scaler.transform(X_test)

In [26]:
X_train.shape

(7558, 37)

In [27]:
y_train.shape

(7558, 1)

### Train the neural network

In [28]:
# Hyperparameters: hidden-layer width, gradient-descent step size, and the
# number of full-batch iterations.
n_hidden = 20
learning_rate = 0.1
n_iter=2000
# train() prints the training loss every 100 iterations and returns the
# learned weights as a dict.
# NOTE(review): the weights are initialised with np.random.randn and no seed
# is set in this notebook, so the losses will differ between runs.
model = train(X_Scaled_train, y_train, n_hidden, learning_rate, n_iter)

  return 1.0/(1+np.exp(-z))


Iteration 100, training loss: 2035425.889292
Iteration 200, training loss: 1189468.873577
Iteration 300, training loss: 824851.380731
Iteration 400, training loss: 618299.105001
Iteration 500, training loss: 574030.694237
Iteration 600, training loss: 577404.183735
Iteration 700, training loss: 506013.252002
Iteration 800, training loss: 479225.070435
Iteration 900, training loss: 473485.377017
Iteration 1000, training loss: 460872.548907
Iteration 1100, training loss: 456507.155906
Iteration 1200, training loss: 441657.768261
Iteration 1300, training loss: 432295.356275
Iteration 1400, training loss: 431010.123064
Iteration 1500, training loss: 421220.762288
Iteration 1600, training loss: 455993.128381
Iteration 1700, training loss: 433146.175216
Iteration 1800, training loss: 430808.111738
Iteration 1900, training loss: 425919.715398
Iteration 2000, training loss: 429072.154668


### Prediction Function

In [29]:
def predict(x, model):
    """Run the trained network forward on ``x`` and return its raw output.

    ``model`` is the dict produced by ``train`` (keys 'W1', 'b1', 'W2',
    'b2'). The hidden layer applies ``sigmoid``; the output layer is linear,
    so the result has one column per output unit of ``W2``.
    """
    hidden_weights, hidden_bias = model['W1'], model['b1']
    output_weights, output_bias = model['W2'], model['b2']

    hidden_activation = sigmoid(np.matmul(x, hidden_weights) + hidden_bias)
    return np.matmul(hidden_activation, output_weights) + output_bias

In [30]:
# Forward pass of the trained network over the scaled test features; the
# result has one row per test sample and a single column.
predictions = predict(X_Scaled_test, model)

  return 1.0/(1+np.exp(-z))


In [31]:
print(predictions)

[[20308.3418912 ]
 [20308.3418912 ]
 [20308.3418912 ]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24328.03271779]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24630.12797142]
 [24630.12797142]
 [24630.12797142]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49883309]
 [24330.49

In [32]:
print(y_test)

[23346.24 22686.22 23433.16 23531.35 23787.45 23879.12 24001.92 23995.95
 23909.84 24065.59 24207.16 24370.1  24706.35 24404.48 24575.62 24553.24
 24737.2  24528.22 24579.96 25014.86 24999.67 25063.89 25239.37 25411.52
 25390.3  25169.53 25106.33 25053.11 25425.76 25543.27 25439.39 25883.25
 25891.32 25954.44 25850.63 26031.81 26091.95 26057.98 25985.16 25916.
 26026.32 25819.65 25806.63 25673.46 25473.23 25450.24 25650.88 25554.66
 25702.89 25709.94 25848.87 25914.1  25887.38 25745.67 25962.51 25502.32
 25516.83 25657.73 25625.59 25717.46 25928.68 26258.42 26179.13 26218.13
 26384.63 26424.99 26341.02 26150.58 26157.16 26143.05 26412.3  26384.77
 26452.66 26449.54 26559.54 26511.05 26656.39 26597.05 26462.08 26543.33
 26554.39 26592.91 26430.14 26307.79 26504.95 26438.48 25965.09 25967.33
 25828.36 25942.37 25324.99 25532.05 25648.02 25862.68 25764.   25679.9
 25877.33 25776.61 25490.47 25585.69 25347.77 25126.41 25169.88 24815.04
 24819.78 25332.18 25539.57 25720.66 25983.94 26062.68