# Import Packages

In [0]:
from IPython.display import clear_output
!pip install yfinance
!pip install mpl_finance
clear_output()

In [0]:
import pandas as pd
import numpy as np
from datetime import date
import yfinance as yf
from mpl_finance import candlestick2_ochl
import matplotlib.pyplot as plt

from keras.models import load_model, save_model

import warnings
warnings.filterwarnings("ignore")

import keras
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Flatten, Activation, add
from keras.layers import Dropout, Flatten, LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.models import Model, Sequential
from keras import initializers
from keras.engine import Layer, InputSpec
from keras import backend as K
from keras.utils import np_utils
from keras.optimizers import Adam, rmsprop

# Core

In this part, we search for the best combination of model, timestep, and timegap.

> **timestep** is the number of candlesticks that we use as the input for the model<br>
<br>
> **timegap** is the number of days ahead for which the price movement is predicted<br>
e.g.: if timegap is 14, we will predict the movement of the stock price (up or down) 14 days from today<br>
<br>
> **model** is the variation of the convolutional neural network. In this notebook, we vary the filter size and the number of convolution + pooling layer pairs.<br><br>
https://towardsdatascience.com/a-comprehensive-guide-to-convolutional-neural-networks-the-eli5-way-3bd2b1164a53
<br>by : Sumit Saha. <br>for the explanation of Convolutional Neural Network.

In [0]:
# Factory for the CNN variants compared in this notebook
def model(model_name):
    """Build and compile one of the four CNN variants.

    The variants differ in the convolution kernel size and in the number of
    convolution + max-pooling pairs:

    - ``'model 1'``: 3x3 kernels, 3 conv/pool pairs
    - ``'model 2'``: 5x5 kernels, 3 conv/pool pairs
    - ``'model 3'``: 3x3 kernels, 4 conv/pool pairs
    - ``'model 4'``: 5x5 kernels, 4 conv/pool pairs

    Every variant takes a 100x100x3 input, applies dropout (0.5) after
    flattening, then a 256-unit ReLU layer and a single sigmoid output unit.

    Parameters
    ----------
    model_name : str
        One of ``'model 1'``, ``'model 2'``, ``'model 3'``, ``'model 4'``.

    Returns
    -------
    keras.models.Model
        Model compiled with RMSprop (lr=1e-4), binary cross-entropy loss and
        the accuracy metric.

    Raises
    ------
    TypeError
        If ``model_name`` is not one of the recognized variants.
    """
    # variant name -> (kernel size, number of conv/pool pairs)
    variants = {
        'model 1': (3, 3),
        'model 2': (5, 3),
        'model 3': (3, 4),
        'model 4': (5, 4),
    }
    if model_name not in variants:
        # Fail immediately with a clear message; previously the exception was
        # created but never raised, so the error only surfaced later as an
        # unbound `filter_size`.
        raise TypeError('not recognized model: {!r}'.format(model_name))
    filter_size, n_blocks = variants[model_name]

    # Number of filters of each conv/pool pair; the fourth pair is optional.
    filters_per_block = (32, 48, 64, 96)[:n_blocks]

    # Input
    input_layer = Input(shape=(100, 100, 3))
    x = input_layer
    # Convolution + Pooling pairs
    for n_filters in filters_per_block:
        # The kernel size is passed as an explicit tuple: with the Keras 2
        # signature a third positional integer would be read as `strides`.
        x = Conv2D(n_filters, (filter_size, filter_size), activation='relu')(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Flatten()(x)
    x = Dropout(0.5)(x)
    # Fully Connected
    x = Dense(256, activation='relu')(x)
    x = Dense(1, activation='sigmoid')(x)

    # Named `network` so the local does not shadow this function's own name.
    network = Model(input_layer, x)

    network.compile(optimizer=rmsprop(lr=1.0e-4),loss='binary_crossentropy', metrics=['accuracy'])

    return network

In [0]:
# Choose the stock (Yahoo Finance ticker) and download its daily price history
stock = 'BMRI.JK'
data = yf.download(stock,'2000-01-01','2020-12-31')
if data.empty:
    # Without this check an empty download only fails much later,
    # inside the training loop, with a far less helpful error.
    raise RuntimeError('no price data downloaded for {}'.format(stock))
# Turn the date index into a regular 'Date' column
data = data.reset_index()
# Calendar year of every row, used below to select the study period
# (vectorized instead of a row-by-row `.loc` loop)
data['Year'] = data['Date'].dt.year

In [0]:
# Years to use (may be varied); the last year in the list is held out for validation
year = [2016,2017,2018,2019,2020]
train = data[data['Year'].isin(year)].reset_index(drop=True)

# The last year is the testing data; this is its number of rows
n_test_day = int((train['Year'] == year[-1]).sum())
if n_test_day == 0:
    # With 0 the slices below would silently invert:
    # `[:-0]` is empty and `[-0:]` is the whole array.
    raise ValueError('no rows found for the validation year {}'.format(year[-1]))

# Use a dark background so that background pixels become 0 once the
# candlestick image is converted into its RGB pixel representation
plt.style.use('dark_background')

# Grid of variations to evaluate
timesteps = [20, 40, 60]
timegaps = [1, 7, 14]
model_names = ['model 1', 'model 2', 'model 3', 'model 4']


def candlestick_image(window):
    """Render a window of price rows as a candlestick chart and return its pixels.

    Parameters
    ----------
    window : pandas.DataFrame
        Consecutive rows with 'Open', 'Close', 'High' and 'Low' columns.

    Returns
    -------
    numpy.ndarray
        float32 array of shape (height, width, 3) with values scaled to [0, 1].
    """
    my_dpi = 96
    fig = plt.figure(figsize=(100 / my_dpi, 100 / my_dpi), dpi=my_dpi)
    try:
        ax = fig.add_subplot(1, 1, 1)
        # convert tabular data into image
        candlestick2_ochl(ax, window['Open'], window['Close'], window['High'],
                          window['Low'], width=0.85,
                          colorup='#77d879', colordown='#db3f3f', alpha=1)
        ax.axis('off')
        fig.canvas.draw()
        # convert image into RGB representation
        width, height = fig.canvas.get_width_height()
        pixels = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    finally:
        # Always release the figure; hundreds are created per timestep.
        plt.close(fig)
    return (pixels.reshape(height, width, 3) / 255).astype(np.float32)


close = train['Close'].values
n_rows = train.shape[0]
results = []

# Iterate over the variations
for timestep in timesteps:
    # Even the largest timegap must leave at least one training sample
    # in front of the validation samples.
    if n_rows - timestep - max(timegaps) + 1 <= n_test_day:
        raise ValueError('not enough rows ({}) for timestep {} with {} validation days'
                         .format(n_rows, timestep, n_test_day))

    # The images depend only on the timestep, so they are rendered once per
    # timestep and shared by every timegap. Window k covers rows
    # k .. k + timestep - 1.
    n_windows = n_rows - timestep - min(timegaps) + 1
    images = np.stack([candlestick_image(train.iloc[k:k + timestep])
                       for k in range(n_windows)])

    for timegap in timegaps:
        # Fresh dataset for every (timestep, timegap) combination so samples
        # from earlier combinations never leak into this experiment.
        # Window k is usable when row k + timestep - 1 + timegap exists,
        # i.e. for k in [0, n_rows - timestep - timegap].
        n_samples = n_rows - timestep - timegap + 1
        train_temp_x = images[:n_samples]

        # Label 1 if the close `timegap` rows after the window's last row
        # is not lower than the close of that last row, else 0.
        last_close = close[timestep - 1:timestep - 1 + n_samples]
        future_close = close[timestep - 1 + timegap:timestep - 1 + timegap + n_samples]
        train_temp_y = (future_close - last_close >= 0).astype(int).reshape(-1, 1)

        # split the data for training and validation
        tx = train_temp_x[:-n_test_day]
        ex = train_temp_x[-n_test_day:]
        ty = train_temp_y[:-n_test_day]
        ey = train_temp_y[-n_test_day:]

        # iterate the model
        for models in model_names:
            cnn_model = model(models)
            nn_history = cnn_model.fit(tx, ty, epochs=100, batch_size=100,
                                       validation_data=(ex, ey))
            # mean accuracy over the last 10 epochs
            train_accuracy = np.array(nn_history.history['accuracy'][-10:]).mean()
            val_accuracy = np.array(nn_history.history['val_accuracy'][-10:]).mean()
            print('\n accuracy train {} and accuracy test {}'.format(train_accuracy, val_accuracy))
            # Optionally save the full training history:
            #pd.DataFrame(nn_history.history).to_csv('Hasil/Hasil_potong_potong/timestep{}_timegap{}_{}_{}_periode{}{}.csv'.format(timestep,timegap,models,stock,year[0],year[-1]))
            print('timestep{}_timegap{}_{}_{}_periode{}{}.csv'.format(timestep,timegap,models,stock,year[0],year[-1]))
            results.append({'timestep': timestep,
                            'timegap': timegap,
                            'model': models,
                            'train_accuracy': train_accuracy,
                            'val_accuracy': val_accuracy})

# Summary of every run, so no result is lost in the training logs
results_df = pd.DataFrame(results)
results_df