In [1]:
# # Display plots inline and change default figure size
# %matplotlib inline

# Package imports
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mplfinance.original_flavor import candlestick_ohlc
from pandas_datareader import data as datard
from datetime import datetime, timedelta
import yfinance
import matplotlib.ticker as mticker
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, CuDNNLSTM, Conv1D
from matplotlib import pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from scipy.signal import argrelextrema
from mplfinance.original_flavor import candlestick_ohlc
from keras.preprocessing.sequence import pad_sequences

In [2]:
from statsmodels.nonparametric.kernel_regression import KernelReg

def find_local_extreme(data):
    df = data.copy()
    del df['High']
    del df['Low']
    del df['Open']
    prices = df.copy()
    prices = prices.reset_index()
    prices.columns = ['date', 'price']
    prices = prices['price']
    
    kr = KernelReg([prices.values], [prices.index.to_numpy()], var_type='c')
    f = kr.fit([prices.index])

    smooth_prices = pd.Series(data=f[0], index=df.index)

    # Use smoothed prices to determine local minima and maxima
    smooth_prices = pd.Series(data=f[0], index=prices.index)
    smooth_local_max = argrelextrema(smooth_prices.values, np.greater , order = 5)[0]
    smooth_local_min = argrelextrema(smooth_prices.values, np.less , order = 5)[0]
    local_max_min = np.sort(
        np.concatenate([smooth_local_max, smooth_local_min]))
    smooth_extrema = smooth_prices.loc[local_max_min]

    # Iterate over extrema arrays returning datetime of passed
    # prices array. Uses idxmax and idxmin to window for local extrema.
    price_local_max_dt = []
    for i in smooth_local_max:
        if (i > 1) and (i < len(prices)-1):
            price_local_max_dt.append(prices.iloc[i-2:i+2].idxmax())

    price_local_min_dt = []
    for i in smooth_local_min:
        if (i > 1) and (i < len(prices)-1):
            price_local_min_dt.append(prices.iloc[i-2:i+2].idxmin())

    maxima = pd.Series(prices.loc[price_local_max_dt])
    minima = pd.Series(prices.loc[price_local_min_dt])
    extrema = pd.concat([maxima, minima]).sort_index()

    # Return series for each with bar as index
    return extrema, prices, smooth_extrema, smooth_prices


In [3]:
from collections import defaultdict
def find_patterns(extrema, max_bars=35):
    """
    Input:
        extrema: extrema as pd.series with bar number as index
        max_bars: max bars for pattern to play out
    Returns:
        patterns: patterns as a defaultdict list of tuples
        containing the start and end bar of the pattern
    """
    patterns = defaultdict(list)

    # Need to start at five extrema for pattern generation
    for i in range(5, len(extrema)):
        window = extrema.iloc[i-5:i]

        # A pattern must play out within max_bars (default 35)
        if (window.index[-1] - window.index[0]) > max_bars:
            continue

        # Using the notation from the paper to avoid mistakes
        e1 = window.iloc[0]
        e2 = window.iloc[1]
        e3 = window.iloc[2]
        e4 = window.iloc[3]
        e5 = window.iloc[4]

        rtop_g1 = np.mean([e1, e3, e5])
        rtop_g2 = np.mean([e2, e4])
        # Head and Shoulders
        if (e1 > e2) and (e3 > e1) and (e3 > e5) and \
                (abs(e1 - e5) <= 0.03*np.mean([e1, e5])) and \
                (abs(e2 - e4) <= 0.03*np.mean([e1, e5])):
            patterns['HS'].append((window.index[0], window.index[-1]))

        # Inverse Head and Shoulders
        elif (e1 < e2) and (e3 < e1) and (e3 < e5) and \
                (abs(e1 - e5) <= 0.03*np.mean([e1, e5])) and \
                (abs(e2 - e4) <= 0.03*np.mean([e1, e5])):
            patterns['IHS'].append((window.index[0], window.index[-1]))

        # Broadening Top
        elif (e1 > e2) and (e1 < e3) and (e3 < e5) and (e2 > e4):
            patterns['BTOP'].append((window.index[0], window.index[-1]))

        # Broadening Bottom
        elif (e1 < e2) and (e1 > e3) and (e3 > e5) and (e2 < e4):
            patterns['BBOT'].append((window.index[0], window.index[-1]))

        # Triangle Top
        elif (e1 > e2) and (e1 > e3) and (e3 > e5) and (e2 < e4):
            patterns['TTOP'].append((window.index[0], window.index[-1]))

        # Triangle Bottom
        elif (e1 < e2) and (e1 < e3) and (e3 < e5) and (e2 > e4):
            patterns['TBOT'].append((window.index[0], window.index[-1]))

        # Rectangle Top
        elif (e1 > e2) and \
                (abs(e1-rtop_g1)/rtop_g1 < 0.0075) and \
                (abs(e3-rtop_g1)/rtop_g1 < 0.0075) and \
                (abs(e5-rtop_g1)/rtop_g1 < 0.0075) and \
                (abs(e2-rtop_g2)/rtop_g2 < 0.0075) and \
                (abs(e4-rtop_g2)/rtop_g2 < 0.0075) and \
                (min(e1, e3, e5) > max(e2, e4)):

            patterns['RTOP'].append((window.index[0], window.index[-1]))

        # Rectangle Bottom
        elif (e1 < e2) and \
                (abs(e1-rtop_g1)/rtop_g1 < 0.0075) and \
                (abs(e3-rtop_g1)/rtop_g1 < 0.0075) and \
                (abs(e5-rtop_g1)/rtop_g1 < 0.0075) and \
                (abs(e2-rtop_g2)/rtop_g2 < 0.0075) and \
                (abs(e4-rtop_g2)/rtop_g2 < 0.0075) and \
                (max(e1, e3, e5) > min(e2, e4)):

            patterns['RBOT'].append((window.index[0], window.index[-1]))

    return patterns

In [4]:
def plot_window(prices, extrema, smooth_prices, smooth_extrema, ax=None):
    """
    Input: data from find_extrema
    Output: plots window for actual and smoothed prices and extrema
    """
    if ax is None:
        fig = plt.figure(figsize=[20,14])
        ax = fig.add_subplot(111)

    prices.plot(ax=ax, color='dodgerblue')
    ax.scatter(extrema.index, extrema.values, color='red')
    smooth_prices.plot(ax=ax, color='lightgrey')
    ax.scatter(smooth_extrema.index, smooth_extrema.values, color='lightgrey')

In [5]:
#plot_window(prices, extrema, smooth_prices, smooth_extrema)

In [6]:
# convert the price data in to actual candlestick parameters. Each candle has 4 parameters
# Open	High Low	Close -< conversion_array
# Size of the body measured by pips
# Size of the upper wicks measured by pips
# Size of the lower wicks measured by pips
# Type of the candle (Bullish or Bearish)(Green or Red)(0 or 1)


#Handle
def ohlc_to_candlestick(conversion_array):
    candlestick_data = [0,0,0,0]

    if conversion_array[3]>conversion_array[0]:
        candle_type=1
        wicks_up=conversion_array[1]-conversion_array[3]
        wicks_down=conversion_array[2]-conversion_array[0]
        body_size=conversion_array[3]-conversion_array[0]

    else:
        candle_type=0
        wicks_up=conversion_array[1]-conversion_array[0]
        wicks_down=conversion_array[2]-conversion_array[3]
        body_size=conversion_array[1]-conversion_array[3]


    if wicks_up < 0:wicks_up=wicks_up*(-1)
    if wicks_down < 0:wicks_down=wicks_down*(-1)
    if body_size < 0:body_size=body_size*(-1)
    
    candlestick_data[0]=candle_type
    candlestick_data[1]=round(round(wicks_up,5)*10000, 4)
    candlestick_data[2]=round(round(wicks_down,5)*10000, 4)
    candlestick_data[3]=round(round(body_size,5)*10000, 4)

    return candlestick_data

In [7]:
def pre_processing(df):
    extrema, prices, smooth_extrema, smooth_prices = find_local_extreme(stock_df)
    patterns = find_patterns(extrema)
    cor_arr = []
    
    for name, pattern_periods in patterns.items():
        if (name=='HS' or name=='IHS' or name=='TTOP' or name=='TBOT' or name=='BTOP' or name=='BBOT') :    
            for start, end in pattern_periods:
                x = prices.index[start-1]
                y = prices.index[end+1]
                if (y-x > 0):
                    # gap = 40 - y + x 
                    # mid = int(gap / 2)
                    # x = x - mid
                    # y = y + gap - mid
                    cor_arr.append([x,y])
    X_raw_tmp = [] #Array for actual data
    X_tmp = [] #Array for converted data
    Y_tmp = [] #Array for prediction based on the condition whether the mean close value of next two week is higher than current close value

    for row in cor_arr:
        tmp = []
        mean = []
        prediction = 0
        X_raw_tmp.append(df.values[row[0] : row[1]]) # append raw value

        for idx in range(row[0] , row[1]):
            converted_data = ohlc_to_candlestick(df.values[idx])
            tmp.append(converted_data) # convert data to cds 

        for idx in range(row[1], row[1] + 14):
            mean.append(df.values[idx][3])

        X_tmp.append(tmp)
        
        if (np.mean(mean) > df.values[row[1]][3]):
            prediction = 1
        Y_tmp.append(prediction)

    X_tmp = np.asarray(X_tmp)
    X_raw_tmp = np.asarray(X_raw_tmp)
    Y_tmp = np.asarray(Y_tmp)
    
    return X_tmp, Y_tmp, X_raw_tmp

# X  = np.empty((0, 40 ,4))
# Y = np.empty((0))
# X_raw = np.empty((0, 40 ,4))


In [8]:
stocks = ['TSLA' , 'MSFT' , 'NFLX' , 'AAPL' , 'AMZN' , 'AVGO' , 'FB' , 'OKTA' , 'REGN' , 'KHC' , 'ADSK' , 'ANSS' , 'FISV' , 'GILD' , 'AMLX']
#, 'MSFT' , 'AMD' , 'NFLX' , 'AVGO' , 'FB' , 'AMZN'
# , , 'ADSK' , 'ANSS' ,  'AVGO' , 'AZN' , 'FISV' , 'GILD' , 'KHC' , 'NFLX' , 'OKTA' , 'REGN
endDate = pd.to_datetime('today')

startDate = endDate - timedelta(days = 300 * 16 )
flag = 0

for stock in stocks:
  stock_df = yfinance.download(stock , startDate , endDate)
  del stock_df['Volume']
  del stock_df['Adj Close']
  X1 , Y1 , X_raw1 = pre_processing(stock_df)
  if (flag == 0):
    X = X1
    Y = Y1
    X_raw = X_raw1
    flag = 1
  else:
    X = np.concatenate((X, X1) , axis= 0)
    Y = np.concatenate((Y, Y1) , axis= 0)
    X_raw = np.concatenate((X_raw, X_raw1) , axis= 0)
 

X = pad_sequences(X)
X_raw = pad_sequences(X_raw)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


  return array(a, dtype, copy=False, order=order)


[*********************100%***********************]  1 of 1 completed


In [9]:
X.shape

(515, 37, 4)

In [21]:
from keras import layers
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.layers import Conv1D, TimeDistributed,Dropout,Input, Dense, BatchNormalization, GRU, Layer, Flatten
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam

def nn(shape_1,shape_2):
   
    input = Input(shape= (None , shape_1 , shape_2))

    conv1 = TimeDistributed(Conv1D(32 , kernel_size= 32 , strides= 1 , activation='relu'))(input)
    batch1 = TimeDistributed(BatchNormalization())(conv1)

    flat = TimeDistributed(Flatten())(batch1)

    gru1 = GRU(256, activation='relu',return_sequences=True, kernel_regularizer=l2(0.01))(flat)
    drop1 = Dropout(rate=0.4)(gru1)
    batch1 = BatchNormalization()(drop1)

    gru2 = GRU(128, activation='relu',return_sequences=True, kernel_regularizer=l2(0.01))(batch1)
    drop2 = Dropout(rate=0.4)(gru2)
    batch2 = BatchNormalization()(drop2)

    dense = TimeDistributed(Dense(1, activation='softmax'),name = 'output')(batch2)
    return [input], [dense]


EPOCH_LENGTH = 30
SAMPLE_RATE = 100

input, output = nn( X.shape[1],X.shape[2])
model = Model(inputs=input,outputs=output)

optimizer = Adam(learning_rate=2*1e-4)

# Compile Model
model.compile(optimizer=optimizer, loss={
                  'output': 'sparse_categorical_crossentropy', },
              metrics={
                  'output': 'sparse_categorical_accuracy', },
              sample_weight_mode='temporal')
model.summary()

# model.add(layers.LSTM(50 , return_sequences=True , input_shape = (None, X.shape[-1])))
# model.add(Dropout(0.2))
# model.add(layers.LSTM(100 , return_sequences=False))
# model.add(Dropout(0.2))

# model.add(layers.Dense(units = 1,activation='sigmoid'))

# model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

ValueError: Exception encountered when calling layer "time_distributed_7" (type TimeDistributed).

Input 0 of layer "conv1d_5" is incompatible with the layer: expected min_ndim=3, found ndim=2. Full shape received: (None, 37)

Call arguments received:
  • inputs=tf.Tensor(shape=(None, None, 37), dtype=float32)
  • training=False
  • mask=None

In [11]:
X.shape

(515, 37, 4)

In [12]:
# from keras import layers
# from tensorflow.keras.optimizers import RMSprop

# model = Sequential()


# model.add(layers.GRU(60 , return_sequences=True, input_shape = (None, X.shape[2])))
# model.add(Dropout(0.2))
# model.add(layers.GRU(60 , return_sequences=False))
# model.add(Dropout(0.2))
# model.add(Dense(1))

# model.compile(optimizer= 'rmsprop' , loss="mean_squared_error" , metrics=["acc"])
# model.summary()

In [13]:
X_train, X_val_and_test, Y_train, Y_val_and_test = train_test_split(X, Y, test_size=0.5 , shuffle = False)
X_val, X_test, Y_val, Y_test = train_test_split(X_val_and_test, Y_val_and_test, test_size=0.5 , shuffle = False)
X_train_raw, X_val_and_test_raw = train_test_split(X_raw, test_size=0.5 , shuffle = False )
X_val_raw, X_test_raw = train_test_split(X_val_and_test_raw, test_size=0.5 , shuffle = False)


In [14]:
model_checkpoint = keras.callbacks.ModelCheckpoint(
    "my_checkpoint", save_best_only=True)

# Set up early stop
early_stopping = keras.callbacks.EarlyStopping(patience=20)

In [15]:
history = model.fit(X_train, Y_train, epochs= 200 , validation_data=(X_val, Y_val) , callbacks=[model_checkpoint , early_stopping])

Epoch 1/200


ValueError: in user code:

    File "C:\Python39\lib\site-packages\keras\engine\training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "C:\Python39\lib\site-packages\keras\engine\training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Python39\lib\site-packages\keras\engine\training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "C:\Python39\lib\site-packages\keras\engine\training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "C:\Python39\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Python39\lib\site-packages\keras\engine\input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, None, 37, 4), found shape=(None, 37, 4)


In [None]:
# Chart 1 - Model Loss
#plt.subplot(331)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper right')
plt.show()


# Chart 2 - Model Accuracy
#plt.subplot(332)
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='lower right')
plt.show()

In [None]:
test_loss, test_acc = model.evaluate(X_test, Y_test)
print('Test accuracy:', test_acc)

In [None]:
# Timestep = List of candles seqeuence
# Items = Candlestick
# Features = High, Low, Open, Close parametes
def graph_data_ohlc(dataset):
    ax1 = plt.subplot2grid((1,1), (0,0))
    closep=dataset[:,[3]]
    highp=dataset[:,[1]]
    lowp=dataset[:,[2]]
    openp=dataset[:,[0]]
    date = range(len(closep))

    x = 0
    y = len(date)
    ohlc = []

    while x < y:
        append_me = date[x], openp[x], highp[x], lowp[x], closep[x]
        ohlc.append(append_me)
        x += 1

    candlestick_ohlc(ax1, ohlc, width=0.4, colorup='#77d879', colordown='#db3f3f')

    for label in ax1.xaxis.get_ticklabels():
        label.set_rotation(45)
        
    ax1.xaxis.set_major_locator(mticker.MaxNLocator(10))
    ax1.grid(True)


    plt.xlabel('Candle')
    plt.ylabel('Price')
    plt.title('Candlestick sample representation')

    plt.subplots_adjust(left=0.09, bottom=0.20, right=0.94, top=0.90, wspace=0.2, hspace=0)    
    plt.show()

In [None]:

counter = 0
won = 0
lost = 0
bullish_counter = 0
bearish_counter = 0
test = model.predict(X_test)
alpha_distance = 0.4

for a in test:
    if a > (1-alpha_distance) or a < alpha_distance :
        if Y_test[counter] == 1:
            print('Correct trend is Bullish')
            bullish_counter = bullish_counter + 1
        if Y_test[counter] == 0:
            print('Correct trend is Bearish')
            bearish_counter = bearish_counter + 1
        if a > (1-alpha_distance):print('Model prediction trend is Bullish')
        if a < alpha_distance:print('Model prediction trend is Bearish')

        if (a > (1-alpha_distance) and Y_test[counter] == 1) or (a < alpha_distance and Y_test[counter] == 0):
            won=won+1
            print('WON')
        else:
            print('LOST')
            lost=lost+1

        d_arr = np.empty(( 0 ,4))

        d_arr = np.append(d_arr , X_test_raw[counter] , axis= 0 )
        
        d_arr = d_arr[~np.all(d_arr == 0, axis=1)]

        graph_data_ohlc(d_arr)

    counter=counter+1
    
print('Won: ' + str(won) + ' Lost: ' + str(lost))
print('Success rate: ' + str(round((won*100)/(won+lost),2)) + '%')


In [None]:
bullish_counter / ( bullish_counter + bearish_counter)