In [None]:
!pip install -q mplfinance

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers

import sklearn
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import classification_report, confusion_matrix

import mplfinance as mplf

In [None]:
# Record the versions of the core libraries this notebook was executed with.
(
    np.__version__,
    pd.__version__,
    tf.__version__,
    keras.__version__,
    sklearn.__version__,
)

In [None]:
# Seed value intended for the notebook's random number generators.
SEED = 1291

In [None]:
# Load the OHLC price series. The first CSV column becomes the index and
# parse_dates=True asks pandas to parse that index as dates.
ohlc = pd.read_csv("../input/candle-stick-patterns/ohlc.csv", index_col=0, parse_dates=True)
print(ohlc.shape)
# A bare expression in the middle of a cell is silently discarded, so the
# preview has to be displayed explicitly.
display(ohlc.head(3))

# Load the per-image dataset and put it in imgID order with a fresh 0..N-1 index.
# NOTE(security): pd.read_pickle can execute arbitrary code while loading;
# only use it on files from a trusted source.
data_df = pd.read_pickle("../input/candlestick-eda/data_df.pkl")
data_df = data_df.sort_values("imgID").reset_index(drop=True)
print(data_df.shape)
display(data_df.head(3))

# Number of image rows; later cells use it to cut the OHLC-derived series to the same length.
Data_Size = data_df.shape[0]


In [None]:
# Put the first Data_Size OHLC rows side by side with the image rows.
# Both frames carry a 0..N-1 index here, so the column-wise concat pairs rows by position.
# `axis` must be passed by keyword: the positional form was deprecated and is
# rejected by recent pandas releases.
data_df = pd.concat(
    [ohlc.reset_index(drop=True).iloc[:Data_Size], data_df],
    axis=1,
)
print(data_df.shape)
data_df[:3]

In [None]:
# Build binary labels from the relative change between a smoothed future high
# and the current high.
#
# The "high" series is shifted back by (targetBarPos + NBars) bars and then
# averaged with a centred rolling window of window_size bars; pct_changes is
# the relative difference between that smoothed value and the current high.

NBars = 3
window_size = 5
targetBarPos = 3

current_high = ohlc["high"]
future_high = (
    current_high
    .shift(-(targetBarPos + NBars))
    .rolling(window_size, center=True)
    .mean()
)
pct_changes = (future_high - current_high) / current_high

pct_changes.plot(kind="hist", bins=120)
plt.xlim(-0.5, 0.5)
plt.show()

# Cut the changes into equal-frequency bins (quantile step = qsize).
qsize = 0.125
qs = np.arange(0, 1 + qsize, qsize)
qranges = pd.qcut(pct_changes, q=qs)
print(qs)

# Attach the bin code and the OHLC index value to each image row, then make
# that index value ("ts") the index of data_df.
data_df["h_labels"] = qranges.cat.codes.values[:Data_Size]
data_df["ts"] = qranges.index[:Data_Size]
data_df = data_df.set_index("ts")

# Bin codes 5, 6 and 7 become the positive class (1). map() turns every other
# code into NaN, including -1 (the code used for NaN changes), and fillna
# then sets those to 0.
# NOTE: rows whose code is -1 are therefore labelled 0 instead of being dropped.
positive_bins = {5: 1, 6: 1, 7: 1}
data_df["h_labels"] = data_df["h_labels"].map(positive_bins).fillna(0)

# Show which interval corresponds to which integer code (-1 stands for NaN).
{interval: code for code, interval in enumerate(qranges.cat.categories)}

In [None]:
# Preview the first rows of data_df, which now carries the h_labels column and the "ts" index.
data_df.head(10)

# Prep X, y

In [None]:
# Stack the per-row images into one array of shape (n_samples, 40, 40, 1).
# Each imgData entry is reshaped to a single-sample batch and the batches are
# joined along the first axis.
# The rows are taken in imgID order; data_df was already sorted by imgID when
# it was loaded, which is what keeps X row-aligned with y below.
X = np.concatenate(
    [img.reshape(1, 40, 40, 1) for img in data_df.sort_values("imgID")["imgData"]]
)

# Binary target built in the labelling cell (1 = top bins, 0 = everything else).
y = data_df["h_labels"]

In [None]:
# Class balance of the binary labels.
y.value_counts()

# Split Data

In [None]:
# Shape of the stacked image array: (n_samples, 40, 40, 1).
X.shape

In [None]:
# Chronological split: only the first fold of a 2-split TimeSeriesSplit is used,
# so the test block comes directly after the training block.
tss = TimeSeriesSplit(n_splits=2)
train_idx, test_idx = next(tss.split(X, y))

X_tr, X_ts = X[train_idx], X[test_idx]
# train_idx / test_idx are integer positions, while y is indexed by "ts".
# Use .iloc so the selection is explicitly positional instead of relying on
# the deprecated positional fallback of Series.__getitem__.
y_tr, y_ts = y.iloc[train_idx], y.iloc[test_idx]

[x.shape for x in [X_tr, X_ts, y_tr, y_ts]]

In [None]:
# Load the previously trained model and wrap its first 13 layers as `encoder`
# (presumably the encoder half of the autoencoder - confirm against the
# notebook that produced the checkpoint).
model = keras.models.load_model('../input/candle-stick-autoencoder/best_model.ckp')
encoder = keras.models.Sequential(model.layers[:13])

# Freeze those layers. They are the same layer objects as model.layers[:13],
# so anything else built from that slice sees them as non-trainable too.
for layer in encoder.layers:
    layer.trainable = False

In [None]:
def define_model():
    """Build the classifier: the first 13 layers of `model` plus a dense head.

    The head flattens the output of those layers and applies
    Dense(46, relu) -> Dropout(0.3) -> Dense(16, relu) -> Dense(1, sigmoid).
    The 13 leading layers are taken from the global `model`, so they are
    shared with it rather than copied; the head layers are created fresh on
    every call.

    Returns:
        An uncompiled keras Sequential model with a single sigmoid output.
    """
    head = [
        layers.Flatten(),
        layers.Dense(46, activation="relu", name="dense_clf"),
        layers.Dropout(0.3, name="drop_clf"),
        layers.Dense(16, activation="relu", name="dense_clf_1"),
        layers.Dense(1, activation="sigmoid", name="dense_clf_2"),
    ]
    return keras.models.Sequential(model.layers[:13] + head)


# Build one instance to inspect the architecture.
clf = define_model()
clf.summary()

In [None]:
# Seed both NumPy and TensorFlow from the notebook-level SEED constant.
# Seeding NumPy alone leaves Keras weight initialisation, dropout and batch
# shuffling unseeded, so runs would not be repeatable.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Fresh classifier (new head layers) compiled for binary classification.
clf = define_model()
adam = keras.optimizers.Adam(learning_rate=0.01)
clf.compile(optimizer=adam, loss='binary_crossentropy')

# Stop once val_loss has not improved by at least min_delta for 15 epochs,
# and roll the model back to its best weights.
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.00001,
    patience=15,
    verbose=1,
    mode='auto',
    restore_best_weights=True)
# Save the full model (not just the weights) whenever val_loss improves.
ckp = keras.callbacks.ModelCheckpoint(
    filepath="best_model_clf.ckp",
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    save_freq='epoch',
)

# epochs=1500 is only an upper bound; early stopping normally ends training sooner.
# validation_split holds out 20% of the training data for the val_loss monitor.
clf.fit(X_tr, y_tr,
        batch_size=128,
        epochs=1500,
        verbose=1,
        validation_split=0.2,
        callbacks=[es, ckp]
        )

In [None]:
# Training vs. validation loss per epoch, read from the History recorded by the last fit.
# Labels and a legend are needed to tell the two curves apart.
plt.plot(clf.history.history["loss"], ".:", label="loss")
plt.plot(clf.history.history["val_loss"], ".:", label="val_loss")
plt.xlabel("epoch")
plt.ylabel("binary cross-entropy")
plt.legend()
plt.show()

In [None]:
# Turn the sigmoid outputs into hard 0/1 predictions by rounding, and flatten
# the (n_samples, 1) output to a 1-D vector so it lines up with y_tr / y_ts.
pred_y_tr = np.round(clf.predict(X_tr)).reshape(-1)
pred_y_ts = np.round(clf.predict(X_ts)).reshape(-1)

In [None]:
# Training-set metrics: text report plus a colour-graded confusion matrix.
cr = classification_report(y_true=y_tr, y_pred=pred_y_tr)
print(cr)

# Rows are the true classes and columns the predicted classes, each labelled
# with a (kind, class) tuple.
cm = pd.DataFrame(
    data=confusion_matrix(y_tr, pred_y_tr),
    index=tuple(("true", label) for label in (0, 1)),
    columns=tuple(("pred", label) for label in (0, 1)),
)
cm.style.background_gradient()

In [None]:
# Test-set metrics: text report plus a colour-graded confusion matrix.
cr = classification_report(y_true=y_ts, y_pred=pred_y_ts)
print(cr)

# Rows are the true classes and columns the predicted classes, each labelled
# with a (kind, class) tuple.
cm = pd.DataFrame(
    data=confusion_matrix(y_ts, pred_y_ts),
    index=tuple(("true", label) for label in (0, 1)),
    columns=tuple(("pred", label) for label in (0, 1)),
)
cm.style.background_gradient()

# Visualize

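- Green arrows mark the true signals (`h_labels` = 1), drawn just below the bar's low.
- Yellow arrows mark the signals predicted by the model, drawn slightly lower.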

# Train Series

In [None]:
# Candlestick chart of the first `win` bars of the training range, with the
# true labels (green) and the model's predictions (yellow) marked under the lows.
cols = ["open", "high", "low", "close"]

win = 350

# Only the first training position is plotted: the loop exits after one pass.
for i in train_idx:
    window = slice(i, i + win)
    data = data_df[cols].iloc[window]

    # True signals sit 10 below the bar low; bars whose label is below 1 get NaN (no marker).
    h_buy_sig = (data["low"] - 10).mask(data_df["h_labels"].iloc[window] < 1)

    # Predicted signals sit 20 below the bar low; bars predicted 0 get NaN (no marker).
    h_buy_pred = pred_y_tr[window].copy()
    fired = h_buy_pred != 0
    h_buy_pred[fired] = data["low"].iloc[fired] - 20
    h_buy_pred[h_buy_pred == 0] = np.nan

    # Both overlays share the same marker; only the colour differs.
    marker_style = dict(scatter=True, markersize=50, marker=r'$\Uparrow$')
    adp = [
        mplf.make_addplot(h_buy_sig, color="g", **marker_style),
        mplf.make_addplot(h_buy_pred, color="y", **marker_style),
    ]

    mplf.plot(
        data,
        type="candle",
        figsize=(33, 3),
        addplot=adp,
        axisoff=True,
    )

    plt.show()
    break

# Test Series

In [None]:
# Candlestick chart of the first `win` bars of the test range, with the
# true labels (green) and the model's predictions (yellow) marked under the lows.
cols = ["open", "high", "low", "close"]

win = 350

# Only the first test position is plotted: the loop exits after one pass.
for i in test_idx:
    window = slice(i, i + win)
    data = data_df[cols].iloc[window]

    # True signals sit 10 below the bar low; bars whose label is below 1 get NaN (no marker).
    h_buy_sig = (data["low"] - 10).mask(data_df["h_labels"].iloc[window] < 1)

    # pred_y_ts starts at the first test row, so positions in data_df are
    # shifted by test_idx[0] to index into it.
    offset = i - test_idx[0]
    # Predicted signals sit 20 below the bar low; bars predicted 0 get NaN (no marker).
    h_buy_pred = pred_y_ts[offset:(offset + win)].copy()
    fired = h_buy_pred != 0
    h_buy_pred[fired] = data["low"].iloc[fired] - 20
    h_buy_pred[h_buy_pred == 0] = np.nan

    # Both overlays share the same marker; only the colour differs.
    marker_style = dict(scatter=True, markersize=50, marker=r'$\Uparrow$')
    adp = [
        mplf.make_addplot(h_buy_sig, color="g", **marker_style),
        mplf.make_addplot(h_buy_pred, color="y", **marker_style),
    ]

    mplf.plot(
        data,
        type="candle",
        figsize=(33, 3),
        addplot=adp,
        axisoff=True,
    )

    plt.show()
    break