In [None]:
import numpy as np
import pandas as pd
import datatable as dt
import datetime
import gc

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Compute-device label; not referenced by any of the cells visible in this notebook.
DEVICE = "CPU"

In [None]:
import warnings
# Suppress every warning category for the rest of the session. This also hides
# pandas/seaborn deprecation and chained-assignment warnings raised by later cells.
warnings.filterwarnings("ignore")

# 1. Data import and preprocessing

In [None]:
data_path = "/kaggle/input/g-research-crypto-forecasting/"

# Candle data is parsed with datatable and converted to pandas; the asset
# metadata table is read with pandas directly.
train = dt.fread(f"{data_path}train.csv").to_pandas()
asset_details = pd.read_csv(f"{data_path}asset_details.csv")

In [None]:
# Downgrade data types to save memory
# Every float64 column becomes float32 ...
float_cols = train.select_dtypes(include="float64").columns.tolist()
train[float_cols] = train[float_cols].astype("float32")

# ... and the two integer columns get the narrowest types used by this notebook.
train["Asset_ID"] = train["Asset_ID"].astype("int8")
train["Count"] = train["Count"].astype("int32")

In [None]:
# Forward fill data
# Each asset is re-gridded onto a gap-free 60-second timestamp grid spanning its
# first to last observation; missing minutes take the previous row's values.
train = train.set_index("timestamp").sort_index()

filled_frames = []
for asset in asset_details["Asset_ID"].sort_values().values:
    temp_df = train[train["Asset_ID"] == asset]
    full_grid = range(temp_df.index[0], temp_df.index[-1] + 60, 60)
    temp_df = temp_df.reindex(full_grid, method="pad")
    filled_frames.append(temp_df.reset_index())

# Concatenate once: growing the frame inside the loop re-copies all previously
# accumulated rows on every iteration.
train_df = pd.concat(filled_frames)

train_df["datetime"] = train_df["timestamp"].astype("datetime64[s]")
train_df.set_index("timestamp", inplace=True)

In [None]:
# Drop irrelevant columns
train_df = train_df.drop(columns=["VWAP", "Target"])

In [None]:
# Calculate log returns
def append_log_returns(df, periods=[1]):
    """Add log-return columns of ``Close`` for every asset in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by ``timestamp`` with at least ``Asset_ID`` and ``Close``
        columns. Rows of one asset are expected in chronological order.
    periods : list of int
        Row offsets passed to ``Series.diff``. A positive offset gives the
        look-back return ``log(close[t]) - log(close[t - p])``; a negative offset
        is sign-flipped so the column holds the look-ahead return
        ``log(close[t + |p|]) - log(close[t])``.
        Columns are named ``f"{int(p / 60)}h_log_returns"``, so offsets that are
        not multiples of 60 may map to the same column name.

    Returns
    -------
    pandas.DataFrame
        All assets stacked in ascending ``Asset_ID`` order, indexed by
        ``timestamp``. Infinite values are replaced by 0. An ``index`` column with
        the per-asset row position is included (kept for backward compatibility).
    """
    def _single_asset(frame):
        # Compute all requested return columns for one asset's rows.
        train_single = frame.copy().reset_index()
        log_close = np.log(train_single["Close"])
        for item in periods:
            returns = log_close.diff(periods=item)
            if item < 0:
                # diff with a negative offset yields current minus future; flip the sign.
                returns = returns * (-1)
            train_single[f"{int(item/60)}h_log_returns"] = returns
        return train_single.reset_index(drop=False)

    # Use the asset ids actually present in df rather than the row labels of a
    # global metadata table, which only coincide with the ids by accident.
    asset_ids = np.sort(df["Asset_ID"].unique())
    frames = [_single_asset(df[df["Asset_ID"] == asset_id]) for asset_id in asset_ids]
    if not frames:
        # Empty input: still return a frame with the expected columns.
        frames = [_single_asset(df)]

    # Single concat instead of the removed, quadratic DataFrame.append.
    returns_df = pd.concat(frames)

    #Replace infinite numbers with 0
    returns_df = returns_df.replace([np.inf, -np.inf], 0)
    returns_df = returns_df.set_index("timestamp")
    return returns_df

In [None]:
# Return horizons in minutes (1h, 12h, then 1 to 7 days). The negated copies
# request the matching look-ahead returns from append_log_returns.
horizons = [60, 720, 1440, 2880, 4320, 5760, 7200, 8640, 10080]
lrs = horizons + [-minutes for minutes in horizons]
train_df = append_log_returns(train_df, lrs)

In [None]:
# Drop rows with missing values
train_df = train_df.dropna()

# 2. Coin dataframe generation

In [None]:
# Show the asset metadata table as the cell output (reference for the Asset_ID values used below).
asset_details

In [None]:
# Coinwise df generation
# Keep only the rows belonging to Asset_ID 6 and Asset_ID 1.
coin_df = train_df[train_df["Asset_ID"].isin([6, 1])]

In [None]:
from sklearn.preprocessing import MinMaxScaler

def resample(df, new_timeframe):
    """Aggregate per-row candles into ``new_timeframe`` candles with pattern features.

    For every asset listed in the module-level ``asset_details`` table that has
    rows in ``df``, the rows are binned on the ``datetime`` column and turned into
    OHLCV candles, the log-return columns named by the module-level ``lrs`` list
    are carried over (last value per bin) together with a 0/1 "positive return"
    flag, candle-shape features are added, and lagged copies of the shape/price
    columns are attached via ``shift`` (which also drops incomplete rows).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by ``timestamp`` with ``Asset_ID``, ``datetime``, ``Count``,
        ``Open``, ``High``, ``Low``, ``Close``, ``Volume`` and the
        ``{h}h_log_returns`` columns.
    new_timeframe : str
        Pandas offset alias used for binning (e.g. ``"H"``).

    Returns
    -------
    pandas.DataFrame
        Candles of all assets indexed by ``(timestamp, asset_ID)`` and sorted by
        that index; infinite values are replaced by 0. An empty frame is returned
        when no asset has any rows.
    """
    lagged_columns = ["upper_shadow", "lower_shadow", "real_body", "candle", "body_mean", "open", "close", "high", "low",
                      "upper_shadow_v_real_body", "lower_shadow_v_real_body", "real_body_v_candle"]
    frames = []

    for asset_id, asset in zip(asset_details.Asset_ID, asset_details.Asset_Name):
        train_single = df[df.Asset_ID == asset_id].copy().reset_index()
        if train_single.shape[0] == 0:
            continue

        # One resampler serves every aggregate below instead of re-binning per column.
        bins = train_single.resample(new_timeframe, on='datetime')

        train_single_new = bins[['Count']].sum()
        train_single_new['timestamp'] = bins['timestamp'].max()
        train_single_new['open'] = bins['Open'].first()
        train_single_new['high'] = bins['High'].max()
        train_single_new['low'] = bins['Low'].min()
        train_single_new['close'] = bins['Close'].last()
        train_single_new['volume'] = bins['Volume'].sum()

        for lr in lrs:
            hours = int(lr/60)
            train_single_new[f'{hours}h_lr'] = bins[f'{hours}h_log_returns'].last()
            # 1 when the return over that horizon is strictly positive, else 0.
            train_single_new[f'{hours}h_pr'] = (train_single_new[f'{hours}h_lr'] > 0).astype(int)

        train_single_new['asset_ID'] = asset_id
        train_single_new['asset'] = asset
        train_single_new["upper_shadow"] = upper_shadow(train_single_new)
        train_single_new["lower_shadow"] = lower_shadow(train_single_new)
        train_single_new["real_body"] = real_body(train_single_new)
        train_single_new["candle"] = candle_volatility(train_single_new)

        train_single_new['body_mean'] = (train_single_new["open"] + train_single_new["close"]) / 2

        # Shape ratios; a zero denominator yields inf, which is zeroed further down.
        train_single_new["upper_shadow_v_real_body"] = (train_single_new["upper_shadow"] / train_single_new["real_body"]) #/ train_single_new["body_mean"]
        train_single_new["lower_shadow_v_real_body"] = (train_single_new["lower_shadow"] / train_single_new["real_body"]) #/ train_single_new["body_mean"]
        train_single_new["real_body_v_candle"] = (train_single_new["real_body"] / train_single_new["candle"]) #/ train_single_new["body_mean"]

        train_single_new = shift(train_single_new.copy(), lagged_columns, [-3, -2, -1, 1, 2, 3])

        frames.append(train_single_new.reset_index(drop=False))

    if not frames:
        return pd.DataFrame()

    # Single concat instead of the removed, quadratic DataFrame.append.
    train_daily = pd.concat(frames)

    #Replace infinite numbers with 0
    train_daily = train_daily.replace([np.inf, -np.inf], 0)
    train_daily = train_daily.set_index(["timestamp", "asset_ID"])
    # sort_index returns a new frame, so its result has to be returned/assigned.
    return train_daily.sort_index()

def upper_shadow(df):
    """Return the upper wick: ``high`` minus the larger of ``close`` and ``open``."""
    body_top = np.maximum(df['close'], df['open'])
    wick = df['high'] - body_top
    return wick

def lower_shadow(df):
    """Return the lower wick: the smaller of ``close`` and ``open`` minus ``low``."""
    body_bottom = np.minimum(df['close'], df['open'])
    wick = body_bottom - df['low']
    return wick

def real_body(df):
    """Return the absolute distance between ``close`` and ``open``."""
    signed_body = df['close'] - df['open']
    return np.abs(signed_body)

def candle_volatility(df):
    """Return the full candle range: ``high`` minus ``low``."""
    top, bottom = df['high'], df['low']
    return top - bottom

# Creating lagged features
def shift(df, columns, lags=[]):
    """Attach row-shifted copies of ``columns`` and drop incomplete rows.

    For each offset in ``lags`` the selected columns are shifted by that many
    rows and merged back on the index with the suffix ``_lag{offset}``.
    Rows containing any missing value (including those created by the shifts)
    are removed from the result.
    """
    for offset in lags:
        displaced = df[columns].shift(offset)
        df = df.merge(
            displaced,
            how="inner",
            left_index=True,
            right_index=True,
            suffixes=('', f'_lag{offset}'),
        )
    return df.dropna(axis=0)

In [None]:
# Aggregate the selected coins' rows into "H" bins (a "15T" bin size was the noted alternative).
coin_df = resample(df=coin_df.copy(), new_timeframe="H")  # 15T

# 3. Single Candlestick Patterns Analysis

In [None]:
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler

def slice_datetime_window(df, start, end):
    """Return the rows whose ``datetime`` lies in the half-open window [start, end).

    Positions are located with ``searchsorted``, so the ``datetime`` column is
    expected to be sorted in ascending order.
    """
    stamps = df["datetime"]
    first_pos = stamps.searchsorted(start)
    stop_pos = stamps.searchsorted(end)
    return df.iloc[first_pos:stop_pos]

def group_standardize(df, N):
    """Attach per-group "mean" and "std" columns computed over blocks of N consecutive rows.

    The index is reset first, so groups are formed from row positions
    (``position // N``). Each group's statistics are joined onto the group's
    first row and forward-filled onto the remaining rows.

    NOTE(review): "std" is the standard deviation, taken across the four price
    columns, of their per-group standard deviations - not a pooled standard
    deviation of all prices in the group. Confirm this is intended.
    NOTE(review): the final forward fill applies to the whole frame, so it would
    also fill missing values in any other column.

    Returns the reset-index frame with the additional "mean" and "std" columns.
    """
    df = df.reset_index()
    moments = pd.DataFrame()
    # Per-group column means, then averaged across open/high/low/close.
    moments["mean"] = df[["open", "high", "low", "close"]].groupby(df.index // N).agg(func=np.mean, axis=0).mean(axis=1)
    # Per-group column standard deviations, then the std across those four values.
    moments["std"] = df[["open", "high", "low", "close"]].groupby(df.index // N).agg(func=np.std, axis=0).std(axis=1)
    # Group k starts at row k * N; relabel so the join lands on each group's first row.
    moments.index = moments.index * N
    df = df.join(moments).fillna(method="ffill")
    return df

def plot_candlestick(df, slider=True, suffix=""):
    """Show a plotly candlestick chart of ``df``.

    The open/high/low/close series are read from the columns named
    ``open{suffix}``, ``high{suffix}``, ``low{suffix}`` and ``close{suffix}``;
    the frame's index is used as the x axis. ``slider`` toggles the range slider.
    """
    candles = go.Candlestick(
        x=df.index,
        open=df[f'open{suffix}'],
        high=df[f'high{suffix}'],
        low=df[f'low{suffix}'],
        close=df[f'close{suffix}'],
    )
    fig = go.Figure(data=[candles])
    fig.update_layout(xaxis_rangeslider_visible=slider)
    fig.show()
    
def plot_patterns(df, normalize=False, standardize=False, group_standardization=False, N=1):
    """Plot the candles in ``df`` as a candlestick chart, optionally rescaled.

    Exactly one scaling mode is applied, checked in this order:

    * ``normalize`` - each candle is mapped onto [0, 1] using its own low/high.
    * ``standardize`` - each candle is z-scored with the mean/std of its own
      open/high/low/close values.
    * ``group_standardization`` - candles are z-scored with the per-group
      "mean"/"std" produced by ``group_standardize(df, N)``.
    * otherwise - raw prices are plotted.

    The caller's frame is left untouched; all derived columns are written to a copy.
    """
    price_cols = ["open", "high", "low", "close"]

    # Callers typically pass a filtered slice; writing into it would mutate (or
    # ambiguously not mutate) their data, so work on a private copy.
    df = df.copy()
    suffix = ""
    # The divisor used in the unscaled branch is fixed to 1, i.e. prices stay as they are.
    df["body_mean"] = 1

    if normalize:
        suffix = "_sc"
        price_range = df["high"] - df["low"]
        df["open_sc"] = (df["open"] - df["low"]) / price_range
        df["high_sc"] = 1
        df["low_sc"] = 0
        df["close_sc"] = (df["close"] - df["low"]) / price_range

    elif standardize:
        suffix = "_sc"
        mean = df[price_cols].mean(axis=1)
        std = df[price_cols].std(axis=1)
        for col in price_cols:
            df[f"{col}_sc"] = (df[col] - mean) / std

    elif group_standardization:
        suffix = "_sc"
        df = group_standardize(df, N)
        for col in price_cols:
            df[f"{col}_sc"] = (df[col] - df["mean"]) / df["std"]

    else:
        for col in price_cols:
            df[col] = df[col] / df["body_mean"]

    plot_candlestick(df.reset_index(), False, suffix)

In [None]:
from scipy import stats
from statsmodels.stats.weightstats import ztest as ztest

def calc_metrics(df, pattern, plot_label=[], col_label=[], thr=None):
    """Plot look-ahead returns after a candlestick pattern and save the figure.

    A 2x3 grid is drawn. Columns: all pattern candles, pattern candles in a
    "downtrend", pattern candles in an "uptrend". Top row: mean look-ahead log
    return for day 1..7. Bottom row: share of positive look-ahead returns.
    A bar is grey when the z-test comparing the full sample with the sub-sample
    gives p > 0.05 for that horizon. The figure is written to
    ``barplot_{pattern}.png``.

    Parameters
    ----------
    df : pandas.DataFrame
        Candle frame with an ``asset`` column, the ``{h}h_lr`` / ``{h}h_pr``
        columns and the ``pattern`` column.
    pattern : str
        Column name; rows with a value > 0 count as pattern occurrences.
    plot_label : list of [row, col]
        Grid positions of the sub-plots whose bars should be annotated.
    col_label : list of list of int
        For each entry of ``plot_label``, the bar indices to annotate.
    thr : unused
        Accepted for call compatibility; not read.
    """
    returns_list = ["-24h_lr", "-48h_lr", "-72h_lr", "-96h_lr", "-120h_lr", "-144h_lr", "-168h_lr"]
    positive_returns_list = ["-24h_pr", "-48h_pr", "-72h_pr", "-96h_pr", "-120h_pr", "-144h_pr", "-168h_pr"]

    replace_dict = {"-24h_pr": "day 1", "-48h_pr": "day 2", "-72h_pr": "day 3", "-96h_pr": "day 4", "-120h_pr": "day 5", "-144h_pr": "day 6", "-168h_pr": "day 7"}

    primary_color = "#1781b3"
    secondary_color = "#00957e"

    #General direction
    gen = df[(df[pattern] > 0)]
    # NOTE(review): "downtrend" is defined as both look-back returns < +0.02, which
    # also includes flat and mildly rising markets; confirm whether -0.02 was meant.
    down = df[(df["48h_lr"] < 0.02) & (df["24h_lr"] < 0.02) & (df[pattern] > 0)]
    up = df[(df["48h_lr"] > 0.02) & (df["24h_lr"] > 0.02) & (df[pattern] > 0)]

    samples = [gen, down, up]
    titles = ['Total returns', 'After downtrend returns', 'After uptrend returns']

    # Long-format data and significance palettes per column, computed before any
    # figure is created so a failing test does not leave an empty figure behind.
    panels = []
    for sample in samples:
        lr_df = pd.melt(sample, id_vars=["asset"], value_vars=returns_list)
        pr_df = pd.melt(sample, id_vars=["asset"], value_vars=positive_returns_list)
        pr_df = pr_df.replace({"variable": replace_dict})

        # p-values, one per horizon
        lr_pvalues = ztest(df[returns_list], sample[returns_list], value=0)[1]
        pr_pvalues = ztest(df[positive_returns_list], sample[positive_returns_list], value=0)[1]

        # Each row is greyed by its own test: log-return bars by the log-return
        # z-test, probability bars by the positive-return z-test.
        lr_palette = ['grey' if (x > 0.05) else primary_color for x in lr_pvalues]
        pr_palette = ['grey' if (x > 0.05) else secondary_color for x in pr_pvalues]
        panels.append((lr_df, pr_df, lr_palette, pr_palette))

    fig, ax = plt.subplots(2,3, figsize=(20, 7), gridspec_kw={'height_ratios': [3, 1]}, sharex='col')

    for col, (lr_df, pr_df, lr_palette, pr_palette) in enumerate(panels):
        top, bottom = ax[0, col], ax[1, col]

        sns.barplot(x="variable", y="value", data=lr_df, ax=top, ci=None, color=primary_color, palette=lr_palette)
        sns.barplot(x="variable", y="value", data=pr_df, ax=bottom, ci=None, color=secondary_color, palette=pr_palette)

        top.set_title(f'{titles[col]}, sample size {samples[col].shape[0]}', fontsize = 15)

        top.set_ylim(-0.10, 0.10)
        bottom.set_ylim(0, 1)

        top.set_xlabel(None)
        bottom.set_xlabel("Timeframe")

        # Only the left-most column carries y-axis labels.
        if col == 0:
            top.set_ylabel("Mean log returns", labelpad=10)
            bottom.set_ylabel("Positive returns probability", labelpad=25)
        else:
            top.set_ylabel(None)
            bottom.set_ylabel(None)

        top.tick_params(axis='x', rotation=90, labelbottom=False)
        bottom.tick_params(axis='x', rotation=45)

    # Annotate the requested bars with their height.
    for index_1, subplot in enumerate(plot_label):
        for index_2, p in enumerate(ax[tuple(subplot)].patches):
            if index_2 in col_label[index_1]:
                label_color = primary_color
                if subplot[0] == 1:
                    label_color = secondary_color

                if ((subplot[0] == 1) and (p.get_height() < 0.5)):
                    # Low probability bar: print the value in white inside the bar area.
                    ax[tuple(subplot)].text(p.get_x() + p.get_width()/2., p.get_height() - 0.2, '{0:.2f}'.format(p.get_height()), fontsize=12, color="white", ha='center', va='bottom')
                elif p.get_height() < 0:
                    # Negative bar: place the label below the bar end.
                    ax[tuple(subplot)].text(p.get_x() + p.get_width()/2., p.get_height() - 0.014, '{0:.2f}'.format(p.get_height()), fontsize=12, color=label_color, ha='center', va='bottom')
                else:
                    ax[tuple(subplot)].text(p.get_x() + p.get_width()/2., p.get_height(), '{0:.2f}'.format(p.get_height()), fontsize=12, color=label_color, ha='center', va='bottom')

    # 50% reference line on the probability charts.
    for col in range(3):
        ax[1, col].hlines(y = 0.5, xmin = 0, xmax = 6, color = 'black', linewidth=2)

    sns.despine()

    plt.savefig(f"barplot_{pattern}.png")
    
#     return direction_df

In [None]:
from scipy import stats
from statsmodels.stats.weightstats import ztest as ztest

def calc_metrics_2(df, pattern, plot_label=[], col_label=[], thr=None):
    """Mosaic-layout variant of ``calc_metrics``; plots and saves look-ahead returns.

    Layout: a wide "upper"/"lower" pair for all pattern candles, and below it an
    "upper left"/"lower left" pair (pattern in a "downtrend") and an
    "upper right"/"lower right" pair (pattern in an "uptrend"). "upper*" axes show
    mean look-ahead log returns, "lower*" axes the share of positive look-ahead
    returns, for day 1..7. A bar is grey when the z-test comparing the full sample
    with the sub-sample gives p > 0.05 for that horizon. The figure is written to
    ``barplot_{pattern}.png``.

    Parameters
    ----------
    df : pandas.DataFrame
        Candle frame with an ``asset`` column, the ``{h}h_lr`` / ``{h}h_pr``
        columns and the ``pattern`` column.
    pattern : str
        Column name; rows with a value > 0 count as pattern occurrences.
    plot_label : list of str
        Mosaic keys of the sub-plots whose bars should be annotated.
    col_label : list of list of int
        For each entry of ``plot_label``, the bar indices to annotate.
    thr : any
        Only echoed in the printed summary line.
    """
    returns_list = ["-24h_lr", "-48h_lr", "-72h_lr", "-96h_lr", "-120h_lr", "-144h_lr", "-168h_lr"]
    positive_returns_list = ["-24h_pr", "-48h_pr", "-72h_pr", "-96h_pr", "-120h_pr", "-144h_pr", "-168h_pr"]

    replace_dict = {"-24h_pr": "day 1", "-48h_pr": "day 2", "-72h_pr": "day 3", "-96h_pr": "day 4", "-120h_pr": "day 5", "-144h_pr": "day 6", "-168h_pr": "day 7"}

    primary_color = "#1781b3"
    secondary_color = "#00957e"

    #General direction
    gen = df[(df[pattern] > 0)]
    # NOTE(review): "downtrend" is defined as both look-back returns < +0.02, which
    # also includes flat and mildly rising markets; confirm whether -0.02 was meant.
    down = df[(df["48h_lr"] < 0.02) & (df["24h_lr"] < 0.02) & (df[pattern] > 0)]
    up = df[(df["48h_lr"] > 0.02) & (df["24h_lr"] > 0.02) & (df[pattern] > 0)]

    # (returns axes key, probability axes key, sub-sample)
    layout = [("upper", "lower", gen), ("upper left", "lower left", down), ("upper right", "lower right", up)]

    panels = []
    all_colors = []
    for _, _, sample in layout:
        lr_df = pd.melt(sample, id_vars=["asset"], value_vars=returns_list)
        pr_df = pd.melt(sample, id_vars=["asset"], value_vars=positive_returns_list)
        pr_df = pr_df.replace({"variable": replace_dict})

        # p-values, one per horizon
        lr_pvalues = ztest(df[returns_list], sample[returns_list], value=0)[1]
        pr_pvalues = ztest(df[positive_returns_list], sample[positive_returns_list], value=0)[1]

        # Each chart is greyed by its own test: log-return bars by the log-return
        # z-test, probability bars by the positive-return z-test.
        lr_palette = ['grey' if (x > 0.05) else primary_color for x in lr_pvalues]
        pr_palette = ['grey' if (x > 0.05) else secondary_color for x in pr_pvalues]
        panels.append((lr_df, pr_df, lr_palette, pr_palette))
        all_colors.extend(lr_palette)
        all_colors.extend(pr_palette)

    # Share of grey bars over all six charts (6 charts x 7 horizons), i.e. the
    # fraction of tests with p > 0.05.
    significance = all_colors.count("grey") / len(all_colors)

    print(f"Significance: {significance}, thr: {thr}")

    # subplot_mosaic creates the figure itself; no separate plt.figure() call is
    # needed (an extra one would leave an empty figure open).
    fig, ax = plt.subplot_mosaic([['upper', 'upper', 'upper', 'upper'],
                                  ['lower', 'lower', 'lower', 'lower'],
                                  ['upper left', 'upper left', 'upper right', 'upper right'],
                                  ['lower left', 'lower left', 'lower right', 'lower right']],
                                gridspec_kw={'height_ratios': [6, 2, 3, 1]},
                                figsize=(13, 10),
                                constrained_layout=True)

    for (top_key, bottom_key, _), (lr_df, pr_df, lr_palette, pr_palette) in zip(layout, panels):
        sns.barplot(x="variable", y="value", data=lr_df, ax=ax[top_key], ci=None, color=primary_color, palette=lr_palette)
        sns.barplot(x="variable", y="value", data=pr_df, ax=ax[bottom_key], ci=None, color=secondary_color, palette=pr_palette)

    ax["upper"].set_title(f'Total returns, sample size {gen.shape[0]}', fontsize = 15)
    ax["upper left"].set_title(f'After downtrend returns, sample size {down.shape[0]}')
    ax["upper right"].set_title(f'After uptrend returns, sample size {up.shape[0]}')

    for top_key, bottom_key, _ in layout:
        top, bottom = ax[top_key], ax[bottom_key]

        # Link the x limits of each returns chart with the probability chart below it.
        # NOTE(review): Grouper.join on shared axes is deprecated in newer matplotlib.
        top.get_shared_x_axes().join(top, bottom)

        top.set_ylim(-0.10, 0.10)
        bottom.set_ylim(0, 1)

        top.set_xlabel(None)
        bottom.set_xlabel(None)

        top.tick_params(axis='x', labelbottom=False)

    ax["upper"].set_ylabel("Mean log returns", labelpad=10)
    ax["upper left"].set_ylabel("Mean lr", labelpad=10)
    ax["upper right"].set_ylabel(None)

    ax["lower"].set_ylabel("Positive returns probability", labelpad=25)
    ax["lower left"].set_ylabel("Pr probability", labelpad=25)
    ax["lower right"].set_ylabel(None)

    ax["lower"].tick_params(axis='x')
    ax["lower left"].tick_params(axis='x', rotation=45)
    ax["lower right"].tick_params(axis='x', rotation=45)

    # Annotate the requested bars with their height.
    for index_1, subplot in enumerate(plot_label):
        for index_2, p in enumerate(ax[subplot].patches):
            if index_2 in col_label[index_1]:
                label_color = primary_color
                if "lower" in subplot:
                    label_color = secondary_color
                ax[subplot].text(p.get_x() + p.get_width()/2., p.get_height(), '{0:.2f}'.format(p.get_height()), fontsize=12, color=label_color, ha='center', va='bottom')

    # 50% reference line on the probability charts.
    for _, bottom_key, _ in layout:
        ax[bottom_key].hlines(y = 0.5, xmin = 0, xmax = 6, color = 'black', linewidth=2)

    sns.despine()

    plt.savefig(f"barplot_{pattern}.png")

## 3.0 Long and short candles

In [None]:
# Candle size classes: the candle range (high - low) relative to the body
# midpoint. Candles exactly on a threshold fall into no class.
upper_th = 0.10
lower_th = 0.01

is_small_candle = coin_df["candle"] < coin_df["body_mean"] * lower_th
is_medium_candle = (
    (coin_df["candle"] > coin_df["body_mean"] * lower_th)
    & (coin_df["candle"] < coin_df["body_mean"] * upper_th)
)
is_large_candle = coin_df["candle"] > coin_df["body_mean"] * upper_th

# assign() builds new frames, so nothing is written into a filtered slice.
small_candle = coin_df[is_small_candle].assign(small_candle=1)
medium_candle = coin_df[is_medium_candle].assign(medium_candle=1)
large_candle = coin_df[is_large_candle].assign(large_candle=1)

# 1.0 / 0.0 flag columns; assigning (rather than joining) keeps this cell re-runnable.
coin_df = coin_df.assign(
    small_candle=is_small_candle.astype(float),
    medium_candle=is_medium_candle.astype(float),
    large_candle=is_large_candle.astype(float),
)

## 3.1 Baseline market movement metrics

In [None]:
# calc_metrics_2(coin_df.reset_index(), "asset_ID", ["upper", "lower"], [[6], [6]])
# Baseline: "asset_ID" is > 0 for both selected coins (1 and 6), so every candle counts as a match.
calc_metrics(
    coin_df.reset_index(),
    "asset_ID",
    plot_label=[[0, 0], [1, 0], [0, 2]],
    col_label=[[6], [6], [4]],
)

How to read the chart:
* The upper chart shows mean log-returns comparing today and day x in the future.
* The lower chart shows the probability of positive market returns comparing today and day x in the future.
* Grey bars indicate statistically insignificant results (z-test).
* Left charts show the general market direction after a given candlestick pattern.
* Middle charts show the market direction after a given candlestick pattern when the market is in a downtrend.
* Right charts show the market direction after a given candlestick pattern when the market is in an uptrend.

## 3.2 Bullish Marubozu

In [None]:
# Bullish Marubozu: rising candle whose close sits near the high and whose open
# sits near the low, with both shadows small relative to the real body.
is_bull_marubozu = (
    ((coin_df["high"] - coin_df["close"]) / coin_df["close"] < 0.003)
    # open - low (not low - open): low <= open, so the reversed difference is
    # never positive and the check would accept every candle.
    & ((coin_df["open"] - coin_df["low"]) / coin_df["open"] < 0.003)
    & (coin_df["upper_shadow_v_real_body"] < 0.09)
    & (coin_df["lower_shadow_v_real_body"] < 0.09)
    & (coin_df["close"] > coin_df["open"])
#     & (coin_df["medium_candle"] == 1)
)

# assign() returns a new frame, avoiding a write into a filtered slice.
bull_marubozu = coin_df[is_bull_marubozu].assign(bull_marubozu=1)
coin_df2 = coin_df.join(bull_marubozu["bull_marubozu"]).fillna(0)

# calc_metrics_2(coin_df2, "bull_marubozu", ["upper","lower", "lower left", "upper right", "lower right"], [[6], [6], [6], [6], [6]])
calc_metrics(coin_df2, "bull_marubozu", [[0, 0], [1, 0], [1, 1], [0, 2], [1, 2]], [[6], [6], [6], [6], [6]])

plot_patterns(coin_df2[coin_df2["bull_marubozu"] > 0][:20], False, True)

## 3.3 Bearish Marubozu

In [None]:
# Bearish Marubozu: falling candle whose close sits near the low and whose open
# sits near the high, with both shadows small relative to the real body.
is_bear_marubozu = (
    # close - low (not low - close): low <= close, so the reversed difference is
    # never positive and the check would accept every candle.
    ((coin_df["close"] - coin_df["low"]) / coin_df["close"] < 0.003)
    & ((coin_df["high"] - coin_df["open"]) / coin_df["open"] < 0.003)
    & (coin_df["upper_shadow_v_real_body"] < 0.09)
    & (coin_df["lower_shadow_v_real_body"] < 0.09)
    & (coin_df["close"] < coin_df["open"])
#     & (coin_df["medium_candle"] == 1)
)

# assign() returns a new frame, avoiding a write into a filtered slice.
bear_marubozu = coin_df[is_bear_marubozu].assign(bear_marubozu=1)
coin_df2 = coin_df.join(bear_marubozu["bear_marubozu"]).fillna(0)

# calc_metrics_2(coin_df2, "bear_marubozu", ["upper", "lower", "upper left", "lower left"], [[4], [4], [4], [4]])
calc_metrics(coin_df2, "bear_marubozu", [[0, 0], [1, 0], [0, 1], [1, 1]], [[4], [4], [4], [4]])

plot_patterns(coin_df2[coin_df2["bear_marubozu"] > 0][:20], False, True)

## 3.4 Hammer

In [None]:
thr = 1.7

# Hammer: rising candle with a small upper wick and a long lower wick, each
# compared with the body through the factor `thr`.
hammer_body = coin_df["close"] - coin_df["open"]
hammer_upper_wick = coin_df["high"] - coin_df["close"]
hammer_lower_wick = coin_df["open"] - coin_df["low"]

is_hammer = (
    (hammer_upper_wick * thr < hammer_body)
    & (hammer_body * thr < hammer_lower_wick)
    & (coin_df["close"] > coin_df["open"])
#     & (coin_df["medium_candle"] == 1)
)

hammer = coin_df[is_hammer].assign(hammer=1)
coin_df2 = coin_df.join(hammer["hammer"]).fillna(0)

# calc_metrics_2(coin_df2, "hammer", ["upper", "lower", "upper right", "lower right"], [[6], [6], [6], [6]])
calc_metrics(coin_df2, "hammer", [[0, 0], [1, 0], [0, 2], [1, 2]], [[6], [6], [6], [6]])

plot_patterns(coin_df2[coin_df2["hammer"] > 0][:20], False, True)

## 3.5 Hanging man

In [None]:
thr = 2.2

# Hanging man: falling candle with a small upper wick and a long lower wick,
# each compared with the body through the factor `thr`.
hang_man_body = coin_df["open"] - coin_df["close"]
hang_man_upper_wick = coin_df["high"] - coin_df["open"]
hang_man_lower_wick = coin_df["close"] - coin_df["low"]

is_hang_man = (
    (hang_man_upper_wick * thr < hang_man_body)
    & (hang_man_body * thr < hang_man_lower_wick)
    & (coin_df["close"] < coin_df["open"])
#     & (coin_df["medium_candle"] == 1)
)

hang_man = coin_df[is_hang_man].assign(hang_man=1)
coin_df2 = coin_df.join(hang_man["hang_man"]).fillna(0)

# calc_metrics_2(coin_df2, "hang_man", ["upper", "lower"], [[0], [6]])
calc_metrics(coin_df2, "hang_man", [[0, 0], [1, 0], [1, 1]], [[0], [6], [6]])

plot_patterns(coin_df2[coin_df2["hang_man"] > 0][:20], False, True)

## 3.6 Inverted hammer

In [None]:
thr = 2

# Inverted hammer: rising candle with a small lower wick and a long upper wick,
# each compared with the body through the factor `thr`.
inv_hammer_body = coin_df["close"] - coin_df["open"]
inv_hammer_lower_wick = coin_df["open"] - coin_df["low"]
inv_hammer_upper_wick = coin_df["high"] - coin_df["close"]

is_inv_hammer = (
    (inv_hammer_lower_wick * thr < inv_hammer_body)
    & (inv_hammer_body * thr < inv_hammer_upper_wick)
    & (coin_df["close"] > coin_df["open"])
    #& (coin_df["medium_candle"] == 1)
)

inv_hammer = coin_df[is_inv_hammer].assign(inv_hammer=1)
coin_df2 = coin_df.join(inv_hammer["inv_hammer"]).fillna(0)

# calc_metrics_2(coin_df2, "inv_hammer", ["upper", "lower", "upper left", "lower left"], [[4], [4], [4], [5]])
calc_metrics(coin_df2, "inv_hammer", [[0, 0], [1, 0], [0, 1], [1, 1]], [[4], [4], [4], [5]])

plot_patterns(coin_df2[coin_df2["inv_hammer"] > 0][:20], False, True)

## 3.7 Shooting star

In [None]:
thr = 2

# Shooting star: falling candle with a small lower wick and a long upper wick,
# each compared with the body through the factor `thr`.
shot_star_body = coin_df["open"] - coin_df["close"]
shot_star_lower_wick = coin_df["close"] - coin_df["low"]
shot_star_upper_wick = coin_df["high"] - coin_df["open"]

is_shot_star = (
    (shot_star_lower_wick * thr < shot_star_body)
    & (shot_star_body * thr < shot_star_upper_wick)
    & (coin_df["close"] < coin_df["open"])
#     & (coin_df["medium_candle"] == 1)
)

shot_star = coin_df[is_shot_star].assign(shot_star=1)
coin_df2 = coin_df.join(shot_star["shot_star"]).fillna(0)

# calc_metrics_2(coin_df2, "shot_star", ["upper","lower", "upper left", "lower left", "upper right", "lower right"], [[5], [5], [6], [5], [4], [5]])
calc_metrics(coin_df2, "shot_star", [[0, 0], [1, 0], [0, 1], [1, 1], [0, 2], [1, 2]], [[5], [5], [6], [5], [4], [5]])
plot_patterns(coin_df2[coin_df2["shot_star"] > 0][:20], False, True)

## 3.8 Spinning top

In [None]:
# Spinning top: the body is between 10% and 20% of each shadow, and the two
# shadows differ in length by less than 5%.
spin_body_to_upper = coin_df["real_body"] / coin_df["upper_shadow"]
spin_body_to_lower = coin_df["real_body"] / coin_df["lower_shadow"]
spin_upper_to_lower = coin_df["upper_shadow"] / coin_df["lower_shadow"]

is_spin_top = (
    (spin_body_to_upper < 0.20)
    & (spin_body_to_lower < 0.20)
    & (spin_body_to_upper > 0.10)
    & (spin_body_to_lower > 0.10)
    & (spin_upper_to_lower > 0.95)
    & (spin_upper_to_lower < 1.05)
)

spin_top = coin_df[is_spin_top].assign(spin_top=1)
coin_df2 = coin_df.join(spin_top["spin_top"]).fillna(0)

# calc_metrics_2(coin_df2, "spin_top", ["upper","lower", "upper left", "lower left", "upper right", "lower right"], [[5], [5], [6], [5], [4], [5]])
calc_metrics(coin_df2, "spin_top", [[0, 0], [1, 0], [0, 1], [1, 1], [1, 2]], [[6], [6], [6], [6], [4]])
plot_patterns(coin_df2[coin_df2["spin_top"] > 0][:20], False, True)

## 3.9 Doji

In [None]:
# Doji: the body is below 10% of each shadow, and the two shadows differ in
# length by less than 5%.
doji_body_to_lower = coin_df["real_body"] / coin_df["lower_shadow"]
doji_body_to_upper = coin_df["real_body"] / coin_df["upper_shadow"]
doji_upper_to_lower = coin_df["upper_shadow"] / coin_df["lower_shadow"]

is_doji = (
    (doji_body_to_lower < 0.10)
    & (doji_body_to_upper < 0.10)
    & (doji_upper_to_lower > 0.95)
    & (doji_upper_to_lower < 1.05)
)


doji = coin_df[is_doji].assign(doji=1)
coin_df2 = coin_df.join(doji["doji"]).fillna(0)

# calc_metrics(coin_df2, "doji", "")
calc_metrics(coin_df2, "doji", [[0, 0], [1, 0], [0, 1], [1, 1], [1, 2]], [[6], [4], [6], [4], [4]])

plot_patterns(coin_df2[coin_df2["doji"] > 0][:20], False, True)