## Classify candlestick patterns

All patterns in https://thepatternsite.com/Candles2.html will be detected and classified into bullish/ bearish. Assume taking long/short positions at t=0candle accordingly with suitable stop loss/gain, calculate the profit earned at t=5, 10, 15, 20candles. Each candle represent a 1h or 4hr interval. 

In [41]:
import time
# import warnings
from datetime import datetime, timezone
from typing import List, Union
import requests
import pandas as pd
from price_data import BinanceAPI 
from matplotlib import pyplot as plt

bn = BinanceAPI()

Fetch data

In [118]:
btc_4h = pd.read_csv('./csv_files/btc_4h.csv', header=0).set_index('date_time')
eth_4h = pd.read_csv('./csv_files/eth_4h.csv', header=0).set_index('date_time')
# btc_4h = bn.get_kline(symbol = 'BTCUSDT', interval = '4h', instrument_type='spot',start_date = '2022-01-01 00:00:00', end_date='2023-12-31 23:59:59')
# btc_4h[['open', 'high', 'low', 'close', 'volume']
#        ] = btc_4h[['open', 'high', 'low', 'close', 'volume']].astype(float)

# eth_4h = bn.get_kline(symbol = 'ETHUSDT', interval = '4h', instrument_type='spot',start_date = '2022-01-01 00:00:00', end_date='2023-12-31 23:59:59')
# eth_4h[['open', 'high', 'low', 'close', 'volume']
#        ] = eth_4h[['open', 'high', 'low', 'close', 'volume']].astype(float)
# data_4h = [btc_4h, eth_4h]

# btc_4h.to_csv('./csv_files/btc_4h.csv')
# eth_4h.to_csv('./csv_files/eth_4h.csv')

Variable and mapping set up

In [119]:
metadata = pd.DataFrame([], columns=['symbol_interval', 'gain_loss_ratio', 'pattern', 'exposure','mean', 'Q0', 'Q1', 'Q2', 'Q3', 'Q4','skew', 'kurt'])
coin_sym_to_dict = {'BTCUSDT_4h': btc_4h, 'ETHUSDT_4h': eth_4h}
coin_sym_data= None

In [120]:
metadata

Unnamed: 0,symbol_interval,gain_loss_ratio,pattern,exposure,mean,Q0,Q1,Q2,Q3,Q4,skew,kurt


In [121]:
def compute_ret_profit(signal, stop_loss,stop_gain, start_cand:int, exposure:int):
  open_price = coin_sym_data['open'].iloc[start_cand]
  exit_price = None
  
  if signal == 'bull':  # long position
    for cand in range(start_cand, start_cand+exposure):
      if coin_sym_data['low'].iloc[cand] <= stop_loss:
        exit_price = stop_loss
      elif coin_sym_data['high'].iloc[cand] >= stop_gain:
        exit_price = stop_gain
    exit_price = coin_sym_data['close'].iloc[start_cand+exposure-1]
    profit = exit_price - open_price
    return  profit/open_price
  
  elif signal == 'bear': # short position
    for cand in range(start_cand, start_cand+exposure):
      if coin_sym_data['high'].iloc[cand] >= stop_loss:
        exit_price = stop_loss
      elif coin_sym_data['low'].iloc[cand] <= stop_gain:
        exit_price = stop_gain
    exit_price = coin_sym_data['close'].iloc[start_cand+exposure-1]
    profit = open_price - exit_price
    return profit/open_price
  
  return None, None

In [136]:
def above_the_stomach(i, gain_loss_ratio) -> Union[dict, None]:
  conditions = [
    # coin_sym_data['open'].iloc[i-5] > coin_sym_data['close'].iloc[i-5],
                coin_sym_data['open'].iloc[i-4] > coin_sym_data['close'].iloc[i-4] ,
                coin_sym_data['open'].iloc[i-3] > coin_sym_data['close'].iloc[i-3] ,
                coin_sym_data['open'].iloc[i-2] > coin_sym_data['close'].iloc[i-2],
                coin_sym_data['open'].iloc[i-1] < coin_sym_data['close'].iloc[i-1],
                coin_sym_data['open'].iloc[i-1] >= (coin_sym_data['open'].iloc[i-2] + coin_sym_data['close'].iloc[i-2])/2]
  if not all(conditions): 
    return None, None, None
  # print(i)
  stop_loss = coin_sym_data['low'].iloc[i-2] * .9995
  stop_gain = (coin_sym_data['close'].iloc[i-1] - stop_loss) * \
      gain_loss_ratio + coin_sym_data['close'].iloc[i-1]
  return 'bull', stop_loss, stop_gain


def below_the_stomach(i, gain_loss_ratio) -> Union[dict, None]:
  conditions = [coin_sym_data['open'].iloc[i-5] < coin_sym_data['close'].iloc[i-5],
                coin_sym_data['open'].iloc[i-4] < coin_sym_data['close'].iloc[i-4],
                coin_sym_data['open'].iloc[i-3] < coin_sym_data['close'].iloc[i-3],
                coin_sym_data['open'].iloc[i-2] < coin_sym_data['close'].iloc[i-2],
                coin_sym_data['open'].iloc[i-1] > coin_sym_data['close'].iloc[i-1],
                coin_sym_data['open'].iloc[i-1] <= (coin_sym_data['open'].iloc[i-2] + coin_sym_data['close'].iloc[i-2])/2]
  if not all(conditions):
    return None, None, None
  # print(i)
  stop_loss = coin_sym_data['high'].iloc[i-2] * 1.0005
  stop_gain = coin_sym_data['close'].iloc[i-1]- (stop_loss - coin_sym_data['close'].iloc[i-1] ) * gain_loss_ratio 
  return 'bear', stop_loss, stop_gain

In [137]:
patterns_lib = {'above_the_stomach': above_the_stomach}

In [138]:
def detect_pattern_compute_stat(symbol_interval: str, pattern: str, exposure:int, gain_loss_ratio = 3)-> pd.DataFrame:

  df = []
  for i in range(5, len(coin_sym_data.index)-10):
    signal, stop_loss, stop_gain = patterns_lib[pattern](i, gain_loss_ratio=gain_loss_ratio)
    # print(i)
    if signal == None:
      continue 
    ret=  compute_ret_profit(signal=signal, stop_loss=stop_loss,
                                      stop_gain=stop_gain, start_cand=i, exposure=exposure)
    df.append({'time': coin_sym_data.index[i], 'signal': signal,'ret': ret})
  display(df)
  df = pd.DataFrame(df,index= 'time')

  quantiles = list(df['ret'].quantile([0,.25, .5, .75, 1]))
  skew = df['ret'].skew()
  kurt = df['ret'].kurt()
  # display(df.describe())

  fig, graph = plt.subplots()
  graph.hist(df['ret'].sort_values(ignore_index=True), bins=int(len(df.index)*2/3))
  plt.ylabel('Frequency')
  plt.title(f'{symbol_interval}_gainlossratio={gain_loss_ratio}_pattern={pattern}_exposure={exposure}')
  plt.xlabel(f'q0-4={[ round(q, 5) for q in quantiles]}\nskew={skew:.5f}_kurt={kurt:5f}')
  plt.savefig(f"./imagelib/{symbol_interval}_gain_loss_ratio={gain_loss_ratio}_pattern={pattern}_exposure={exposure}.png")
  # display(graph)
  return pd.DataFrame([{'symbol_interval': symbol_interval,
                      'gain_loss_ratio': gain_loss_ratio,
                        'pattern': pattern,
                        'exposure': exposure,
                        'mean': df['ret'].mean(),
                        'Q0': quantiles[0],
                        'Q1': quantiles[1],
                        'Q2': quantiles[2],
                        'Q3': quantiles[3],
                        'Q4': quantiles[4],
                        'skew': skew,
                        'kurt': kurt}])

In [130]:
# global coin_sym_data
for this_coin_sym in ['BTCUSDT_4h','ETHUSDT_4h']:
  coin_sym_data = coin_sym_to_dict[this_coin_sym]
  for pattern in ['above_the_stomach']:
    for exposure in [1, 2, 6, 18]:
      metadata_row=detect_pattern_compute_stat(
          symbol_interval=this_coin_sym, pattern=pattern, exposure=exposure, gain_loss_ratio=3)
      metadata = pd.concat([metadata, metadata_row])

KeyError: "None of ['time'] are in the columns"

In [None]:
metadata

Unnamed: 0,symbol_interval,gain_loss_ratio,pattern,exposure,mean,Q0,Q1,Q2,Q3,Q4,skew,kurt
0,BTCUSDT_4h,3,above_the_stomach,1,0.004564,-0.011412,0.000458,0.002889,0.007261,0.031565,1.150821,2.648827
0,BTCUSDT_4h,3,above_the_stomach,2,0.010873,-0.005426,0.003618,0.007924,0.015468,0.044367,1.285758,1.870196
0,BTCUSDT_4h,3,above_the_stomach,6,0.009691,-0.022933,-0.001319,0.00729,0.015412,0.062485,1.246902,2.443263
0,BTCUSDT_4h,3,above_the_stomach,18,0.01856,-0.227021,-0.009674,0.009589,0.043173,0.198789,-0.646664,4.458435
0,ETHUSDT_4h,3,above_the_stomach,1,0.007954,-0.01681,0.000618,0.004122,0.012609,0.032313,0.851573,0.463487
0,ETHUSDT_4h,3,above_the_stomach,2,0.017851,-0.000805,0.006502,0.012411,0.023858,0.084272,1.880313,5.006483
0,ETHUSDT_4h,3,above_the_stomach,6,0.022894,-0.033299,-0.000407,0.013843,0.037226,0.128782,1.300357,1.779276
0,ETHUSDT_4h,3,above_the_stomach,18,0.024514,-0.112053,-0.0194,0.014881,0.054012,0.193316,0.673122,0.102781


In [139]:
this_coin_sym = 'BTCUSDT'
pattern = 'above_the_stomach'
exposure = 18

detect_pattern_compute_stat(
    symbol_interval=this_coin_sym, pattern=pattern, exposure=exposure, gain_loss_ratio=3)

[]

TypeError: Index(...) must be called with a collection of some kind, 'time' was passed

In [None]:
btc_4h

Unnamed: 0_level_0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-01-01 00:00:00,1.640995e+12,46216.93,46949.99,46208.37,46813.20,3495.20465,1641009599999,163091065.39066450,114726,1751.87928000,81717413.23587420,0
2022-01-01 04:00:00,1.641010e+12,46813.21,47555.55,46591.23,47194.73,3522.79888,1641023999999,165801706.89285300,115150,1764.44663000,83063078.92016720,0
2022-01-01 08:00:00,1.641024e+12,47194.73,47344.69,46715.39,46758.87,2729.67811,1641038399999,128308435.21243450,109398,1311.57332000,61656843.35882320,0
2022-01-01 12:00:00,1.641038e+12,46758.87,47491.14,46756.30,47219.04,3055.90946,1641052799999,143990904.71209970,109116,1551.05367000,73095542.26686220,0
2022-01-01 16:00:00,1.641053e+12,47219.04,47954.63,47186.07,47329.78,4061.67681,1641067199999,193002811.55339300,154496,2050.68014000,97434848.95431000,0
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-12 12:00:00,1.712923e+12,70904.00,70975.54,69236.95,69347.30,10668.68172,1712937599999,744967842.56112640,519826,4570.11331000,319105529.43274980,0
2024-04-12 16:00:00,1.712938e+12,69347.30,69580.00,65086.86,66843.98,26206.56931,1712951999999,1766566660.36843640,1056803,12139.05984000,818610089.56864780,0
2024-04-12 20:00:00,1.712952e+12,66843.98,67440.00,66680.84,67116.52,7036.87893,1712966399999,471759010.61475860,317945,3406.15659000,228350811.39837640,0
2024-04-13 00:00:00,1.712966e+12,67116.52,67168.00,65731.27,67160.55,7747.05556,1712980799999,515311798.40251960,463310,3541.07928000,235489697.35932610,0
