# Get Candlestick Patterns

## Import relevant libraries

In [1]:
import os
import pandas as pd
import talib
import mplfinance as mpf
import numpy as np
import matplotlib.pyplot as plt
import datetime
from configparser import ConfigParser

In [4]:
# Load the notebook settings from the INI file.
# ConfigParser.read() silently skips files it cannot open and returns only the
# paths it actually parsed, so an empty result means the configuration is
# missing. Fail here rather than later with an unrelated NoSectionError.
config_path = '../../config_files/config_candlestick_patterns.ini'
configur = ConfigParser()
loaded_config_files = configur.read(config_path)
if not loaded_config_files:
    raise FileNotFoundError("Configuration file not found or unreadable: " + config_path)
print(loaded_config_files)

['../../config_files/config_candlestick_patterns.ini']


## Get the data

### Parameters

In [10]:
# Asset symbol and interval label; both are embedded in the input and output file names.
asset_ticket = "BTCUSDT"
timestamp = "1d"
# Date range of the dataset. The strings must follow the '%d %b, %Y' format,
# because that is how they are parsed when the file names are built.
start_date = "1 Jan, 2017"
end_date = "31 Dec, 2022"

In [11]:
# Locate the raw input CSV. The file name has the form
#   binance_<asset>_<interval>_from_<YYYY_MM_DD>_to_<YYYY_MM_DD>.csv
input_data_path = '../../../data/05_raw_group'
input_data_extension = ".csv"
input_data_filename = "_".join([
    "binance",
    asset_ticket,
    timestamp,
    "from",
    # Re-format the human-readable parameter dates for use in the file name.
    datetime.datetime.strptime(start_date, '%d %b, %Y').strftime('%Y_%m_%d'),
    "to",
    datetime.datetime.strptime(end_date, '%d %b, %Y').strftime('%Y_%m_%d'),
])
full_path_input_data = os.path.join(
    input_data_path,
    input_data_filename + input_data_extension,
)

In [12]:
# Load the raw candle data; no index column is specified, so the frame gets a default integer index.
df = pd.read_csv(full_path_input_data)

In [13]:
# Inspect the columns available in the loaded data.
df.columns

Index(['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time',
       'quote_asset_volumne', 'number_of_trades',
       'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore',
       'formatted_open_time', 'formatted_close_time', 'upper_shadow',
       'lower_shadow', 'real_body'],
      dtype='object')

## Identify Candlestick Patterns

In [32]:
# Names of every function in TA-Lib's 'Pattern Recognition' group.
# NOTE(review): this value is replaced by the hand-picked list assigned to
# `candle_names` in the next code cell, so it is not used for the computation.
candle_names = talib.get_function_groups()['Pattern Recognition']

- Inverted Hammer --> Single [Bullish]
- Hammer --> Single [Bullish]
- Bullish Engulfing (no dedicated TA-Lib function; derived from positive 'CDLENGULFING' values) --> Double [Bullish]
- Piercing Pattern --> Double [Bullish]
- Morning Star --> Triple [Bullish]


- Shooting Star --> Single [Bearish]
- Hanging Man --> Single [Bearish]
- Bearish Engulfing (no dedicated TA-Lib function; derived from negative 'CDLENGULFING' values) --> Double [Bearish]
- Dark Cloud Cover --> Double [Bearish]
- Evening Star --> Triple [Bearish]

- Engulfing [positive: Bullish / negative: Bearish]

In [33]:
# TA-Lib function names of the patterns to compute (see the list above).
candle_names = [
    # Bullish patterns
    'CDLINVERTEDHAMMER',
    'CDLHAMMER',
    'CDLPIERCING',
    'CDLMORNINGSTAR',
    # Bearish patterns
    'CDLSHOOTINGSTAR',
    'CDLHANGINGMAN',
    'CDLDARKCLOUDCOVER',
    'CDLEVENINGSTAR',
    # Both directions: positive = bullish, negative = bearish
    'CDLENGULFING'
]

In [34]:
# Pull out the four price series that every pattern function takes as input.
op, hi, lo, cl = (df[price_col] for price_col in ('open', 'high', 'low', 'close'))

# Add one column per pattern, named after the TA-Lib function that produced it.
# Each function is looked up on the talib module by name, e.g. for
# 'CDLHAMMER' this evaluates talib.CDLHAMMER(op, hi, lo, cl).
for pattern_name in candle_names:
    pattern_function = getattr(talib, pattern_name)
    df[pattern_name] = pattern_function(op, hi, lo, cl)

In [35]:
# Spot-check ten random rows, including the new pattern columns.
df.sample(10)

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,...,real_body,CDLINVERTEDHAMMER,CDLHAMMER,CDLPIERCING,CDLMORNINGSTAR,CDLSHOOTINGSTAR,CDLHANGINGMAN,CDLDARKCLOUDCOVER,CDLEVENINGSTAR,CDLENGULFING
1702,1651276800000,38596.11,38795.38,37578.2,37630.8,35321.18989,1651363199999,1354901000.0,1017369,16523.43017,...,965.31,0,0,0,0,0,0,0,0,0
50,1508544000000,6013.72,6171.0,5850.03,6024.97,1664.307693,1508630399999,10114820.0,10182,796.022575,...,11.25,0,0,0,0,0,0,0,0,0
729,1567209600000,9582.76,9684.51,9420.75,9587.47,17130.290074,1567295999999,164116800.0,199188,8558.148214,...,4.71,0,0,0,0,0,0,0,0,0
1393,1624579200000,34663.08,35500.0,31275.0,31584.45,116061.130356,1624665599999,3846121000.0,2094932,55656.156252,...,3078.63,0,0,0,0,0,0,0,0,0
999,1590537600000,8841.0,9225.0,8811.73,9204.07,68910.355514,1590623999999,624660900.0,662850,33532.620265,...,363.07,0,0,0,0,0,0,0,0,100
489,1546473600000,3857.57,3862.74,3730.0,3766.78,29406.948359,1546559999999,111657400.0,199812,14793.083267,...,90.79,0,0,0,0,0,0,0,0,0
931,1584662400000,6162.05,6900.0,5670.0,6208.36,219298.329514,1584748799999,1401734000.0,1580069,104719.63918,...,46.31,0,0,0,0,0,0,0,0,0
1350,1620864000000,49537.15,51367.19,46000.0,49670.97,147332.002121,1620950399999,7303695000.0,3570506,71742.003359,...,133.82,0,0,0,0,0,0,0,0,0
1075,1597104000000,11892.9,11935.0,11125.0,11392.08,90748.284634,1597190399999,1049616000.0,1343979,41221.961009,...,500.82,0,0,0,0,0,0,0,0,0
750,1569024000000,10167.92,10176.7,9900.0,9986.39,20544.175196,1569110399999,206034000.0,245439,9285.851479,...,181.53,0,0,0,0,0,0,0,0,0


## Split 'CDLENGULFING' into separate Bullish and Bearish signals

In [36]:
# Collect the open times of the candles flagged as engulfing patterns.
# CDLENGULFING is 100 for a bullish signal and -100 for a bearish one.
# The frame has no 'date' column (see the df.columns output), so the rows
# are identified by 'open_time', as the variable names indicate.
list_open_time_bullish_engulfing = df.loc[df['CDLENGULFING'] == 100, 'open_time'].to_list()
list_open_time_bearish_engulfing = df.loc[df['CDLENGULFING'] == -100, 'open_time'].to_list()

In [37]:
# Split the signed engulfing signal into two one-sided columns:
#   CDLENGULFINGBULLISH is 100 where CDLENGULFING == 100, else 0
#   CDLENGULFINGBEARISH is -100 where CDLENGULFING == -100, else 0
# The comparison is done on the signal column itself, which avoids looking
# rows up through a 'date' column that the loaded frame does not contain.
df['CDLENGULFINGBULLISH'] = np.where(df['CDLENGULFING'] == 100, 100, 0)
df['CDLENGULFINGBEARISH'] = np.where(df['CDLENGULFING'] == -100, -100, 0)

### Check

In [38]:
# Bullish rows: expect CDLENGULFINGBULLISH == 100 and CDLENGULFINGBEARISH == 0.
df[df['CDLENGULFING']==100].sample(5)

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,...,CDLHAMMER,CDLPIERCING,CDLMORNINGSTAR,CDLSHOOTINGSTAR,CDLHANGINGMAN,CDLDARKCLOUDCOVER,CDLEVENINGSTAR,CDLENGULFING,CDLENGULFINGBULLISH,CDLENGULFINGBEARISH
58,1509235200000,5709.98,6189.88,5648.01,6169.98,1804.778173,1509321599999,10620970.0,10651,954.170196,...,0,0,0,0,0,0,0,100,100,0
1088,1598227200000,11648.12,11824.9,11585.09,11748.2,46212.391867,1598313599999,542299000.0,656584,22780.419488,...,0,0,0,0,0,0,0,100,100,0
1245,1611792000000,30362.19,33783.98,29842.1,33364.86,92621.145617,1611878399999,2953553000.0,2060660,46067.782326,...,0,0,0,0,0,0,0,100,100,0
1277,1614556800000,45134.11,49790.0,44950.53,49587.03,85086.111648,1614643199999,4057516000.0,2147223,42616.994361,...,0,0,0,0,0,0,0,100,100,0
960,1587168000000,7026.78,7293.08,7014.4,7248.6,49488.542819,1587254399999,354845100.0,494670,23980.644731,...,0,0,0,0,0,0,0,100,100,0


In [39]:
# Bearish rows: expect CDLENGULFINGBEARISH == -100 and CDLENGULFINGBULLISH == 0.
df[df['CDLENGULFING']==-100].sample(5)

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,...,CDLHAMMER,CDLPIERCING,CDLMORNINGSTAR,CDLSHOOTINGSTAR,CDLHANGINGMAN,CDLDARKCLOUDCOVER,CDLEVENINGSTAR,CDLENGULFING,CDLENGULFINGBULLISH,CDLENGULFINGBEARISH
1389,1624233600000,35600.17,35750.0,31251.23,31608.93,168778.873159,1624319999999,5545926000.0,2688993,81297.469751,...,0,0,0,0,0,0,0,-100,0,-100
1713,1652227200000,31017.11,32162.59,27785.0,29103.94,207063.739278,1652313599999,6300296000.0,3559270,99622.117475,...,0,0,0,0,0,0,0,-100,0,-100
1273,1614211200000,49676.21,52041.73,46674.34,47073.73,83310.673121,1614297599999,4152189000.0,2305679,40610.664759,...,0,0,0,0,0,0,0,-100,0,-100
506,1547942400000,3665.75,3693.73,3475.0,3539.28,27901.938598,1548028799999,99767240.0,204941,13875.111381,...,0,0,0,0,0,0,0,-100,0,-100
468,1544659200000,3446.38,3460.0,3255.5,3302.06,40040.917013,1544745599999,134817100.0,160471,19671.315079,...,0,0,0,0,0,0,0,-100,0,-100


In [40]:
# Confirm that the pattern columns and the two split engulfing columns were added.
df.columns

Index(['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time',
       'quote_asset_volumne', 'number_of_trades',
       'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore',
       'formatted_open_time', 'formatted_close_time', 'upper_shadow',
       'lower_shadow', 'real_body', 'CDLINVERTEDHAMMER', 'CDLHAMMER',
       'CDLPIERCING', 'CDLMORNINGSTAR', 'CDLSHOOTINGSTAR', 'CDLHANGINGMAN',
       'CDLDARKCLOUDCOVER', 'CDLEVENINGSTAR', 'CDLENGULFING',
       'CDLENGULFINGBULLISH', 'CDLENGULFINGBEARISH'],
      dtype='object')

## Replace the Candlestick values in the first rows (based on the 'window_size') with 0

In [41]:
# Integer 'window_size' option from the [candlestick_patterns] section of the loaded config.
window_size = configur.getint('candlestick_patterns', 'window_size')

In [42]:
# Signal columns whose leading rows are reset.
subset = [
    'CDLINVERTEDHAMMER',
    'CDLHAMMER',
    'CDLPIERCING',
    'CDLMORNINGSTAR',
    'CDLSHOOTINGSTAR',
    'CDLHANGINGMAN',
    'CDLDARKCLOUDCOVER',
    'CDLEVENINGSTAR',
    'CDLENGULFING',
    'CDLENGULFINGBULLISH',
    'CDLENGULFINGBEARISH',
]

# Turn every 100 / -100 signal in the leading rows into 0.
# NOTE(review): .loc slices by label and includes the end label, so with the
# default integer index this touches rows 0..window_size, i.e. window_size + 1
# rows. Confirm that is intended; otherwise use .iloc[:window_size].
df.loc[:window_size, subset] = df.loc[:window_size, subset].replace({100: 0, -100: 0})

## Export the Data

### Export parameters

In [43]:
# Destination of the enriched CSV. The file name has the form
#   binance_<asset>_<interval>_from_<YYYY_MM_DD>_to_<YYYY_MM_DD>_candlesticks_signals_raw.csv
export_path = "../../../data/10_candlesticks_signals_raw"
export_extension = ".csv"
export_filename = "_".join([
    "binance",
    asset_ticket,
    timestamp,
    "from",
    # Re-format the human-readable parameter dates for use in the file name.
    datetime.datetime.strptime(start_date, '%d %b, %Y').strftime('%Y_%m_%d'),
    "to",
    datetime.datetime.strptime(end_date, '%d %b, %Y').strftime('%Y_%m_%d'),
    "candlesticks_signals_raw",
])
full_export_path = os.path.join(
    export_path,
    export_filename + export_extension,
)

### Export

In [44]:
# to_csv does not create missing directories, so make sure the destination
# folder exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(full_export_path), exist_ok=True)
# Write the frame without the integer index column.
df.to_csv(full_export_path, index=False)