In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta
from dataset import Dataset
from features import StrategiesSignals
import plotly.graph_objects as go

# Download data from binance

In [2]:
TICKER = "BTCUSDT"
data_binance = Dataset()
data = data_binance.get_data(days=90, ticker=TICKER, ts='5m')

In [3]:
data.tail(5)

Unnamed: 0_level_0,open,high,low,close,volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-08-29 23:45:00,48911.75,48974.75,48850.0,48859.43,87.47679
2021-08-29 23:50:00,48859.43,48930.0,48852.0,48918.77,55.33471
2021-08-29 23:55:00,48918.78,48918.78,48861.36,48892.27,53.8626
2021-08-30 00:00:00,48892.27,48892.28,48737.27,48767.83,133.45469
2021-08-30 00:05:00,48767.84,48861.63,48571.42,48705.27,336.71263


# Create target variables

In [4]:
size_body = 0.8
def find_candle(candle, size_body):
    rule_1 = (candle.get('close') - candle.get('low')) / (.001 + candle.get('high') - candle.get('low'))
    rule_2 = (candle.get('open') - candle.get('low')) / (.001 + candle.get('high') - candle.get('low'))
    rule_3 = candle.get('close') > candle.get('open')
    
    # Define rules
    bar = (
        ((candle.get('high') - candle.get('low')) > 3*(candle.get('open') - candle.get('close'))) &
        ((rule_1 > size_body) & (rule_1 < 1)) &
        ((rule_2 > size_body) & (rule_2 < 1)) &
        (rule_3))
    return 1 if bar else 0


In [5]:
# Look for the biggest volume for the last 1 hour (12 bars)
data['rolling_volume'] = data['volume'].rolling(5).max()
data[['volume', 'rolling_volume']].tail(10)

Unnamed: 0_level_0,volume,rolling_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-08-29 23:20:00,79.37921,102.21403
2021-08-29 23:25:00,96.82363,102.21403
2021-08-29 23:30:00,202.84696,202.84696
2021-08-29 23:35:00,169.28798,202.84696
2021-08-29 23:40:00,77.13982,202.84696
2021-08-29 23:45:00,87.47679,202.84696
2021-08-29 23:50:00,55.33471,202.84696
2021-08-29 23:55:00,53.8626,169.28798
2021-08-30 00:00:00,133.45469,133.45469
2021-08-30 00:05:00,336.71263,336.71263


In [6]:
# Look for correction
data['rolling_price_min'] = data['close'].rolling(12).min()
data[['close', 'rolling_price_min']].tail(10)

Unnamed: 0_level_0,close,rolling_price_min
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-08-29 23:20:00,49097.86,49025.01
2021-08-29 23:25:00,49032.67,49025.01
2021-08-29 23:30:00,48968.19,48968.19
2021-08-29 23:35:00,48896.3,48896.3
2021-08-29 23:40:00,48911.75,48896.3
2021-08-29 23:45:00,48859.43,48859.43
2021-08-29 23:50:00,48918.77,48859.43
2021-08-29 23:55:00,48892.27,48859.43
2021-08-30 00:00:00,48767.83,48767.83
2021-08-30 00:05:00,48705.27,48705.27


In [7]:
data['signal'] = 0
for index, value in data.iterrows():
    # find pattern
    data.loc[index, 'signal'] = find_candle(value, size_body)    
    # find the biggest volume
    data.loc[index, 'signal_volume'] = 1 if value.get('volume') == value.get('rolling_volume') else 0
    # find correction
    data.loc[index, 'signal_correction'] = 1 if (value.get('close') / value.get('rolling_price_min')) -1 > -0.005 else 0

# Plot patterns

In [8]:
indexex = data[data['signal'] == 1].index
temp = data[data.index == indexex[1]]
fig = go.Figure(data=[go.Candlestick(x=temp.index,
                open=temp['open'],
                high=temp['high'],
                low=temp['low'],
                close=temp['close'])])

fig.show()

In [9]:
temp = data[
    (data['signal'] == 1) &
    (data['signal_volume'] == 1) &
    (data['signal_correction'] == 1)
]
fig = go.Figure()
fig.add_trace(go.Scatter(x=temp.index, y=temp['close'], mode="markers", name='signals'))

fig.add_trace(go.Candlestick(x=data.index,
                             open=data['open'],
                             high=data['high'],
                             low=data['low'],
                             close=data['close'])
              )


fig.show()


In [10]:
# data.groupby('signal').count()
data.groupby(['signal', 'signal_correction', 'signal_volume']).count().T

signal,0,0,0,0,1,1
signal_correction,0.0,0.0,1.0,1.0,1.0,1.0
signal_volume,0.0,1.0,0.0,1.0,0.0,1.0
open,10,1,20440,5180,189,47
high,10,1,20440,5180,189,47
low,10,1,20440,5180,189,47
close,10,1,20440,5180,189,47
volume,10,1,20440,5180,189,47
rolling_volume,6,1,20440,5180,189,47
rolling_price_min,0,0,20440,5180,189,47


In [14]:

# create lags
lags = range(1, 20)

# run loop to calculate target percent change
for i in lags:
    data[f'lag_{i}'] = data['close'].pct_change(-i)

# generate names of colimns
name_lags = [f'lag_{i}' for i in lags]
# Append our signals
name_lags.append('signal')
name_lags.append('signal_volume')
name_lags.append('signal_correction')
stat_table = data[name_lags].groupby(['signal', 'signal_correction', 'signal_volume']).agg(['median', 'count', 'min', 'max']).T #.plot()
stat_table.head(50)

Unnamed: 0_level_0,signal,0,0,0,0,1,1
Unnamed: 0_level_1,signal_correction,0.0,0.0,1.0,1.0,1.0,1.0
Unnamed: 0_level_2,signal_volume,0.0,1.0,0.0,1.0,0.0,1.0
lag_1,median,-0.000311,0.000999,4e-06,-9e-06,0.000144,-0.000113
lag_1,count,10.0,1.0,20440.0,5179.0,189.0,47.0
lag_1,min,-0.003634,0.000999,-0.034128,-0.05327,-0.007414,-0.004163
lag_1,max,0.002718,0.000999,0.028599,0.026319,0.007558,0.003373
lag_2,median,-0.001087,0.002814,-1e-05,2e-05,0.000106,2.8e-05
lag_2,count,10.0,1.0,20440.0,5178.0,189.0,47.0
lag_2,min,-0.006876,0.002814,-0.034422,-0.065178,-0.013956,-0.007513
lag_2,max,0.002096,0.002814,0.037996,0.039474,0.010395,0.005007
lag_3,median,-0.001756,0.001635,-6.1e-05,3.2e-05,-1e-05,0.000607
lag_3,count,10.0,1.0,20439.0,5178.0,189.0,47.0


In [15]:
# Look for the best returns
stat_table = stat_table.rename_axis(["lvl0", "lvl1"])
stat_table.groupby(level=1).max()
stat_table.loc[stat_table.query("lvl1 == 'median'").idxmax(), :]

Unnamed: 0_level_0,signal,0,0,0,0,1,1
Unnamed: 0_level_1,signal_correction,0.0,0.0,1.0,1.0,1.0,1.0
Unnamed: 0_level_2,signal_volume,0.0,1.0,0.0,1.0,0.0,1.0
lvl0,lvl1,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3
lag_19,median,0.012034,0.024315,-0.000337,-0.000276,0.000771,-0.000272
lag_19,median,0.012034,0.024315,-0.000337,-0.000276,0.000771,-0.000272
lag_1,median,-0.000311,0.000999,4e-06,-9e-06,0.000144,-0.000113
lag_5,median,-0.001652,0.000223,-9.3e-05,3.9e-05,0.000251,0.0007
lag_18,median,0.009637,0.020634,-0.000339,-0.000237,0.00114,0.001747
lag_18,median,0.009637,0.020634,-0.000339,-0.000237,0.00114,0.001747


In [16]:
stat_table.tail(50)

Unnamed: 0_level_0,signal,0,0,0,0,1,1
Unnamed: 0_level_1,signal_correction,0.0,0.0,1.0,1.0,1.0,1.0
Unnamed: 0_level_2,signal_volume,0.0,1.0,0.0,1.0,0.0,1.0
lvl0,lvl1,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3
lag_7,min,-0.006013,0.002047,-0.07989,-0.060723,-0.029235,-0.0109
lag_7,max,0.005528,0.002047,0.051354,0.064737,0.021724,0.015225
lag_8,median,-0.002095,0.00413,-0.000205,-4.3e-05,0.000697,-0.000632
lag_8,count,10.0,1.0,20435.0,5177.0,189.0,47.0
lag_8,min,-0.008598,0.00413,-0.081047,-0.055994,-0.029988,-0.008421
lag_8,max,0.011749,0.00413,0.061806,0.060219,0.026721,0.013735
lag_9,median,-0.000418,0.006906,-0.000224,-0.000113,0.000503,-0.000532
lag_9,count,10.0,1.0,20434.0,5177.0,189.0,47.0
lag_9,min,-0.007414,0.006906,-0.082462,-0.063149,-0.028052,-0.0088
lag_9,max,0.010561,0.006906,0.061108,0.059576,0.026286,0.019944
