### Imports

In [1]:
import pandas as pd
import numpy as np
import ta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import time
from datetime import datetime, timedelta

## Constants

In [2]:
candle_stick_timeframe = 60 # as seconds
interval_min = int(candle_stick_timeframe/60)
prediction_candles = 10
duration = candle_stick_timeframe * prediction_candles
prediction_candles = -prediction_candles
accuracy_threshold = 0.6
candle_size_multiplier = 0
amount = 100
tick_symbol = "R_10"
count = 5000
data_dict ={
  "ticks_history":tick_symbol,
  "adjust_start_time": 1,
  "count": count,
  "end": "latest",
  "granularity":candle_stick_timeframe,
  "style": "candles"
}
duplicate_num = int(-prediction_candles -1)
# the number of times each column is to be repeated in the back story... 
# the total number of the same columns will hence be duplicate_num + 1 . In this case 6.

### Initialisation

### Download the data
Download the data once and train the model, the next data is only downloaded as 2 rows only and appended to already existing dataset for feature extraction and transformation

In [3]:
df = pd.read_csv("ETH_USD_1HR.csv")
del df['epoch']
df

Unnamed: 0,close,high,low,open
0,2052.5850,2056.895,2045.5260,2048.0650
1,2053.7550,2060.615,2050.1450,2052.5850
2,2053.6150,2061.845,2051.7550,2053.7550
3,2036.4850,2058.835,2032.4915,2053.3425
4,2031.3500,2040.395,2020.2950,2036.2850
...,...,...,...,...
4994,3323.4350,3328.195,3291.3950,3317.9950
4995,3306.7450,3331.734,3292.0050,3323.4350
4996,3278.6850,3312.755,3260.6965,3307.2150
4997,3259.6875,3291.445,3241.3980,3278.7950


In [4]:
data_history = df.copy()
data_history

Unnamed: 0,close,high,low,open
0,2052.5850,2056.895,2045.5260,2048.0650
1,2053.7550,2060.615,2050.1450,2052.5850
2,2053.6150,2061.845,2051.7550,2053.7550
3,2036.4850,2058.835,2032.4915,2053.3425
4,2031.3500,2040.395,2020.2950,2036.2850
...,...,...,...,...
4994,3323.4350,3328.195,3291.3950,3317.9950
4995,3306.7450,3331.734,3292.0050,3323.4350
4996,3278.6850,3312.755,3260.6965,3307.2150
4997,3259.6875,3291.445,3241.3980,3278.7950


## Feature Engineering
Here we introduce all the features we need (indicators, std, price action etc..)
* roc
* willims r indicator
* ATr
* cci
* stddev
* Aroon
* ema_9
* ema_20
* ema_50,
* ADX,
* RSI
* Candle size

### Trend Indicators
* EMA_9, EMA_20, EMA_50, Aroon indicator, adx indicator, cci indicator,

In [5]:
ema_9 = ta.trend.EMAIndicator(close=df['close'], window=9)
ema_20 = ta.trend.EMAIndicator(close=df['close'], window=20)
ema_50 = ta.trend.EMAIndicator(close=df['close'], window=50)
df = df.assign(ema_9 = ema_9.ema_indicator())
df = df.assign(ema_20 = ema_20.ema_indicator())
df = df.assign(ema_50 = ema_50.ema_indicator())
aroon = ta.trend.AroonIndicator(high=df['high'], low=df['low'], window=14).aroon_indicator()
df = df.assign(aroon=aroon)
adx = ta.trend.ADXIndicator(high=df['high'],low=df['low'], close=df['close'], window=14).adx()
df = df.assign(adx=adx)
cci = ta.trend.CCIIndicator(high=df['high'], low=df['low'], close=df['close'], window=14).cci()
df = df.assign(cci=cci)

### Momentum Indicators
* Rsi, candle size, ROC,Williams %r

In [6]:
candle_size = abs(df['close'] - df['open'])
df = df.assign(candle_size=candle_size)
rsi_value = ta.momentum.RSIIndicator(close=df['close'], window=14)
df = df.assign(rsi = rsi_value.rsi())
roc = ta.momentum.ROCIndicator(close=df['close'], window=5).roc()
df = df.assign(roc=roc)
williams = ta.momentum.WilliamsRIndicator(high=df['high'], low=df['low'], close=df['close'], lbp=14).williams_r()
df = df.assign(williams = williams)

## Volatility Indicators
* ATR indicator, BOllinger Bands Parameters, 

In [7]:
atr = ta.volatility.AverageTrueRange(high=df['high'],
                                     low=df['low'], 
                                     close=df['close'],
                                     window=14).average_true_range()
df = df.assign(atr=atr)
bol_width = ta.volatility.BollingerBands(close=df['close'], window=20, window_dev=2).bollinger_wband()
df = df.assign(bol_width=bol_width)
bol_above = ta.volatility.BollingerBands(close=df['close'], window=20, window_dev=2).bollinger_hband_indicator()
df = df.assign(bol_above=bol_above)
bol_below = ta.volatility.BollingerBands(close=df['close'], window=20, window_dev=2).bollinger_lband_indicator()
df = df.assign(bol_below=bol_below)
average_candle_size = df['candle_size'].rolling(window=14).mean()
df = df.assign(average_candle_size = average_candle_size)
df

Unnamed: 0,close,high,low,open,ema_9,ema_20,ema_50,aroon,adx,cci,candle_size,rsi,roc,williams,atr,bol_width,bol_above,bol_below,average_candle_size
0,2052.5850,2056.895,2045.5260,2048.0650,,,,,0.000000,,4.5200,,,,0.000000,,0.0,0.0,
1,2053.7550,2060.615,2050.1450,2052.5850,,,,,0.000000,,1.1700,,,,0.000000,,0.0,0.0,
2,2053.6150,2061.845,2051.7550,2053.7550,,,,,0.000000,,0.1400,,,,0.000000,,0.0,0.0,
3,2036.4850,2058.835,2032.4915,2053.3425,,,,,0.000000,,16.8575,,,,0.000000,,0.0,0.0,
4,2031.3500,2040.395,2020.2950,2036.2850,,,,,0.000000,,4.9350,,,,0.000000,,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4994,3323.4350,3328.195,3291.3950,3317.9950,3343.184033,3380.968287,3433.242135,-64.285714,64.330997,-93.495241,5.4400,25.326321,-1.766668,-51.165367,32.930919,6.016045,0.0,0.0,13.213179
4995,3306.7450,3331.734,3292.0050,3323.4350,3335.896226,3373.899402,3428.281463,-64.285714,65.129209,-84.035050,16.6900,22.814543,-0.007560,-57.479941,33.416496,5.919135,0.0,0.0,14.126036
4996,3278.6850,3312.755,3260.6965,3307.2150,3324.453981,3364.831364,3422.414935,-50.000000,66.118811,-110.290653,28.5300,19.341466,-1.173835,-70.801880,34.748068,5.885943,0.0,0.0,16.134607
4997,3259.6875,3291.445,3241.3980,3278.7950,3311.500685,3354.817663,3416.033467,-50.000000,67.167026,-125.431904,19.1075,17.409168,-1.798377,-79.043193,35.840849,6.312871,0.0,0.0,16.077107


In [8]:
dif = df.copy()
dif

Unnamed: 0,close,high,low,open,ema_9,ema_20,ema_50,aroon,adx,cci,candle_size,rsi,roc,williams,atr,bol_width,bol_above,bol_below,average_candle_size
0,2052.5850,2056.895,2045.5260,2048.0650,,,,,0.000000,,4.5200,,,,0.000000,,0.0,0.0,
1,2053.7550,2060.615,2050.1450,2052.5850,,,,,0.000000,,1.1700,,,,0.000000,,0.0,0.0,
2,2053.6150,2061.845,2051.7550,2053.7550,,,,,0.000000,,0.1400,,,,0.000000,,0.0,0.0,
3,2036.4850,2058.835,2032.4915,2053.3425,,,,,0.000000,,16.8575,,,,0.000000,,0.0,0.0,
4,2031.3500,2040.395,2020.2950,2036.2850,,,,,0.000000,,4.9350,,,,0.000000,,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4994,3323.4350,3328.195,3291.3950,3317.9950,3343.184033,3380.968287,3433.242135,-64.285714,64.330997,-93.495241,5.4400,25.326321,-1.766668,-51.165367,32.930919,6.016045,0.0,0.0,13.213179
4995,3306.7450,3331.734,3292.0050,3323.4350,3335.896226,3373.899402,3428.281463,-64.285714,65.129209,-84.035050,16.6900,22.814543,-0.007560,-57.479941,33.416496,5.919135,0.0,0.0,14.126036
4996,3278.6850,3312.755,3260.6965,3307.2150,3324.453981,3364.831364,3422.414935,-50.000000,66.118811,-110.290653,28.5300,19.341466,-1.173835,-70.801880,34.748068,5.885943,0.0,0.0,16.134607
4997,3259.6875,3291.445,3241.3980,3278.7950,3311.500685,3354.817663,3416.033467,-50.000000,67.167026,-125.431904,19.1075,17.409168,-1.798377,-79.043193,35.840849,6.312871,0.0,0.0,16.077107


In [9]:
for column in dif.columns: # shift 1,2,3,4
    num = list(range(1,duplicate_num+1))
    new_columns = {}
    for n in num:
        new_column_name = f"{column}_{n}"
        # Assign the shifted values to the new column name in the dictionary
        new_columns[new_column_name] = df[column].shift(n)
        df = df.assign(**new_columns)

In [10]:
df.tail()

Unnamed: 0,close,high,low,open,ema_9,ema_20,ema_50,aroon,adx,cci,...,bol_below_9,average_candle_size_1,average_candle_size_2,average_candle_size_3,average_candle_size_4,average_candle_size_5,average_candle_size_6,average_candle_size_7,average_candle_size_8,average_candle_size_9
4994,3323.435,3328.195,3291.395,3317.995,3343.184033,3380.968287,3433.242135,-64.285714,64.330997,-93.495241,...,0.0,13.181036,13.749214,13.5925,16.028679,10.76725,9.990107,10.346821,11.035393,9.236607
4995,3306.745,3331.734,3292.005,3323.435,3335.896226,3373.899402,3428.281463,-64.285714,65.129209,-84.03505,...,1.0,13.213179,13.181036,13.749214,13.5925,16.028679,10.76725,9.990107,10.346821,11.035393
4996,3278.685,3312.755,3260.6965,3307.215,3324.453981,3364.831364,3422.414935,-50.0,66.118811,-110.290653,...,0.0,14.126036,13.213179,13.181036,13.749214,13.5925,16.028679,10.76725,9.990107,10.346821
4997,3259.6875,3291.445,3241.398,3278.795,3311.500685,3354.817663,3416.033467,-50.0,67.167026,-125.431904,...,0.0,16.134607,14.126036,13.213179,13.181036,13.749214,13.5925,16.028679,10.76725,9.990107
4998,3292.505,3302.403,3245.0825,3258.785,3307.701548,3348.883123,3411.189214,-42.857143,67.494243,-91.069597,...,0.0,16.077107,16.134607,14.126036,13.213179,13.181036,13.749214,13.5925,16.028679,10.76725


## Calculate the target class


In [11]:
prediction_candles

-10

In [12]:
predicted_price = df['close'].shift(prediction_candles)
df = df.assign(predicted_price = predicted_price)
df = df.dropna()
target = np.where(
    df['predicted_price'] > (df['close']),1,0)
  
del df['predicted_price']
df = df.assign(target=target)
df

Unnamed: 0,close,high,low,open,ema_9,ema_20,ema_50,aroon,adx,cci,...,average_candle_size_1,average_candle_size_2,average_candle_size_3,average_candle_size_4,average_candle_size_5,average_candle_size_6,average_candle_size_7,average_candle_size_8,average_candle_size_9,target
58,2090.305,2095.6550,2088.2950,2090.4350,2089.798561,2084.150053,2069.086286,-42.857143,35.952174,13.126507,...,6.093893,6.111036,6.301750,6.205321,5.903179,4.290321,6.956179,6.964036,7.109750,1
59,2085.235,2092.2640,2081.8650,2090.2690,2088.885849,2084.253381,2069.719569,-42.857143,34.925175,-40.661013,...,6.100321,6.093893,6.111036,6.301750,6.205321,5.903179,4.290321,6.956179,6.964036,1
60,2086.995,2086.9950,2082.7350,2085.2350,2088.507679,2084.514488,2070.397037,-42.857143,33.971533,-41.161122,...,6.338464,6.100321,6.093893,6.111036,6.301750,6.205321,5.903179,4.290321,6.956179,1
61,2091.765,2092.8450,2085.6350,2086.9300,2089.159143,2085.205013,2071.234996,7.142857,33.548433,27.044702,...,5.732429,6.338464,6.100321,6.093893,6.111036,6.301750,6.205321,5.903179,4.290321,1
62,2093.825,2095.7715,2090.0450,2091.7650,2090.092315,2086.025964,2072.120879,7.142857,33.374383,68.503320,...,5.027786,5.732429,6.338464,6.100321,6.093893,6.111036,6.301750,6.205321,5.903179,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4984,3401.595,3419.9950,3401.1850,3403.1535,3426.113786,3449.947843,3476.471092,-92.857143,45.225619,-90.422332,...,9.894571,8.611536,8.929393,9.305107,9.242964,8.948357,9.266214,6.231286,6.370571,0
4985,3406.175,3412.6350,3382.5850,3401.5950,3422.126029,3445.779001,3473.714382,-100.000000,47.872178,-97.721997,...,9.832321,9.894571,8.611536,8.929393,9.305107,9.242964,8.948357,9.266214,6.231286,0
4986,3370.585,3408.5250,3352.7305,3406.1750,3411.817823,3438.617668,3469.670093,-100.000000,50.602782,-142.170571,...,9.236607,9.832321,9.894571,8.611536,8.929393,9.305107,9.242964,8.948357,9.266214,0
4987,3372.425,3383.7250,3363.1450,3370.5850,3403.939258,3432.313604,3465.856560,-92.857143,53.138342,-141.522272,...,11.035393,9.236607,9.832321,9.894571,8.611536,8.929393,9.305107,9.242964,8.948357,0


In [13]:
df.target.unique()

array([1, 0])

In [14]:
x = df.drop(['target'], axis =1)
y = df['target']
# ext_data = x.loc[[544]]
# # y_external = y.loc[[544]]
# # y_external
# ext_data

In [15]:
random_state = 1
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2,random_state=1)
model = RandomForestClassifier(n_estimators=1000, random_state=1) # keep it 100 for faster execution
model.fit(x_train, y_train)

In [16]:
y_pred = model.predict(x_test)
acc = accuracy_score(y_test, y_pred)
acc

0.851063829787234

In [17]:
# model.predict(ext_data)

In [18]:
# model.predict_proba(ext_data)

In [19]:
y_probab = model.predict_proba(x_test)
compare = pd.DataFrame(y_probab)
new_x = np.where(y_probab[:, 0] >= accuracy_threshold, 1, 0)
new_y = np.where(y_probab[:, 1] >= accuracy_threshold, 1, 0)
true_value = np.array(y_test)
compare = compare.assign(new_x=new_x)
compare = compare.assign(new_y=new_y)
compare = compare.assign(true_value=true_value)
fall_value = compare['new_x'].value_counts()[1]
rise_value = compare['new_y'].value_counts()[1]
tp_n = np.sum((y_test==1) &(new_y==1))
tp = tp_n/rise_value * 100
tn_n = np.sum((y_test==0) & (new_x ==1))
tn = tn_n/fall_value * 100

In [20]:
compare

Unnamed: 0,0,1,new_x,new_y,true_value
0,0.280,0.720,0,1,1
1,0.145,0.855,0,1,1
2,0.083,0.917,0,1,1
3,0.506,0.494,0,0,0
4,0.828,0.172,1,0,0
...,...,...,...,...,...
982,0.645,0.355,1,0,0
983,0.294,0.706,0,1,1
984,0.149,0.851,0,1,1
985,0.657,0.343,1,0,0


In [21]:
rise_value, fall_value

(407, 376)

In [22]:
print("Number of all signals/All Candles Received: ",len(y_pred))
print(f"Number of all CALLE Trade Taken: {rise_value} Number of Correct Trades: {tp_n}")
print(f"Number of PUTE trades Taken: {fall_value} Number of correct trades: {tn_n}")
print("Total Number of Trades Takesn: ", tn_n + tp_n)
print("Percentage of candles traded: ",(tn_n+tp_n)/len(y_pred)*100)
print("Model Accuracy is: ",acc)
print("Rise Accuracy is: ",tp)
print("Fall Accuracy is: ", tn)

Number of all signals/All Candles Received:  987
Number of all CALLE Trade Taken: 407 Number of Correct Trades: 377
Number of PUTE trades Taken: 376 Number of correct trades: 346
Total Number of Trades Takesn:  723
Percentage of candles traded:  73.25227963525836
Model Accuracy is:  0.851063829787234
Rise Accuracy is:  92.62899262899262
Fall Accuracy is:  92.02127659574468


In [23]:
strategy_accuracy = (tp*rise_value + tn * fall_value)/(rise_value+fall_value)
strategy_accuracy

92.33716475095785

### Deployment
* Stream just two rows of data, add the last row to first_copy data, then transform the data to fully form.
* Take the last row as the new data to predicted in a new variable

In [24]:
344*66

22704