### Imports

In [1]:
import pandas as pd
import numpy as np
import ta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import time
from datetime import datetime, timedelta

## Constants

In [101]:
# Forecast horizon of 5 candles. It is stored negated because it is handed
# straight to Series.shift(), where a negative period pulls future rows back
# onto the current row.
prediction_candles = -5
# Cut-off compared against predicted class probabilities further down.
accuracy_threshold = 0.5
# Scales the ATR band used when the target classes are derived.
candle_size_multiplier = 1
amount = 100  # presumably the stake per trade -- TODO confirm, not used in this section
# Number of times each column is to be repeated in the back story.
# Together with the original column that gives duplicate_num + 1 copies (here 5).
duplicate_num = -prediction_candles - 1

### Initialisation

### Download the data
Download the full data set once and train the model on it. After that, only the 2 newest rows are downloaded and appended to the existing data set for feature extraction and transformation.

In [102]:
# Read the candle data from the local CSV; the saved output shows the columns
# close, epoch, high, low and open, with epochs one hour (3600 s) apart.
df = pd.read_csv("EUR_USD_1HR.csv")
# Display the loaded frame as the cell output.
df

Unnamed: 0,close,epoch,high,low,open
0,1.09765,1701254179,1.09778,1.09714,1.09715
1,1.09848,1701255600,1.09850,1.09714,1.09767
2,1.09847,1701259200,1.09888,1.09793,1.09847
3,1.09728,1701262800,1.09947,1.09719,1.09854
4,1.09767,1701266400,1.09905,1.09621,1.09728
...,...,...,...,...,...
3407,1.07428,1719234000,1.07436,1.07347,1.07347
3408,1.07301,1719237600,1.07465,1.07273,1.07428
3409,1.07273,1719241200,1.07332,1.07235,1.07301
3410,1.07278,1719244800,1.07315,1.07262,1.07272


## Feature Engineering
Here we introduce all the features we need (indicators, std, price action etc..)
* roc
* Williams %R indicator
* ATR
* cci
* stddev
* Aroon
* ema_9
* ema_20
* ema_50
* ADX
* RSI
* Candle size

### Trend Indicators
* EMA_9, EMA_20, EMA_50, Aroon indicator, ADX indicator, CCI indicator

In [103]:
# Trend features: EMAs of the close over 9/20/50 candles, plus Aroon, ADX and
# CCI over 14 candles. Every indicator is built from the raw OHLC columns first
# and then attached in a single assign() call, in the original column order.
ema_9, ema_20, ema_50 = (
    ta.trend.EMAIndicator(close=df['close'], window=span) for span in (9, 20, 50)
)
aroon = ta.trend.AroonIndicator(
    high=df['high'], low=df['low'], window=14
).aroon_indicator()
adx = ta.trend.ADXIndicator(
    high=df['high'], low=df['low'], close=df['close'], window=14
).adx()
cci = ta.trend.CCIIndicator(
    high=df['high'], low=df['low'], close=df['close'], window=14
).cci()
df = df.assign(
    ema_9=ema_9.ema_indicator(),
    ema_20=ema_20.ema_indicator(),
    ema_50=ema_50.ema_indicator(),
    aroon=aroon,
    adx=adx,
    cci=cci,
)

### Momentum Indicators
* RSI, candle size, ROC, Williams %R

In [104]:
# Momentum features. The signed candle body keeps the direction of the move;
# its absolute value is the plain candle size.
true_candle_size = df['close'] - df['open']
candle_size = true_candle_size.abs()
# RSI over 14 candles, rate of change over 5, Williams %R with a 14-candle look-back.
rsi_value = ta.momentum.RSIIndicator(close=df['close'], window=14)
roc = ta.momentum.ROCIndicator(close=df['close'], window=5).roc()
williams = ta.momentum.WilliamsRIndicator(
    high=df['high'], low=df['low'], close=df['close'], lbp=14
).williams_r()
# Attach everything at once, in the same column order as before.
df = df.assign(
    true_candle_size=true_candle_size,
    candle_size=candle_size,
    rsi=rsi_value.rsi(),
    roc=roc,
    williams=williams,
)

## Volatility Indicators
* ATR indicator, Bollinger Bands parameters

In [105]:
# Inspect the frame after the trend and momentum columns have been added.
df

Unnamed: 0,close,epoch,high,low,open,ema_9,ema_20,ema_50,aroon,adx,cci,true_candle_size,candle_size,rsi,roc,williams
0,1.09765,1701254179,1.09778,1.09714,1.09715,,,,,0.000000,,0.00050,0.00050,,,
1,1.09848,1701255600,1.09850,1.09714,1.09767,,,,,0.000000,,0.00081,0.00081,,,
2,1.09847,1701259200,1.09888,1.09793,1.09847,,,,,0.000000,,0.00000,0.00000,,,
3,1.09728,1701262800,1.09947,1.09719,1.09854,,,,,0.000000,,-0.00126,0.00126,,,
4,1.09767,1701266400,1.09905,1.09621,1.09728,,,,,0.000000,,0.00039,0.00039,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3407,1.07428,1719234000,1.07436,1.07347,1.07347,1.072425,1.071283,1.071093,85.714286,35.788422,129.553727,0.00081,0.00081,74.834085,0.271615,-1.340034
3408,1.07301,1719237600,1.07465,1.07273,1.07428,1.072542,1.071447,1.071168,92.857143,35.443215,90.298325,-0.00127,0.00127,62.612013,0.069012,-26.198083
3409,1.07273,1719241200,1.07332,1.07235,1.07301,1.072580,1.071569,1.071229,92.857143,34.521228,54.033369,-0.00028,0.00028,60.274687,-0.027958,-32.876712
3410,1.07278,1719244800,1.07315,1.07262,1.07272,1.072620,1.071685,1.071290,85.714286,33.665097,48.356941,0.00006,0.00006,60.557838,-0.075448,-32.635253


In [106]:
# Volatility features: 14-candle ATR, Bollinger Band width and breakout flags
# (20-candle window, 2 standard deviations) and the 14-candle mean candle size.
atr = ta.volatility.AverageTrueRange(
    high=df['high'], low=df['low'], close=df['close'], window=14
).average_true_range()
# One BollingerBands object serves all three derived series; the parameters are
# the same for each of them.
bollinger = ta.volatility.BollingerBands(close=df['close'], window=20, window_dev=2)
bol_width = bollinger.bollinger_wband()
bol_above = bollinger.bollinger_hband_indicator()
bol_below = bollinger.bollinger_lband_indicator()
average_candle_size = df['candle_size'].rolling(window=14).mean()
df = df.assign(
    atr=atr,
    bol_width=bol_width,
    bol_above=bol_above,
    bol_below=bol_below,
    average_candle_size=average_candle_size,
)

In [107]:
# Trend regime per candle:
#   1 -> close >= ema_20 >= ema_50
#   0 -> close <= ema_20 <= ema_50
#   2 -> anything else (including rows where an EMA is still NaN)
# np.select takes the first matching condition, so a row satisfying both
# chains is labelled 1.
stacked_up = (df['close'] >= df['ema_20']) & (df['ema_20'] >= df['ema_50'])
stacked_down = (df['close'] <= df['ema_20']) & (df['ema_20'] <= df['ema_50'])
trend_filter = np.select([stacked_up, stacked_down], [1, 0], default=2)
df = df.assign(trend_filter=trend_filter)

In [108]:
# Independent copy of the feature frame at this point; later reassignments of
# `df` leave `dif` unchanged.
dif = df.copy()
# Display the snapshot as the cell output.
dif

Unnamed: 0,close,epoch,high,low,open,ema_9,ema_20,ema_50,aroon,adx,...,candle_size,rsi,roc,williams,atr,bol_width,bol_above,bol_below,average_candle_size,trend_filter
0,1.09765,1701254179,1.09778,1.09714,1.09715,,,,,0.000000,...,0.00050,,,,0.000000,,0.0,0.0,,2
1,1.09848,1701255600,1.09850,1.09714,1.09767,,,,,0.000000,...,0.00081,,,,0.000000,,0.0,0.0,,2
2,1.09847,1701259200,1.09888,1.09793,1.09847,,,,,0.000000,...,0.00000,,,,0.000000,,0.0,0.0,,2
3,1.09728,1701262800,1.09947,1.09719,1.09854,,,,,0.000000,...,0.00126,,,,0.000000,,0.0,0.0,,2
4,1.09767,1701266400,1.09905,1.09621,1.09728,,,,,0.000000,...,0.00039,,,,0.000000,,0.0,0.0,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3407,1.07428,1719234000,1.07436,1.07347,1.07347,1.072425,1.071283,1.071093,85.714286,35.788422,...,0.00081,74.834085,0.271615,-1.340034,0.000940,0.655009,1.0,0.0,0.000444,1
3408,1.07301,1719237600,1.07465,1.07273,1.07428,1.072542,1.071447,1.071168,92.857143,35.443215,...,0.00127,62.612013,0.069012,-26.198083,0.001010,0.675449,0.0,0.0,0.000506,1
3409,1.07273,1719241200,1.07332,1.07235,1.07301,1.072580,1.071569,1.071229,92.857143,34.521228,...,0.00028,60.274687,-0.027958,-32.876712,0.001007,0.681020,0.0,0.0,0.000519,1
3410,1.07278,1719244800,1.07315,1.07262,1.07272,1.072620,1.071685,1.071290,85.714286,33.665097,...,0.00006,60.557838,-0.075448,-32.635253,0.000973,0.683199,0.0,0.0,0.000513,1


In [109]:
# Disabled experiment, kept for reference: for every column of `dif`, add lagged
# copies named `<column>_1` ... `<column>_<duplicate_num>` built with shift(n).
# NOTE(review): as written, the assign() call sits inside the inner loop, so the
# frame would be rebuilt on every lag -- confirm the indentation before re-enabling.
# for column in dif.columns: # shift 1,2,3,4
#     num = list(range(1,duplicate_num+1))
#     new_columns = {}
#     for n in num:
#         new_column_name = f"{column}_{n}"
#         # Assign the shifted values to the new column name in the dictionary
#         new_columns[new_column_name] = df[column].shift(n)
#         df = df.assign(**new_columns)

In [110]:
# Check the most recent candles and their feature values.
# NOTE(review): the saved output lists a row 3411 that the earlier outputs do not
# show -- it looks stale; re-run from a fresh kernel to confirm.
df.tail()

Unnamed: 0,close,epoch,high,low,open,ema_9,ema_20,ema_50,aroon,adx,...,candle_size,rsi,roc,williams,atr,bol_width,bol_above,bol_below,average_candle_size,trend_filter
3407,1.07428,1719234000,1.07436,1.07347,1.07347,1.072425,1.071283,1.071093,85.714286,35.788422,...,0.00081,74.834085,0.271615,-1.340034,0.00094,0.655009,1.0,0.0,0.000444,1
3408,1.07301,1719237600,1.07465,1.07273,1.07428,1.072542,1.071447,1.071168,92.857143,35.443215,...,0.00127,62.612013,0.069012,-26.198083,0.00101,0.675449,0.0,0.0,0.000506,1
3409,1.07273,1719241200,1.07332,1.07235,1.07301,1.07258,1.071569,1.071229,92.857143,34.521228,...,0.00028,60.274687,-0.027958,-32.876712,0.001007,0.68102,0.0,0.0,0.000519,1
3410,1.07278,1719244800,1.07315,1.07262,1.07272,1.07262,1.071685,1.07129,85.714286,33.665097,...,6e-05,60.557838,-0.075448,-32.635253,0.000973,0.683199,0.0,0.0,0.000513,1
3411,1.07252,1719248400,1.07278,1.07221,1.07278,1.0726,1.071764,1.071338,78.571429,32.218354,...,0.00026,58.233432,-0.088498,-38.586957,0.000944,0.678073,0.0,0.0,0.000515,1


## Calculate the target class


In [111]:
# Show the horizon constant; it is passed as the period to shift() when the
# target is computed.
prediction_candles

-5

In [112]:
# Close price `prediction_candles` rows away, aligned onto the current row
# (a negative period looks ahead).
predicted_price = df['close'].shift(prediction_candles)
# Attach it temporarily and drop every row with a missing value: the indicator
# warm-up rows at the start and the rows at the end that have no future close.
df = df.assign(predicted_price=predicted_price).dropna()

# Three-way label relative to a band of candle_size_multiplier * ATR around the close:
#   1 -> future close above the band, 0 -> below the band, 2 -> inside the band.
atr_band = df['atr'] * candle_size_multiplier
future_close = df['predicted_price']
target = np.select(
    [future_close > df['close'] + atr_band, future_close < df['close'] - atr_band],
    [1, 0],
    default=2,
)

# The future close itself must not remain in the frame as a feature.
df = df.drop(columns='predicted_price').assign(target=target)
df

Unnamed: 0,close,epoch,high,low,open,ema_9,ema_20,ema_50,aroon,adx,...,rsi,roc,williams,atr,bol_width,bol_above,bol_below,average_candle_size,trend_filter,target
49,1.08974,1701428400,1.08975,1.08827,1.08908,1.089826,1.090427,1.092553,-64.285714,25.212433,...,41.770680,-0.107250,-51.324503,0.001373,0.296371,0.0,0.0,0.000534,0,0
50,1.08768,1701432000,1.09012,1.08758,1.08975,1.089397,1.090165,1.092362,-71.428571,26.316105,...,32.727465,-0.165216,-97.304582,0.001457,0.329116,0.0,1.0,0.000654,0,2
51,1.08679,1701435600,1.08847,1.08666,1.08767,1.088875,1.089844,1.092143,-78.571429,27.830929,...,29.732510,-0.234087,-97.192225,0.001482,0.397421,0.0,1.0,0.000715,0,1
52,1.08446,1701439200,1.08703,1.08415,1.08677,1.087992,1.089331,1.091842,-85.714286,30.187934,...,23.634647,-0.487259,-95.658263,0.001582,0.565926,0.0,1.0,0.000807,0,1
53,1.08378,1701442800,1.08665,1.08333,1.08445,1.087150,1.088803,1.091526,-92.857143,32.605729,...,22.203435,-0.484822,-94.346734,0.001706,0.717298,0.0,1.0,0.000819,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3402,1.07137,1719216000,1.07178,1.07089,1.07128,1.070177,1.069897,1.070590,100.000000,33.276143,...,61.804039,0.205767,-12.094395,0.000880,0.310427,1.0,0.0,0.000276,2,1
3403,1.07227,1719219600,1.07290,1.07131,1.07137,1.070596,1.070123,1.070656,57.142857,33.355505,...,66.922004,0.262749,-13.968958,0.000930,0.371265,1.0,0.0,0.000331,2,2
3404,1.07303,1719223200,1.07308,1.07197,1.07227,1.071083,1.070400,1.070749,64.285714,33.583442,...,70.514852,0.294426,-1.066098,0.000943,0.460367,1.0,0.0,0.000377,2,2
3405,1.07359,1719226800,1.07359,1.07266,1.07303,1.071584,1.070704,1.070860,71.428571,34.208711,...,72.854531,0.312077,-0.000000,0.000942,0.553838,1.0,0.0,0.000408,2,2


In [113]:
# Labels are the `target` column; features are every other column of the frame.
y = df['target']
x = df.drop(columns=['target'])

In [114]:
# Row/column counts of the feature matrix and length of the label vector.
x.shape, y.shape

((3358, 22), (3358,))

In [115]:
# Display the label series.
y

49      0
50      2
51      1
52      1
53      1
       ..
3402    1
3403    2
3404    2
3405    2
3406    0
Name: target, Length: 3358, dtype: int32

In [116]:
# Split point for an ordered 80/20 split: the first 80% of the rows (rounded
# down) are used for training, the remainder for testing.
n_rows = len(x)
train_size = int(n_rows * 0.8)
train_size

2686

In [117]:
# Training partition: the first `train_size` rows by position, no shuffling.
x_train = x.iloc[:train_size]
y_train = y.iloc[:train_size]
x_train.shape, y_train.shape

((2686, 22), (2686,))

In [118]:
# Test partition: every row from position `train_size` onwards.
x_test = x.iloc[train_size:]
y_test = y.iloc[train_size:]
x_test.shape, y_test.shape

((672, 22), (672,))

In [119]:
# Seed for the forest so repeated runs build the same trees.
random_state = 1
# 1000 trees (keep it 1000). The seed is taken from `random_state` above rather
# than from a second hard-coded literal, so the two cannot drift apart.
model = RandomForestClassifier(n_estimators=1000, random_state=random_state)
model.fit(x_train, y_train)

In [120]:
# Hard class predictions for the held-out rows, scored as the fraction of rows
# whose predicted label equals the true label.
y_pred = model.predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
acc

0.5699404761904762

In [121]:
# Class probabilities for the held-out rows. `y_probab` was not defined anywhere
# in the notebook, so this cell failed on a fresh kernel; it is computed here.
# Columns follow model.classes_ (column 0 -> label 0, column 1 -> label 1,
# assuming all three labels occur in y_train -- confirm with model.classes_).
y_probab = model.predict_proba(x_test)
# Flag arrays: 1 where the class probability reaches accuracy_threshold, else 0.
new_x = np.where(y_probab[:, 0] >= accuracy_threshold, 1, 0)  # label 0
new_y = np.where(y_probab[:, 1] >= accuracy_threshold, 1, 0)  # label 1

In [99]:
# Count how many entries of each flag array are set to 1.
(new_x==1).sum(), (new_y==1).sum()

(312, 321)

In [None]:
### Strategy Design