### Imports

Main documentation: https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM

In [15]:
import pandas as pd
import numpy as np
import ta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import time
from datetime import datetime, timedelta

### API Connection and data downloading

## Constants

In [16]:
df = pd.read_csv("ETH_USD_1HR.csv")
df

Unnamed: 0,close,epoch,high,low,open
0,2052.5850,1701255600,2056.895,2045.5260,2048.0650
1,2053.7550,1701259200,2060.615,2050.1450,2052.5850
2,2053.6150,1701262800,2061.845,2051.7550,2053.7550
3,2036.4850,1701266400,2058.835,2032.4915,2053.3425
4,2031.3500,1701270000,2040.395,2020.2950,2036.2850
...,...,...,...,...,...
4994,3323.4350,1719234000,3328.195,3291.3950,3317.9950
4995,3306.7450,1719237600,3331.734,3292.0050,3323.4350
4996,3278.6850,1719241200,3312.755,3260.6965,3307.2150
4997,3259.6875,1719244800,3291.445,3241.3980,3278.7950


In [17]:
data_history = df.copy()
data_history

Unnamed: 0,close,epoch,high,low,open
0,2052.5850,1701255600,2056.895,2045.5260,2048.0650
1,2053.7550,1701259200,2060.615,2050.1450,2052.5850
2,2053.6150,1701262800,2061.845,2051.7550,2053.7550
3,2036.4850,1701266400,2058.835,2032.4915,2053.3425
4,2031.3500,1701270000,2040.395,2020.2950,2036.2850
...,...,...,...,...,...
4994,3323.4350,1719234000,3328.195,3291.3950,3317.9950
4995,3306.7450,1719237600,3331.734,3292.0050,3323.4350
4996,3278.6850,1719241200,3312.755,3260.6965,3307.2150
4997,3259.6875,1719244800,3291.445,3241.3980,3278.7950


## Feature Engineering
Here we introduce all the features we need (indicators, std, price action etc..)
* roc
* willims r indicator
* ATr
* cci
* stddev
* Aroon
* ema_9
* ema_20
* ema_50,
* ADX,
* RSI
* Candle size

### Trend Indicators
* EMA_9, EMA_20, EMA_50, Aroon indicator, adx indicator, cci indicator,

In [18]:
ema_9 = ta.trend.EMAIndicator(close=df['close'], window=9)
ema_20 = ta.trend.EMAIndicator(close=df['close'], window=20)
ema_50 = ta.trend.EMAIndicator(close=df['close'], window=50)
df = df.assign(ema_9 = ema_9.ema_indicator())
df = df.assign(ema_20 = ema_20.ema_indicator())
df = df.assign(ema_50 = ema_50.ema_indicator())
# aroon = ta.trend.AroonIndicator(high=df['high'], low=df['low'], window=14).aroon_indicator()
# df = df.assign(aroon=aroon)
adx = ta.trend.ADXIndicator(high=df['high'],low=df['low'], close=df['close'], window=14).adx()
df = df.assign(adx=adx)
# cci = ta.trend.CCIIndicator(high=df['high'], low=df['low'], close=df['close'], window=14).cci()
# df = df.assign(cci=cci)

### Momentum Indicators
* Rsi, candle size, ROC,Williams %r

In [19]:
true_candle_size = df['close'] - df['open']
df = df.assign(true_candle_size = true_candle_size)
candle_size = abs(df['close'] - df['open'])
df = df.assign(candle_size=candle_size)
rsi_value = ta.momentum.RSIIndicator(close=df['close'], window=14)
df = df.assign(rsi = rsi_value.rsi())
# roc = ta.momentum.ROCIndicator(close=df['close'], window=5).roc()
# df = df.assign(roc=roc)
# williams = ta.momentum.WilliamsRIndicator(high=df['high'], low=df['low'], close=df['close'], lbp=14).williams_r()
# df = df.assign(williams = williams)

## Volatility Indicators
* ATR indicator, BOllinger Bands Parameters, 

In [20]:
candle_diff = np.where(df['close'] >= df['open'],
                       np.maximum(abs(df['open'] - df['low']), abs(df['high'] - df['close'])),
                       np.maximum(abs(df['high'] - df['open']), abs(df['close'] - df['low'])))
wick_ratio =candle_size / candle_diff
df = df.assign(wick_ratio = wick_ratio)

In [21]:
df

Unnamed: 0,close,epoch,high,low,open,ema_9,ema_20,ema_50,adx,true_candle_size,candle_size,rsi,wick_ratio
0,2052.5850,1701255600,2056.895,2045.5260,2048.0650,,,,0.000000,4.5200,4.5200,,1.048724
1,2053.7550,1701259200,2060.615,2050.1450,2052.5850,,,,0.000000,1.1700,1.1700,,0.170554
2,2053.6150,1701262800,2061.845,2051.7550,2053.7550,,,,0.000000,-0.1400,0.1400,,0.017305
3,2036.4850,1701266400,2058.835,2032.4915,2053.3425,,,,0.000000,-16.8575,16.8575,,3.069185
4,2031.3500,1701270000,2040.395,2020.2950,2036.2850,,,,0.000000,-4.9350,4.9350,,0.446404
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4994,3323.4350,1719234000,3328.195,3291.3950,3317.9950,3343.184033,3380.968287,3433.242135,64.330997,5.4400,5.4400,25.326321,0.204511
4995,3306.7450,1719237600,3331.734,3292.0050,3323.4350,3335.896226,3373.899402,3428.281463,65.129209,-16.6900,16.6900,22.814543,1.132293
4996,3278.6850,1719241200,3312.755,3260.6965,3307.2150,3324.453981,3364.831364,3422.414935,66.118811,-28.5300,28.5300,19.341466,1.586013
4997,3259.6875,1719244800,3291.445,3241.3980,3278.7950,3311.500685,3354.817663,3416.033467,67.167026,-19.1075,19.1075,17.409168,1.044725


In [22]:
atr = ta.volatility.AverageTrueRange(high=df['high'],
                                     low=df['low'], 
                                     close=df['close'],
                                     window=14).average_true_range()
df = df.assign(atr=atr)
bol_width = ta.volatility.BollingerBands(close=df['close'], window=20, window_dev=2).bollinger_wband()
df = df.assign(bol_width=bol_width)
bol_above = ta.volatility.BollingerBands(close=df['close'], window=20, window_dev=2).bollinger_hband_indicator()
df = df.assign(bol_above=bol_above)
bol_below = ta.volatility.BollingerBands(close=df['close'], window=20, window_dev=2).bollinger_lband_indicator()
df = df.assign(bol_below=bol_below)
# average_candle_size = df['candle_size'].rolling(window=14).mean()
# df = df.assign(average_candle_size = average_candle_size)

In [23]:
trend_filter = np.where((df['close'] >= df['ema_20']) & (df['ema_20'] >= df['ema_50']), 1,
                        np.where((df['close'] <= df['ema_20']) & (df['ema_20'] <= df['ema_50']), 0, 2))
df = df.assign(trend_filter = trend_filter)

In [24]:
immediate_trend = (df['close'] >= df['ema_9']).astype(int)
df = df.assign(immediate_trend = immediate_trend)

In [28]:
df[-30:]

Unnamed: 0,close,high,low,open,ema_9,ema_20,ema_50,adx,true_candle_size,candle_size,rsi,wick_ratio,atr,bol_width,bol_above,bol_below,trend_filter,immediate_trend
4969,3498.835,3502.545,3494.6285,3496.885,3501.352872,3502.306323,3504.837748,10.246313,1.95,1.95,47.462728,0.525606,11.335208,0.863424,0.0,0.0,0,0
4970,3496.405,3503.085,3492.005,3498.835,3500.363298,3501.744293,3504.507052,10.565343,-2.43,2.43,45.934521,0.552273,11.316979,0.863647,0.0,0.0,0,0
4971,3483.485,3497.995,3479.389,3496.405,3496.987638,3500.005312,3503.682658,11.961849,-12.92,12.92,38.784207,3.154297,11.837624,0.952745,0.0,1.0,0,0
4972,3472.995,3488.406,3470.345,3483.402,3492.18911,3497.432902,3502.47922,13.869439,-10.407,10.407,34.137782,2.079736,12.282151,1.163512,0.0,1.0,0,0
4973,3461.515,3481.195,3460.005,3472.995,3486.054288,3494.012149,3500.87278,16.214397,-11.48,11.48,29.914102,1.4,12.918425,1.519402,0.0,1.0,0,0
4974,3471.675,3478.865,3460.195,3461.515,3483.178431,3491.884802,3499.727769,18.391859,10.16,10.16,37.306948,1.413074,13.329252,1.662071,0.0,0.0,0,0
4975,3476.325,3477.325,3470.695,3471.675,3481.807745,3490.402916,3498.810013,20.413788,4.65,4.65,40.405312,4.65,12.850734,1.743019,0.0,0.0,0,0
4976,3478.875,3480.655,3474.005,3476.325,3481.221196,3489.305019,3498.028248,21.767023,2.55,2.55,42.095358,1.099138,12.407825,1.797761,0.0,0.0,0,0
4977,3434.135,3479.105,3407.625,3478.875,3471.803957,3484.050731,3495.522631,25.266694,-44.74,44.74,27.408771,1.687665,16.627266,2.341479,0.0,1.0,0,0
4978,3433.775,3439.031,3405.721,3434.135,3464.198165,3479.262567,3493.101155,28.549269,-0.36,0.36,27.326157,0.012832,17.81889,2.72369,0.0,1.0,0,0


In [29]:
df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4989,4990,4991,4992,4993,4994,4995,4996,4997,4998
close,2052.585,2053.755,2053.615,2036.485,2031.35,2025.925,2032.445,2037.283,2038.55,2025.695,...,3383.205,3306.995,3317.6285,3319.3825,3317.745,3323.435,3306.745,3278.685,3259.6875,3292.505
high,2056.895,2060.615,2061.845,2058.835,2040.395,2033.825,2037.705,2037.845,2042.095,2039.8815,...,3392.995,3388.215,3323.365,3330.395,3335.425,3328.195,3331.734,3312.755,3291.445,3302.403
low,2045.526,2050.145,2051.755,2032.4915,2020.295,2019.145,2020.035,2028.275,2032.832,2019.395,...,3353.325,3217.185,3302.805,3304.255,3310.045,3291.395,3292.005,3260.6965,3241.398,3245.0825
open,2048.065,2052.585,2053.755,2053.3425,2036.285,2031.455,2025.925,2032.625,2037.275,2038.475,...,3367.675,3383.205,3306.995,3316.8285,3319.395,3317.995,3323.435,3307.215,3278.795,3258.785
ema_9,,,,,,,,,2039.33015,2036.60312,...,3393.990125,3376.5911,3364.79858,3355.715364,3348.121291,3343.184033,3335.896226,3324.453981,3311.500685,3307.701548
ema_20,,,,,,,,,,,...,3422.066828,3411.107606,3402.204834,3394.316993,3387.024422,3380.968287,3373.899402,3364.831364,3354.817663,3348.883123
ema_50,,,,,,,,,,,...,3458.916055,3452.958367,3447.651313,3442.621164,3437.724059,3433.242135,3428.281463,3422.414935,3416.033467,3411.189214
adx,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,56.617984,58.770884,60.770006,62.205838,63.232754,64.330997,65.129209,66.118811,67.167026,67.494243
true_candle_size,4.52,1.17,-0.14,-16.8575,-4.935,-5.53,6.52,4.658,1.275,-12.78,...,15.53,-76.21,10.6335,2.554,-1.65,5.44,-16.69,-28.53,-19.1075,33.72
candle_size,4.52,1.17,0.14,16.8575,4.935,5.53,6.52,4.658,1.275,12.78,...,15.53,76.21,10.6335,2.554,1.65,5.44,16.69,28.53,19.1075,33.72


In [31]:
# del df['epoch']

In [32]:
dif = df.copy()
dif

Unnamed: 0,close,high,low,open,ema_9,ema_20,ema_50,adx,true_candle_size,candle_size,rsi,wick_ratio,atr,bol_width,bol_above,bol_below,trend_filter,immediate_trend
0,2052.5850,2056.895,2045.5260,2048.0650,,,,0.000000,4.5200,4.5200,,1.048724,0.000000,,0.0,0.0,2,0
1,2053.7550,2060.615,2050.1450,2052.5850,,,,0.000000,1.1700,1.1700,,0.170554,0.000000,,0.0,0.0,2,0
2,2053.6150,2061.845,2051.7550,2053.7550,,,,0.000000,-0.1400,0.1400,,0.017305,0.000000,,0.0,0.0,2,0
3,2036.4850,2058.835,2032.4915,2053.3425,,,,0.000000,-16.8575,16.8575,,3.069185,0.000000,,0.0,0.0,2,0
4,2031.3500,2040.395,2020.2950,2036.2850,,,,0.000000,-4.9350,4.9350,,0.446404,0.000000,,0.0,0.0,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4994,3323.4350,3328.195,3291.3950,3317.9950,3343.184033,3380.968287,3433.242135,64.330997,5.4400,5.4400,25.326321,0.204511,32.930919,6.016045,0.0,0.0,0,0
4995,3306.7450,3331.734,3292.0050,3323.4350,3335.896226,3373.899402,3428.281463,65.129209,-16.6900,16.6900,22.814543,1.132293,33.416496,5.919135,0.0,0.0,0,0
4996,3278.6850,3312.755,3260.6965,3307.2150,3324.453981,3364.831364,3422.414935,66.118811,-28.5300,28.5300,19.341466,1.586013,34.748068,5.885943,0.0,0.0,0,0
4997,3259.6875,3291.445,3241.3980,3278.7950,3311.500685,3354.817663,3416.033467,67.167026,-19.1075,19.1075,17.409168,1.044725,35.840849,6.312871,0.0,0.0,0,0


In [16]:
# for column in dif.columns: # shift 1,2,3,4
#     num = list(range(1,duplicate_num+1))
#     new_columns = {}
#     for n in num:
#         new_column_name = f"{column}_{n}"
#         # Assign the shifted values to the new column name in the dictionary
#         new_columns[new_column_name] = df[column].shift(n)
#         df = df.assign(**new_columns)

In [33]:
df.tail()

Unnamed: 0,close,high,low,open,ema_9,ema_20,ema_50,adx,true_candle_size,candle_size,rsi,wick_ratio,atr,bol_width,bol_above,bol_below,trend_filter,immediate_trend
4994,3323.435,3328.195,3291.395,3317.995,3343.184033,3380.968287,3433.242135,64.330997,5.44,5.44,25.326321,0.204511,32.930919,6.016045,0.0,0.0,0,0
4995,3306.745,3331.734,3292.005,3323.435,3335.896226,3373.899402,3428.281463,65.129209,-16.69,16.69,22.814543,1.132293,33.416496,5.919135,0.0,0.0,0,0
4996,3278.685,3312.755,3260.6965,3307.215,3324.453981,3364.831364,3422.414935,66.118811,-28.53,28.53,19.341466,1.586013,34.748068,5.885943,0.0,0.0,0,0
4997,3259.6875,3291.445,3241.398,3278.795,3311.500685,3354.817663,3416.033467,67.167026,-19.1075,19.1075,17.409168,1.044725,35.840849,6.312871,0.0,0.0,0,0
4998,3292.505,3302.403,3245.0825,3258.785,3307.701548,3348.883123,3411.189214,67.494243,33.72,33.72,30.353453,2.460865,37.37511,6.266406,0.0,0.0,0,0


## Calculate the target class


In [34]:
prediction_candles

-5

In [35]:
predicted_price = df['close'].shift(prediction_candles)
df = df.assign(predicted_price = predicted_price)
df = df.dropna()
target = np.where(
    df['predicted_price'] > (df['close'] + df['atr'] * candle_size_multiplier), 1,
    np.where(df['predicted_price'] < (df['close'] - df['atr'] * candle_size_multiplier), 0, 2)
)
del df['predicted_price']
df = df.assign(target=target)
df

Unnamed: 0,close,high,low,open,ema_9,ema_20,ema_50,adx,true_candle_size,candle_size,rsi,wick_ratio,atr,bol_width,bol_above,bol_below,trend_filter,immediate_trend,target
49,2087.1750,2094.3095,2083.7050,2092.2150,2088.752860,2076.621254,2060.523082,38.429923,-5.0400,5.0400,61.568690,1.452450,12.092792,4.610828,0.0,0.0,1,0,2
50,2080.4250,2091.0950,2076.8350,2087.1750,2087.087288,2076.983515,2061.303549,37.882113,-6.7500,6.7500,56.236051,1.721939,12.247593,4.363793,0.0,0.0,1,0,2
51,2078.0050,2082.8650,2073.7450,2080.4250,2085.270830,2077.080800,2061.958508,36.973599,-2.4200,2.4200,54.416318,0.568075,12.024193,4.058412,0.0,0.0,1,0,2
52,2079.4350,2082.8150,2070.3035,2078.0050,2084.103664,2077.305009,2062.643860,35.693355,1.4300,1.4300,55.336038,0.185678,12.059001,3.738749,0.0,0.0,1,0,2
53,2104.2450,2109.9700,2077.6550,2079.4350,2088.131931,2079.870723,2064.275278,36.200370,24.8100,24.8100,67.563891,4.333624,13.505858,3.536838,0.0,0.0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4989,3383.2050,3392.9950,3353.3250,3367.6750,3393.990125,3422.066828,3458.916055,56.617984,15.5300,15.5300,28.682438,1.082230,24.263697,4.641031,0.0,0.0,0,0,0
4990,3306.9950,3388.2150,3217.1850,3383.2050,3376.591100,3411.107606,3452.958367,58.770884,-76.2100,76.2100,17.552924,0.848569,34.747005,5.273318,0.0,1.0,0,0,2
4991,3317.6285,3323.3650,3302.8050,3306.9950,3364.798580,3402.204834,3447.651313,60.770006,10.6335,10.6335,22.095198,1.853656,33.733647,5.620542,0.0,0.0,0,0,0
4992,3319.3825,3330.3950,3304.2550,3316.8285,3355.715364,3394.316993,3442.621164,62.205838,2.5540,2.5540,22.850239,0.203126,33.191244,5.861675,0.0,0.0,0,0,0


In [36]:
x = df.drop(['target'], axis =1)
y = df['target']

In [37]:
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [41]:
x.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4945 entries, 49 to 4993
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   close             4945 non-null   float64
 1   high              4945 non-null   float64
 2   low               4945 non-null   float64
 3   open              4945 non-null   float64
 4   ema_9             4945 non-null   float64
 5   ema_20            4945 non-null   float64
 6   ema_50            4945 non-null   float64
 7   adx               4945 non-null   float64
 8   true_candle_size  4945 non-null   float64
 9   candle_size       4945 non-null   float64
 10  rsi               4945 non-null   float64
 11  wick_ratio        4945 non-null   float64
 12  atr               4945 non-null   float64
 13  bol_width         4945 non-null   float64
 14  bol_above         4945 non-null   float64
 15  bol_below         4945 non-null   float64
 16  trend_filter      4945 non-null   int32  

In [40]:
y

49      2
50      2
51      2
52      2
53      0
       ..
4989    0
4990    2
4991    0
4992    0
4993    2
Name: target, Length: 4945, dtype: int32

In [46]:
x_scaled = scaler.fit(np.array(x))
x_scaled

ValueError: Input X contains infinity or a value too large for dtype('float64').

In [44]:
y_scaled = scaler.fit_transform(np.array(y).reshape(-1,1))
y_scaled

array([[ 0.73150936],
       [ 0.73150936],
       [ 0.73150936],
       ...,
       [-1.81948767],
       [-1.81948767],
       [ 0.73150936]])

In [45]:
y_scaled.shape

(4945, 1)

In [38]:
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(x)

ValueError: Input X contains infinity or a value too large for dtype('float64').

In [None]:
X_scaled

In [None]:
y = np.array(y)
y

In [None]:
# Define sequence length
sequence_length = 10

# Create sequences and corresponding targets
def create_sequences(x, y, sequence_length):
    X_sequences, y_sequences = [], []
    for i in range(len(x) - sequence_length + 1):
        X_sequences.append(x[i:i + sequence_length])
        y_sequences.append(y[i + sequence_length - 1])  # target is the last element in the sequence
    return np.array(X_sequences), np.array(y_sequences)

X_seq, y_seq = create_sequences(X_scaled, y, sequence_length)

In [None]:
from tensorflow.keras.utils import to_categorical

y_seq_categorical = to_categorical(y_seq)


In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq_categorical, test_size=0.2, random_state=42)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

model = Sequential()
model.add(LSTM(50, input_shape=(sequence_length, X_scaled.shape[1]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(25, activation='relu'))
model.add(Dense(3, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))


In [None]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')

In [21]:
y_pred = model.predict(x_test)
acc = accuracy_score(y_test, y_pred)
acc

NameError: name 'model' is not defined