In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

# Import Data & Manipulate Data

In [2]:
df_org = pd.read_csv("[2010-2020]XAUUSD-M5-NoSession.csv")

## Set DateTime as Index

In [3]:
df_org["DateTime"] = pd.to_datetime(df_org["DateTime"])
df_org = df_org.set_index("DateTime")
df_org = df_org.sort_index()
df_org.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 715180 entries, 2010-01-01 00:00:00 to 2020-01-01 23:55:00
Data columns (total 5 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Open    715180 non-null  float64
 1   High    715180 non-null  float64
 2   Low     715180 non-null  float64
 3   Close   715180 non-null  float64
 4   Volume  715180 non-null  int64  
dtypes: float64(4), int64(1)
memory usage: 32.7 MB


# Select Columns

In [9]:
df = df_org[["Open", "High", "Low", "Close"]]
df

Unnamed: 0_level_0,Open,High,Low,Close
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-01 00:00:00,1096.09,1096.21,1096.02,1096.07
2010-01-01 00:05:00,1096.15,1096.17,1095.99,1096.04
2010-01-01 00:10:00,1096.06,1096.15,1095.99,1096.15
2010-01-01 00:15:00,1096.07,1096.17,1096.05,1096.17
2010-01-01 00:20:00,1096.08,1096.20,1096.05,1096.15
...,...,...,...,...
2020-01-01 23:35:00,1519.30,1519.52,1518.77,1519.28
2020-01-01 23:40:00,1519.29,1519.61,1519.11,1519.33
2020-01-01 23:45:00,1519.33,1519.33,1518.41,1518.59
2020-01-01 23:50:00,1518.58,1518.80,1518.48,1518.63


# Feature Engineering

## Adding Heiken Ashi Candels

In [143]:
data = {"DateTime" : ["2023.10.3 08:55", "2023.10.3 09:00", "2023.10.3 09:05", "2023.10.3 09:10"],
        "Open" :  [1818.45, 1819.51, 1820.21, 1821.96],
        "High" :  [1819.52, 1820.56, 1822.22, 1823.08],
        "Low" :   [1818.26, 1819.21, 1820.10, 1821.37],
        "Close" : [1819.51, 1820.21, 1821.96, 1823.08]
       }
dff = pd.DataFrame(data)
dff

Unnamed: 0,DateTime,Open,High,Low,Close
0,2023.10.3 08:55,1818.45,1819.52,1818.26,1819.51
1,2023.10.3 09:00,1819.51,1820.56,1819.21,1820.21
2,2023.10.3 09:05,1820.21,1822.22,1820.1,1821.96
3,2023.10.3 09:10,1821.96,1823.08,1821.37,1823.08


In [144]:
def HA(dff):
    dff["pre_close"] = dff[""].shift(1)
    dff["pre_open"] = dff["HA_O"].shift(1)
    
    dff["HA_O"] = (dff["pre_open"] + dff["pre_close"]) / 2
    dff["HA_Max"] = dff[["Open", "High", "Low", "Close", "HA_O"]].max(axis=1)
    dff["HA_Min"] = dff[["Open", "High", "Low", "Close", "HA_O"]].min(axis=1)
    dff["HA_C"] = (dff["Open"] + dff["Close"] + dff["High"] + dff["Low"]) /4
    
    #dff.drop("pre_close", axis=1, inplace=True)
    #dff.drop("pre_open", axis=1, inplace=True)
    return dff

In [145]:
HA(dff)
dff

KeyError: 'HA_C'

In [142]:
import plotly.graph_objects as go
fig2 = go.Figure(data=[go.Candlestick(x=dff.DateTime,
                open=dff.HA_O,
                high=dff.HA_Max,
                low=dff.HA_Min,
                close=dff.HA_C)] )


fig2.update_layout(yaxis_range = [1815,1830], 
          title = 'Heikin Ashi Chart: RELIANCE', 
          xaxis_title = 'Date', 
          yaxis_title = 'Price')
fig2.show()


## Adding base Ichimoku-KinkoHyo

#### Normal means : current time frame
#### Advanced mean : multi time looking at current timeframe

### Writing Function for better calculation

In [5]:
def add_ichimoku(df, multi_time=3, tk=9, kj=26, s_b=52):
    for i in range(multi_time):
        i += 1
        """           Normal Parameters             """
        
        """      Calculating Normal Tenkan-Sen      """
        high_for_tk = df["High"].rolling(window=tk*i, min_periods=tk*i).max()
        low_for_tk = df["Low"].rolling(window=tk*i, min_periods=tk*i).min()
        df[f"Tenkan_sen_norm_{i}X"] = (high_for_tk + low_for_tk) / 2
        
        """     Calculating Normal Kijun-Sen        """
        high_for_kj = df["High"].rolling(window=kj*i, min_periods=kj*i).max()
        low_for_kj = df["Low"].rolling(window=kj*i, min_periods=kj*i).min()
        df[f"Kijun_sen_norm_{i}X"] = (high_for_kj + low_for_kj) / 2
        
        """     Calculating Normal Senkou-span-A    """
        df[f"Senkou_span_A_norm_{i}X"] = (df[f"Tenkan_sen_norm_{i}X"] + df[f"Kijun_sen_norm_{i}X"]) / 2
        
        """     Calculating Normal Senkou-span-B    """
        high_for_s_b = df["High"].rolling(window=s_b*i, min_periods=s_b*i).max()
        low_for_s_b = df["Low"].rolling(window=s_b*i, min_periods=s_b*i).min()
        df[f"Senkou_span_B_norm_{i}X"] = (high_for_s_b + low_for_s_b) / 2
        
        """           Advanced Parameters            """
        """      Calculating Avanced Tenkan-Sen      """
        df[f"Tenkan_sen_adv_{i}X"] = df[f"Tenkan_sen_norm_{i}X"].shift(kj*i)
        
        """      Calculating Avanced Kijun-Sen       """
        df[f"Kijun_sen_adv_{i}X"] = df[f"Kijun_sen_norm_{i}X"].shift(kj*i)
        
        """     Calculating Avanced Senkou-span-A    """
        df[f"Senkou_span_A_adv_{i}X"] = df[f"Senkou_span_A_norm_{i}X"].shift(kj*i)
        
        """     Calculating Avanced Senkou-span-B    """
        df[f"Senkou_span_B_adv_{i}X"] = df[f"Senkou_span_B_norm_{i}X"].shift(kj*i)
        
        """      Calculating Avanced Chikou-span      """
        df[f"Chikou_span_adv_{i}X"] = df["Close"].shift(kj*i)
        
    return df

In [10]:
add_ichimoku(df, multi_time=3)
df

Unnamed: 0_level_0,Open,High,Low,Close,Tenkan_sen_norm_1X,Kijun_sen_norm_1X,Senkou_span_A_norm_1X,Senkou_span_B_norm_1X,Tenkan_sen_adv_1X,Kijun_sen_adv_1X,...,Chikou_span_adv_2X,Tenkan_sen_norm_3X,Kijun_sen_norm_3X,Senkou_span_A_norm_3X,Senkou_span_B_norm_3X,Tenkan_sen_adv_3X,Kijun_sen_adv_3X,Senkou_span_A_adv_3X,Senkou_span_B_adv_3X,Chikou_span_adv_3X
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-01 00:00:00,1096.09,1096.21,1096.02,1096.07,,,,,,,...,,,,,,,,,,
2010-01-01 00:05:00,1096.15,1096.17,1095.99,1096.04,,,,,,,...,,,,,,,,,,
2010-01-01 00:10:00,1096.06,1096.15,1095.99,1096.15,,,,,,,...,,,,,,,,,,
2010-01-01 00:15:00,1096.07,1096.17,1096.05,1096.17,,,,,,,...,,,,,,,,,,
2010-01-01 00:20:00,1096.08,1096.20,1096.05,1096.15,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-01 23:35:00,1519.30,1519.52,1518.77,1519.28,1518.875,1518.875,1518.8750,1518.955,1519.300,1519.680,...,1521.16,1518.875,1519.77,1519.3225,1520.66,1520.66,1520.66,1520.66,1520.875,1521.20
2020-01-01 23:40:00,1519.29,1519.61,1519.11,1519.33,1519.920,1518.875,1519.3975,1518.875,1519.300,1519.485,...,1520.80,1518.875,1519.77,1519.3225,1520.66,1520.66,1520.66,1520.66,1520.875,1520.33
2020-01-01 23:45:00,1519.33,1519.33,1518.41,1518.59,1519.740,1518.875,1519.3075,1518.875,1519.225,1519.415,...,1520.23,1518.875,1519.77,1519.3225,1520.66,1520.66,1520.66,1520.66,1520.875,1520.86
2020-01-01 23:50:00,1518.58,1518.80,1518.48,1518.63,1519.590,1518.875,1519.2325,1518.875,1518.910,1519.280,...,1519.70,1518.875,1519.77,1519.3225,1520.66,1520.66,1520.66,1520.66,1520.875,1520.61


# Calculate TK & KJ - S_A & S_B Positions

### No. 0 For Down Trend
### No. 1 For Up Trend

In [7]:
def Tk_Kj_position(df):
    

SyntaxError: unexpected EOF while parsing (4076839768.py, line 2)

# Shift All Columns values for better performance

In [11]:
columnss = list(df.columns)
columnss = columnss[4:]
for col_name in columnss:
    df[col_name] = df[col_name].shift(1)
df

Unnamed: 0_level_0,Open,High,Low,Close,Tenkan_sen_norm_1X,Kijun_sen_norm_1X,Senkou_span_A_norm_1X,Senkou_span_B_norm_1X,Tenkan_sen_adv_1X,Kijun_sen_adv_1X,...,Chikou_span_adv_2X,Tenkan_sen_norm_3X,Kijun_sen_norm_3X,Senkou_span_A_norm_3X,Senkou_span_B_norm_3X,Tenkan_sen_adv_3X,Kijun_sen_adv_3X,Senkou_span_A_adv_3X,Senkou_span_B_adv_3X,Chikou_span_adv_3X
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-01 00:00:00,1096.09,1096.21,1096.02,1096.07,,,,,,,...,,,,,,,,,,
2010-01-01 00:05:00,1096.15,1096.17,1095.99,1096.04,,,,,,,...,,,,,,,,,,
2010-01-01 00:10:00,1096.06,1096.15,1095.99,1096.15,,,,,,,...,,,,,,,,,,
2010-01-01 00:15:00,1096.07,1096.17,1096.05,1096.17,,,,,,,...,,,,,,,,,,
2010-01-01 00:20:00,1096.08,1096.20,1096.05,1096.15,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-01 23:35:00,1519.30,1519.52,1518.77,1519.28,1518.875,1518.875,1518.8750,1519.315,1519.310,1520.040,...,1521.95,1518.875,1519.77,1519.3225,1520.66,1520.66,1520.66,1520.66,1520.875,1521.70
2020-01-01 23:40:00,1519.29,1519.61,1519.11,1519.33,1518.875,1518.875,1518.8750,1518.955,1519.300,1519.680,...,1521.16,1518.875,1519.77,1519.3225,1520.66,1520.66,1520.66,1520.66,1520.875,1521.20
2020-01-01 23:45:00,1519.33,1519.33,1518.41,1518.59,1519.920,1518.875,1519.3975,1518.875,1519.300,1519.485,...,1520.80,1518.875,1519.77,1519.3225,1520.66,1520.66,1520.66,1520.66,1520.875,1520.33
2020-01-01 23:50:00,1518.58,1518.80,1518.48,1518.63,1519.740,1518.875,1519.3075,1518.875,1519.225,1519.415,...,1520.23,1518.875,1519.77,1519.3225,1520.66,1520.66,1520.66,1520.66,1520.875,1520.86


### Drop Nan Rows

In [12]:
df.dropna(inplace=True)
df.shape

(714946, 31)

# Calculate possible combination

In [11]:
math.factorial((df.shape[1])-4)

1485715964481761497309522733620825737885569961284688766942216863704985393094065876545992131370884059645617234469978112000000000000000000000

# Test

In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
import seaborn as sns
import os
from datetime import datetime

import warnings
warnings.filterwarnings("ignore")


In [15]:
from sklearn.model_selection import train_test_split


In [19]:
df[columnss]

Unnamed: 0_level_0,Tenkan_sen_norm_1X,Kijun_sen_norm_1X,Senkou_span_A_norm_1X,Senkou_span_B_norm_1X,Tenkan_sen_adv_1X,Kijun_sen_adv_1X,Senkou_span_A_adv_1X,Senkou_span_B_adv_1X,Chikou_span_adv_1X,Tenkan_sen_norm_2X,...,Chikou_span_adv_2X,Tenkan_sen_norm_3X,Kijun_sen_norm_3X,Senkou_span_A_norm_3X,Senkou_span_B_norm_3X,Tenkan_sen_adv_3X,Kijun_sen_adv_3X,Senkou_span_A_adv_3X,Senkou_span_B_adv_3X,Chikou_span_adv_3X
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-01 20:15:00,1096.115,1096.110,1096.1125,1096.145,1096.085,1096.150,1096.1175,1096.150,1096.05,1096.110,...,1096.15,1096.110,1096.145,1096.1275,1096.155,1096.19,1096.16,1096.175,1096.105,1096.08
2010-01-01 20:20:00,1096.115,1096.110,1096.1125,1096.145,1096.085,1096.150,1096.1175,1096.150,1096.06,1096.110,...,1096.10,1096.110,1096.145,1096.1275,1096.155,1096.19,1096.16,1096.175,1096.105,1096.10
2010-01-01 20:25:00,1096.115,1096.110,1096.1125,1096.145,1096.090,1096.150,1096.1200,1096.150,1096.05,1096.110,...,1096.07,1096.110,1096.145,1096.1275,1096.155,1096.19,1096.16,1096.175,1096.105,1096.09
2010-01-01 20:30:00,1096.115,1096.110,1096.1125,1096.145,1096.090,1096.150,1096.1200,1096.150,1096.10,1096.110,...,1096.10,1096.110,1096.145,1096.1275,1096.155,1096.19,1096.16,1096.175,1096.105,1096.07
2010-01-01 20:35:00,1096.115,1096.110,1096.1125,1096.145,1096.115,1096.150,1096.1325,1096.150,1096.09,1096.110,...,1096.07,1096.110,1096.145,1096.1275,1096.155,1096.18,1096.16,1096.170,1096.105,1096.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-01 23:35:00,1518.875,1518.875,1518.8750,1519.315,1519.310,1520.040,1519.6750,1520.495,1518.95,1518.875,...,1521.95,1518.875,1519.770,1519.3225,1520.660,1520.66,1520.66,1520.660,1520.875,1521.70
2020-01-01 23:40:00,1518.875,1518.875,1518.8750,1518.955,1519.300,1519.680,1519.4900,1520.495,1518.49,1518.875,...,1521.16,1518.875,1519.770,1519.3225,1520.660,1520.66,1520.66,1520.660,1520.875,1521.20
2020-01-01 23:45:00,1519.920,1518.875,1519.3975,1518.875,1519.300,1519.485,1519.3925,1520.495,1518.50,1518.875,...,1520.80,1518.875,1519.770,1519.3225,1520.660,1520.66,1520.66,1520.660,1520.875,1520.33
2020-01-01 23:50:00,1519.740,1518.875,1519.3075,1518.875,1519.225,1519.415,1519.3200,1520.425,1518.05,1518.875,...,1520.23,1518.875,1519.770,1519.3225,1520.660,1520.66,1520.66,1520.660,1520.875,1520.86


In [20]:
x_train, x_test, y_train, y_test = train_test_split(df[columnss], df["Close"], test_size=0.3)

In [21]:
model = keras.models.Sequential()
model.add(keras.layers.LSTM(units=64,
							return_sequences=True,
							input_shape=(x_train.shape[1], 1)))
model.add(keras.layers.LSTM(units=64))
model.add(keras.layers.Dense(32))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(1))
model.summary()


<bound method Model.summary of <keras.src.engine.sequential.Sequential object at 0x7f2841184eb0>>

In [23]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 27, 64)            16896     
                                                                 
 lstm_1 (LSTM)               (None, 64)                33024     
                                                                 
 dense (Dense)               (None, 32)                2080      
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 52033 (203.25 KB)
Trainable params: 52033 (203.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [25]:
model.compile(optimizer='adam',
			loss='mean_squared_error')
history = model.fit(x_train,
					y_train,
					epochs=8)


Epoch 1/8


2023-10-10 20:27:50.476911: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 108099792 exceeds 10% of free system memory.


Epoch 2/8

KeyboardInterrupt: 

In [26]:
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
 
# evaluation metrics
mse = np.mean(((predictions - y_test) ** 2))
print("MSE", mse)
print("RMSE", np.sqrt(mse))

2023-10-10 20:38:11.054626: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 46328544 exceeds 10% of free system memory.




NameError: name 'scaler' is not defined