In [2]:
import pandas as pd
import numpy as np

def load_ohlc(csv_path):
    """Read one tab-separated price export and return tidy OHLC bars.

    The file must provide ``<DATE>`` and ``<TIME>`` columns (concatenated and
    parsed into a single timestamp) plus columns that become ``open``,
    ``high``, ``low`` and ``close`` once the angle brackets are stripped and
    the names are lower-cased. Every other column is discarded.

    Parameters
    ----------
    csv_path : str
        Location of the tab-separated export.

    Returns
    -------
    pandas.DataFrame
        Columns ``datetime, open, high, low, close`` in that order.

    Raises
    ------
    KeyError
        If any of the required columns is missing from the file.
    """
    raw = pd.read_csv(csv_path, sep='\t')

    # Combine the date and time columns into a single datetime column
    bars = raw.assign(datetime=pd.to_datetime(raw['<DATE>'] + ' ' + raw['<TIME>']))

    # Rename columns to remove symbols and make them lowercase
    bars.columns = [col.replace('<', '').replace('>', '').lower() for col in bars.columns]

    # Selecting the wanted columns also drops everything else (tick volume, spread, ...)
    return bars[['datetime', 'open', 'high', 'low', 'close']]


# NOTE(review): absolute, machine-specific paths; consider a configurable data directory.
ES_CSV_PATH = '/Users/kush/Desktop/EP_M1.csv'
NQ_CSV_PATH = '/Users/kush/Desktop/ENQ_M1.csv'

###### S&P ######
df_es = load_ohlc(ES_CSV_PATH)

###### NASDAQ ######
df_nq = load_ohlc(NQ_CSV_PATH)

# Combine the two datasets based on the datetime column. The merge is an inner
# join (the pandas default), so only timestamps present in both files are kept.
# 'datetime' is already a datetime64 column, so it is used as the index directly.
combined_data = (
    pd.merge(df_es, df_nq, on='datetime', suffixes=('_es', '_nq'))
    .set_index('datetime')
)

In [18]:
# Keep only the rows of the merged ES/NQ frame whose index label falls between
# the two dates below (label-based .loc slicing includes both endpoints), then
# write that subset to CSV with the index column included.
ICT_statement_data = combined_data.loc[slice('2022-01-01', '2022-03-16'), :]
ICT_statement_data.to_csv('ict_statement_data.csv', index=True)

In [14]:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from keras.models import Sequential
from keras.layers import Conv1D, LSTM, Dense, Dropout
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load the labelled history (OHLC columns for ES and NQ plus a 'label' column)
historical_data_df = pd.read_csv('ict_historical_data_with_correct_labels_v2.csv')

# Number of consecutive rows that make up one model input
window_size = 60

# Fraction of windows held out at the end of the series. This must stay equal
# to the test_size passed to train_test_split later in this cell.
test_fraction = 0.2

# Extract the features (OHLC) and labels from the historical data
feature_columns = ['open_es', 'high_es', 'low_es', 'close_es',
                   'open_nq', 'high_nq', 'low_nq', 'close_nq']
features = historical_data_df[feature_columns].values

# Change class labels to 0, 1, and 2 (-1 becomes 2). np.where builds a new
# array instead of assigning into the array returned by `.values`, which may
# be a view of the DataFrame (silently mutating it) or a read-only view under
# pandas Copy-on-Write (raising on assignment).
raw_labels = historical_data_df['label'].values
labels = np.where(raw_labels == -1, 2, raw_labels)

# Calculate class weights based on the inverse of their frequency
unique_classes = np.unique(labels)
class_weights = compute_class_weight(class_weight='balanced', classes=unique_classes, y=labels)
class_weights_dict = {int(cls): float(weight) for cls, weight in zip(unique_classes, class_weights)}

# Multiply the weights for "buy" and "sell" classes by a factor for more dramatic weights
weight_factor = 100
for signal_class in (1, 2):
    class_weights_dict[signal_class] *= weight_factor

# Normalize the features using MinMaxScaler. The scaler is fit only on the rows
# that appear inside training windows; fitting on the whole series would leak
# the min/max of the held-out (later) period into the training inputs.
# Window j covers rows j .. j + window_size - 1, and the chronological split
# keeps the first n_train_windows windows for training.
n_windows = len(features) - window_size
n_train_windows = n_windows - int(np.ceil(test_fraction * n_windows))
n_train_rows = n_train_windows + window_size - 1

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(features[:n_train_rows])
# Held-out rows may fall slightly outside [0, 1]; that is expected.
features_scaled = scaler.transform(features)
# Create input sequences (windows) from the normalized features
def create_windows(data, window_size):
    """Stack every run of ``window_size`` consecutive rows of ``data``.

    Window ``k`` holds rows ``k`` through ``k + window_size - 1``. The final
    possible window (the one ending on the last row) is not produced, so the
    result has ``len(data) - window_size`` windows, and the row that follows
    each window is still inside ``data``.

    Parameters
    ----------
    data : sequence or numpy.ndarray
        Rows to window over; sliced along its first axis.
    window_size : int
        Number of rows per window.

    Returns
    -------
    numpy.ndarray
        Array whose first axis indexes the windows. When no window fits, the
        result is an empty one-dimensional array.
    """
    n_windows = len(data) - window_size
    return np.array([data[start:start + window_size] for start in range(n_windows)])

# Build the model inputs: one window of scaled rows per sample, labelled with
# the class of the row that immediately follows the window.
X = create_windows(features_scaled, window_size)
y = labels[window_size:]

# Split the data into training and testing sets. shuffle=False keeps the split
# chronological: the last 20% of windows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

# The targets are integer class ids starting at 0, so the network needs one
# output unit per class rather than a single unit.
num_classes = int(np.max(y)) + 1
n_features = X.shape[-1]

# Define the hybrid LSTM-CNN model architecture
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(window_size, n_features)))
model.add(Conv1D(filters=32, kernel_size=3, activation='relu'))
model.add(LSTM(units=50, return_sequences=True))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=64, activation='relu'))
# Softmax yields one probability per class. The previous single tanh unit
# produced values in [-1, 1], which cannot represent three classes and is not
# a valid probability for a cross-entropy loss.
model.add(Dense(units=num_classes, activation='softmax'))

# Compile the model. sparse_categorical_crossentropy accepts the integer class
# ids in y directly, so no one-hot encoding is required.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model with class weights
# NOTE(review): the test split doubles as validation data here; if it is ever
# used for early stopping or tuning, the test metrics below become optimistic.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    class_weight=class_weights_dict,
    batch_size=64,
    epochs=10,
    verbose=1,
)

# Evaluate the model's performance on the testing set
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)


Epoch 1/10


2023-04-16 21:22:02.518298: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-04-16 21:22:02.683856: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-04-16 21:22:02.729094: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-04-16 21:22:02.837758: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-04-16 21:22:02.897588: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-04-16 21:22:21.914812: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-04-16 21:22:21.985792: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-04-16 21:22:22.021131: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

KeyboardInterrupt: 

In [10]:
# Balanced class weights: each class present in `labels` gets a weight that is
# inversely related to how often it occurs.
# NOTE(review): this rebinds class_weights_dict to the plain balanced weights,
# without the weight_factor amplification applied in the training cell, so
# re-running this cell before training changes what the model is trained with.
unique_classes = np.unique(labels)
class_weights = compute_class_weight(class_weight='balanced', classes=unique_classes, y=labels)
class_weights_dict = {cls: weight for cls, weight in zip(unique_classes, class_weights)}

In [11]:
# Inspect the current class -> weight mapping.
# NOTE(review): the recorded output still has a -1 key, so it was produced
# from labels that had not yet been remapped to 0/1/2.
class_weights_dict

{-1: 165.02947845804988, 0: 0.3347315359071299, 1: 154.51804670912952}

In [12]:
# Key the weights by their position in `class_weights` (0, 1, 2, ...).
# NOTE(review): positions follow the sorted class order, so with labels
# -1/0/1 the weight of class -1 lands on key 0. The training cell instead
# remaps -1 to 2, so this mapping must not be used as its class_weight.
{position: weight for position, weight in enumerate(class_weights)}

{0: 165.02947845804988, 1: 0.3347315359071299, 2: 154.51804670912952}