<a href="https://colab.research.google.com/github/negbuna/models/blob/main/lstm_remake_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [44]:
# Imports: market-data download (yfinance), dataframes/arrays (pandas, numpy), and feature scaling (scikit-learn)

import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [45]:
# Record which yfinance release produced the outputs stored in this notebook.
yfinance_version = yf.__version__
print(yfinance_version)

0.2.57


In [52]:
# Download daily QQQ bars for the study period.
# auto_adjust is passed explicitly so the price columns do not depend on the
# library's default for that flag.
# NOTE(review): yfinance documents `end` as exclusive, so 2024-12-31 itself is
# presumably not part of the frame - confirm if that day matters.
ticker = 'QQQ'
df = yf.download(ticker, start='2022-01-01', end='2024-12-31', auto_adjust=True)

# Flatten the column labels to '<Field>_<Ticker>' (e.g. 'Close_QQQ'), which is
# the naming every later cell relies on.
if isinstance(df.columns, pd.MultiIndex):
    # Two-level (field, ticker) labels: join the non-empty levels with '_'.
    df.columns = ['_'.join(filter(None, col)).strip() for col in df.columns.values]
else:
    # Flat string labels: joining a plain string would iterate its characters
    # ('Close' -> 'C_l_o_s_e'), so append the ticker suffix instead.
    df.columns = [f'{col}_{ticker}' for col in df.columns]
df.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Close_QQQ,High_QQQ,Low_QQQ,Open_QQQ,Volume_QQQ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-03,393.079742,393.334185,388.382526,390.506048,40575900
2022-01-04,387.981293,393.666894,384.869386,393.627742,58027200
2022-01-05,376.062042,387.413683,375.807599,386.288281,75739800
2022-01-06,375.797791,379.056509,371.991094,374.232072,70814300
2022-01-07,371.726929,377.50063,369.945919,375.807659,72652300


## Creating features: 7-day rolling statistics, 14-day RSI, and MACD

In [54]:
# Hyperparameters shared by the windowing and model cells.
sequence_length = 30  # number of consecutive rows in one input window
num_features = 11  # width of one row; matches the 11 names in the feature list
batch_size = 32  # mini-batch size for training

In [56]:
# Technical indicators derived from the daily close. Each one is computed
# from the current and earlier rows only (rolling, diff and ewm all look
# backwards), so no row depends on later prices.
close = df['Close_QQQ']

# 7-day rolling mean and sample standard deviation, for assessing price action.
# The first 6 rows lack a full window and stay NaN.
close_window_7 = close.rolling(window=7)
df['Rolling_Mean_7'] = close_window_7.mean()
df['Rolling_Std_7'] = close_window_7.std()

# 14-day RSI built from simple rolling averages of daily gains and losses.
# The first diff is NaN and is treated as "no gain / no loss" (0) by where().
delta = close.diff()
gain = (delta.where(delta > 0, 0)).rolling(14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
rs = gain / loss  # a window with no losses gives inf, which maps to RSI == 100
df['RSI'] = 100 - (100 / (1 + rs))

# MACD (12/26-span EMA difference), its 9-span signal line, and the
# histogram between them - used to find trend reversals.
ema_12 = close.ewm(span=12, adjust=False).mean()
ema_26 = close.ewm(span=26, adjust=False).mean()
df['MACD'] = ema_12 - ema_26
df['MACD_signal'] = df['MACD'].ewm(span=9, adjust=False).mean()
df['MACD_hist'] = df['MACD'] - df['MACD_signal']

In [58]:
# Chronological split: rows up to and including train_end are used for
# training, rows from test_start onward for testing.
train_end = '2024-07-31'
test_start = '2024-08-01'
train_df = df.loc[:train_end].copy()
test_df = df.loc[test_start:].copy()

# Model inputs: raw OHLCV columns followed by the engineered indicators.
features = [
    'Open_QQQ',
    'High_QQQ',
    'Low_QQQ',
    'Close_QQQ',
    'Volume_QQQ',
    'Rolling_Mean_7',
    'Rolling_Std_7',
    'RSI',
    'MACD',
    'MACD_signal',
    'MACD_hist',
]

# Diagnostics, printed before any rows are dropped so the NaN counts show the
# indicator warm-up period.
print(df.columns)
print(train_df.shape)
print(train_df[features].isna().sum())

# Remove rows where any feature is still missing.
train_df = train_df.dropna(subset=features)
test_df = test_df.dropna(subset=features)

# Fit the scaler on the training rows only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(train_df[features])

train_df[features] = scaler.transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

Index(['Close_QQQ', 'High_QQQ', 'Low_QQQ', 'Open_QQQ', 'Volume_QQQ',
       'Rolling_Mean_7', 'Rolling_Std_7', 'RSI', 'MACD', 'MACD_signal',
       'MACD_hist'],
      dtype='object')
(647, 11)
Open_QQQ           0
High_QQQ           0
Low_QQQ            0
Close_QQQ          0
Volume_QQQ         0
Rolling_Mean_7     6
Rolling_Std_7      6
RSI               13
MACD               0
MACD_signal        0
MACD_hist          0
dtype: int64


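## Training data: sliding windows of `sequence_length` rows, labelled 1 when the next close is higher than the window's last close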

In [60]:
# Turn the scaled training frame into supervised samples.
# Input : `sequence_length` consecutive rows of the feature columns.
# Label : 1 if the close on the row right after the window is above the
#         close on the window's last row, otherwise 0.
train_matrix = train_df[features].values
train_close = train_df['Close_QQQ'].values
n_train_windows = len(train_df) - sequence_length

X = np.array([
    train_matrix[start:start + sequence_length]
    for start in range(n_train_windows)
])
y = np.array([
    1 if train_close[start + sequence_length] > train_close[start + sequence_length - 1] else 0
    for start in range(n_train_windows)
])

## Testing data: the same windowing and labelling applied to the held-out rows

In [62]:
# Turn the scaled test frame into supervised samples.
# Input : `sequence_length` consecutive rows of the feature columns.
# Label : 1 if the close on the row right after the window is above the
#         close on the window's last row, otherwise 0.
test_matrix = test_df[features].values
test_close = test_df['Close_QQQ'].values
n_test_windows = len(test_df) - sequence_length

X_test = np.array([
    test_matrix[start:start + sequence_length]
    for start in range(n_test_windows)
])
y_test = np.array([
    1 if test_close[start + sequence_length] > test_close[start + sequence_length - 1] else 0
    for start in range(n_test_windows)
])

In [63]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow generators so weight initialisation,
# dropout masks and batch shuffling are repeatable across runs.
set_random_seed(42)

# Binary classifier: one LSTM layer over the window, dropout, then a sigmoid
# unit producing the probability of the "next close is higher" label.
model = Sequential([
    # Explicit Input layer; passing input_shape= to the first layer is
    # deprecated in current Keras and emits a warning.
    Input(shape=(sequence_length, len(features))),  # shape of an input sample
    LSTM(64),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# NOTE(review): the test windows are also used for per-epoch validation, so
# the final evaluate() below is not an independent estimate. The recorded run
# shows val_accuracy fixed at 0.6000 for every epoch, which is consistent with
# the model predicting a single class - worth checking against the label balance.
model.fit(X, y, epochs=20, batch_size=batch_size, validation_data=(X_test, y_test))

loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

# Persist the trained model to the working directory.
model.save('lstm_model.h5')

  super().__init__(**kwargs)


Epoch 1/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step - accuracy: 0.5065 - loss: 0.7036 - val_accuracy: 0.6000 - val_loss: 0.6706
Epoch 2/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.5486 - loss: 0.6868 - val_accuracy: 0.6000 - val_loss: 0.6669
Epoch 3/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.5511 - loss: 0.6825 - val_accuracy: 0.6000 - val_loss: 0.6676
Epoch 4/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.5681 - loss: 0.6834 - val_accuracy: 0.6000 - val_loss: 0.6719
Epoch 5/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.5319 - loss: 0.6840 - val_accuracy: 0.6000 - val_loss: 0.6760
Epoch 6/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.5416 - loss: 0.6861 - val_accuracy: 0.6000 - val_loss: 0.6724
Epoch 7/20
[1m19/19[0m [32m━━━━



Test Accuracy: 0.60
