In [23]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ta  # For technical indicators
from alpha_vantage.timeseries import TimeSeries
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import accuracy_score, classification_report

# Set random seed for reproducibility
np.random.seed(42)

print("Libraries installed and imported successfully!")


Libraries installed and imported successfully!


In [24]:
# If the file is in the same directory
df = pd.read_csv("SPY_1hour_data.csv")
print(df.head())

                  date    Open    High     Low   Close    Volume
0  2025-04-24 04:00:00  533.73  533.73  531.10  532.04   74339.0
1  2025-04-24 05:00:00  532.14  533.48  532.04  533.20  127711.0
2  2025-04-24 06:00:00  533.20  536.45  533.02  535.96  196029.0
3  2025-04-24 07:00:00  535.96  536.03  533.55  534.30  462107.0
4  2025-04-24 08:00:00  535.76  536.40  531.29  536.11  677985.0


In [25]:
# Compute Lagged Returns
df["Return"] = df["Close"].pct_change()

# Compute Exponential Moving Averages (EMA)
df["EMA_5"] = df["Close"].ewm(span=5, adjust=False).mean()
df["EMA_10"] = df["Close"].ewm(span=10, adjust=False).mean()

# Compute Relative Strength Index (RSI)
df["RSI_14"] = ta.momentum.RSIIndicator(df["Close"], window=14).rsi()

# Compute MACD (Moving Average Convergence Divergence)
macd = ta.trend.MACD(df["Close"])
df["MACD"] = macd.macd()
df["MACD_Signal"] = macd.macd_signal()

# Compute On-Balance Volume (OBV)
df["OBV"] = ta.volume.OnBalanceVolumeIndicator(df["Close"], df["Volume"]).on_balance_volume()

# Compute Bollinger Bands
bb = ta.volatility.BollingerBands(df["Close"], window=20, window_dev=2)
df["BB_High"] = bb.bollinger_hband()
df["BB_Low"] = bb.bollinger_lband()

# Drop NaN values caused by indicator calculations
df = df.dropna()

# Display first few rows
print(df.head())

                   date    Open    High     Low   Close     Volume    Return  \
33  2025-04-25 20:00:00  550.64  550.64  550.64  550.64  1571349.0 -0.001069   
34  2025-04-28 04:00:00  549.65  549.78  548.22  549.70    47499.0 -0.001707   
35  2025-04-28 05:00:00  549.78  550.62  549.37  550.04    27488.0  0.000619   
36  2025-04-28 06:00:00  550.11  550.11  548.82  549.02    43898.0 -0.001854   
37  2025-04-28 07:00:00  548.94  550.41  548.77  549.99   146216.0  0.001767   

         EMA_5      EMA_10     RSI_14      MACD  MACD_Signal         OBV  \
33  550.644728  549.839271  67.870734  2.619426     2.663617  90346854.0   
34  550.329819  549.813949  62.550387  2.441682     2.619230  90299355.0   
35  550.233213  549.855049  63.660016  2.301721     2.555728  90326843.0   
36  549.828808  549.703222  58.098405  2.084467     2.461476  90282945.0   
37  549.882539  549.755364  61.539561  1.967878     2.362756  90429161.0   

       BB_High      BB_Low  
33  552.612917  544.084543  
34  

In [26]:
df["Target"] = df["Return"].shift(-1)  # Predict next return
df = df.dropna()  # Drop last row with NaN in target


In [27]:
# Target magnitude check. If the range is small and the model just outputs something near zero, MSE will seem small, but R² will be terrible.
print(df["Target"].describe())

count    340.000000
mean       0.000154
std        0.003367
min       -0.013804
25%       -0.000904
50%        0.000051
75%        0.001221
max        0.026828
Name: Target, dtype: float64


In [28]:
feature_cols = [
    "Return", "EMA_5", "EMA_10", "RSI_14", "MACD", "MACD_Signal", 
    "OBV", "BB_High", "BB_Low"
]


LSTMs expect sequences as input. We convert the dataframe into overlapping sequences of a fixed window size. A typical window is 24 for hourly data (i.e., one day)

In [29]:
import numpy as np

def create_sequences(df, feature_cols, target_col, window_size):
    X, y = [], []
    for i in range(len(df) - window_size):
        features = df[feature_cols].iloc[i:i+window_size].values
        target = df[target_col].iloc[i+window_size]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y)

window_size = 24  # one trading day of hourly data
X, y = create_sequences(df, feature_cols, "Target", window_size)


In [30]:
# Train-test split
split_idx = int(0.8 * len(X))
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]


We'll use MinMaxScaler or StandardScaler to scale features between 0 and 1 (or mean=0, std=1). Since we’re dealing with sequential data, you need to scale each feature independently across the entire training set and apply the same transformation to the test set.

In [31]:
# Scale Features (Only on Training Set!)
from sklearn.preprocessing import MinMaxScaler

# Initialize scaler
scaler = MinMaxScaler()

# Flatten the training set to 2D for fitting the scaler
X_train_2d = X_train.reshape(-1, X_train.shape[2])
X_test_2d = X_test.reshape(-1, X_test.shape[2])

# Fit on training data only
scaler.fit(X_train_2d)

# Transform both sets
X_train_scaled = scaler.transform(X_train_2d).reshape(X_train.shape)
X_test_scaled = scaler.transform(X_test_2d).reshape(X_test.shape)


Let’s build a simple LSTM model using Keras. We’ll use one LSTM layer followed by a Dense output layer to predict the next return.

In [32]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

model = Sequential()
model.add(LSTM(50, input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(1))  # Predict next return (regression)

model.compile(optimizer='adam', loss='mse')  # Use binary_crossentropy if you're classifying
model.summary()


  super().__init__(**kwargs)


In [33]:
# Train the model
history = model.fit(
    X_train_scaled, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test_scaled, y_test),
    verbose=1
)


Epoch 1/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0588 - val_loss: 0.0077
Epoch 2/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0200 - val_loss: 0.0220
Epoch 3/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0148 - val_loss: 0.0014
Epoch 4/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0142 - val_loss: 0.0018
Epoch 5/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0102 - val_loss: 0.0022
Epoch 6/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0075 - val_loss: 2.5115e-04
Epoch 7/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0091 - val_loss: 2.2576e-04
Epoch 8/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0079 - val_loss: 7.0308e-04
Epoch 9/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [34]:
# Evaluate the model
from sklearn.metrics import mean_squared_error, r2_score

y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"MSE: {mse:.6f}, R2 Score: {r2:.3f}")


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
MSE: 0.000486, R2 Score: -53.410


As we expected - horrible R2. 

In [35]:
from sklearn.metrics import r2_score

r2_naive = r2_score(y_test, np.zeros_like(y_test))
print(f"R² of naive zero-return model: {r2_naive:.3f}")


R² of naive zero-return model: -0.017


Eve the naïve model (if we always predict 0 (no change)), we get a better R². Our LSTM model is learning nothing useful.

Switch to classification 

In [36]:
df["Target"] = (df["Return"].shift(-1) > 0).astype(int)


In [37]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [38]:
# Lets also check correlation of features with target
for col in feature_cols:
    corr = df[col].corr(df["Target"])
    print(f"{col}: {corr:.3f}")


Return: -0.013
EMA_5: 0.022
EMA_10: 0.028
RSI_14: -0.088
MACD: -0.025
MACD_Signal: 0.010
OBV: -0.048
BB_High: 0.043
BB_Low: 0.031


In [39]:
# window_size = 24  # one day of hourly data

X, y = create_sequences(df, feature_cols, "Target", window_size)


In [40]:
split_idx = int(0.8 * len(X))
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]


In [41]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

model = Sequential()
model.add(LSTM(64, input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))  # Binary classification

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(**kwargs)


In [42]:
history = model.fit(
    X_train_scaled, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test_scaled, y_test),
    verbose=1
)


Epoch 1/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.4819 - loss: 0.6973 - val_accuracy: 0.4844 - val_loss: 0.7259
Epoch 2/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5413 - loss: 0.6865 - val_accuracy: 0.4844 - val_loss: 0.7093
Epoch 3/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4955 - loss: 0.6948 - val_accuracy: 0.5156 - val_loss: 0.6925
Epoch 4/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5357 - loss: 0.6888 - val_accuracy: 0.4844 - val_loss: 0.6979
Epoch 5/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5605 - loss: 0.6890 - val_accuracy: 0.4844 - val_loss: 0.7026
Epoch 6/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5507 - loss: 0.6886 - val_accuracy: 0.4844 - val_loss: 0.7006
Epoch 7/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [43]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict
y_pred_probs = model.predict(X_test_scaled)
y_pred = (y_pred_probs > 0.5).astype(int)

# Metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, digits=3))


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Accuracy: 0.53125
[[19 14]
 [16 15]]
              precision    recall  f1-score   support

           0      0.543     0.576     0.559        33
           1      0.517     0.484     0.500        31

    accuracy                          0.531        64
   macro avg      0.530     0.530     0.529        64
weighted avg      0.530     0.531     0.530        64

