In [5]:
# Predicts up and down days for the S&P 500 from historical simple moving averages (SMAs), using a neural network trained on that history.

import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input # Added Input for preferred model definition

# Fetch daily historical data for the S&P 500 index.
symbol = "^GSPC"
start_date = "2000-01-01"
# Fixed historical window. yfinance treats `end` as exclusive, so the last
# bar returned is the final trading day before this date.
end_date = "2025-04-30"
df = yf.download(symbol, start=start_date, end=end_date)

# Recent yfinance releases can return MultiIndex columns (field, ticker) even
# for a single symbol. Keep only the field level so that df["Close"] is a
# plain Series and the column selections below behave the same everywhere.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

if df.empty:
    print(f"No data downloaded for {symbol} from {start_date} to {end_date}. Please check the symbol or date range.")
else:
    # Simple moving averages of the close and their spread.
    close = df["Close"]
    df["SMA_20"] = close.rolling(window=20).mean()
    df["SMA_50"] = close.rolling(window=50).mean()
    df["SMA_diff"] = df["SMA_20"] - df["SMA_50"]

    # The first 49 rows have no 50-day SMA yet; drop them (and any other row
    # containing a NaN).
    df.dropna(inplace=True)

    # Target: 1 if the next trading day's close is higher than today's, else 0.
    # The final row has no next-day close. A comparison against NaN evaluates
    # to False, so casting to int would silently label that row 0 instead of
    # leaving it missing -- a later dropna() would never remove it. Drop the
    # row explicitly using the shifted series.
    next_close = df["Close"].shift(-1)
    df["Target"] = (next_close > df["Close"]).astype(int)
    df = df.loc[next_close.notna()].copy()

    if df.empty:
        print("DataFrame became empty after processing. Check data and NaN handling steps.")
    else:
        # NOTE(review): SMA_20 and SMA_50 are raw price levels; with a
        # chronological split the test-period levels may fall outside the
        # range seen in training -- consider relative features. TODO confirm.
        features_list = ["SMA_20", "SMA_50", "SMA_diff"]
        X = df[features_list]
        y = df["Target"]

        # Chronological 80/20 split: shuffle=False keeps the most recent 20%
        # of rows as the test set, so no future data leaks into training.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

        # Fit the scaler on the training rows only, then apply it to both sets.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Small feed-forward binary classifier: 3 inputs -> 16 -> 8 -> 1.
        model = Sequential([
            Input(shape=(X_train_scaled.shape[1],)),
            Dense(16, activation="relu"),
            Dense(8, activation="relu"),
            Dense(1, activation="sigmoid"),
        ])

        model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

        # Train. Keras takes the validation_split fraction from the end of the
        # training arrays before shuffling, so validation rows are the most
        # recent 10% of the training period.
        print(f"Training on {X_train_scaled.shape[0]} samples.")
        model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

        # Evaluate on the held-out, most recent 20% of the data.
        print(f"Evaluating on {X_test_scaled.shape[0]} samples.")
        test_loss, test_acc = model.evaluate(X_test_scaled, y_test, verbose=0)
        print(f"Test Loss: {test_loss:.4f}")
        print(f"Test Accuracy: {test_acc:.4f}")

        # predict() returns an (n, 1) array of probabilities; flatten it so
        # the predicted labels have the same 1-D shape as y_test.
        predictions_proba = model.predict(X_test_scaled)
        y_pred = (predictions_proba.ravel() > 0.5).astype(int)
        print("\nClassification Report:")
        print(classification_report(y_test, y_pred, zero_division=0))

[*********************100%***********************]  1 of 1 completed

Training on 5056 samples.
Epoch 1/20





143/143 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - accuracy: 0.5122 - loss: 0.7044 - val_accuracy: 0.5514 - val_loss: 0.7168
Epoch 2/20
143/143 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.5346 - loss: 0.6926 - val_accuracy: 0.5514 - val_loss: 0.7052
Epoch 3/20
143/143 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.5301 - loss: 0.6925 - val_accuracy: 0.5514 - val_loss: 0.6994
Epoch 4/20
143/143 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.5281 - loss: 0.6929 - val_accuracy: 0.5514 - val_loss: 0.6982
Epoch 5/20
143/143 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - accuracy: 0.5459 - loss: 0.6899 - val_accuracy: 0.5553 - val_loss: 0.6939
Epoch 6/20
143/143 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.5391 - loss: 0.6906 - val_accuracy: 0.5553 - val_loss: 0.6944
Epoch 7/20
[1m143/143[0m [32m━━━━━━━