In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import joblib
from datetime import date 
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Universe of 20 tickers; one model is trained per symbol further below.
stocks = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "META", "TSLA", "NFLX", "NVDA", "INTC", "AMD",
    "BABA", "ORCL", "PYPL", "ADBE", "CRM", "IBM", "KO", "PEP", "DIS", "UBER",
]

# History window: fixed start date through today (ISO YYYY-MM-DD strings).
start = "2015-01-01"
end = date.today().isoformat()
print("Downloading stock data...")

# One batched request for every ticker, then keep only the OHLC price fields.
# The per-ticker slice is taken later from column level 1.
data = yf.download(tickers=stocks, start=start, end=end)
data = data[['Open', 'High', 'Low', 'Close']]

# Per-ticker result stores, keyed by ticker symbol.
models, scalers, histories, predictions = {}, {}, {}, {}

# Function to create sequences
def create_sequences(dataset, lookback=60, target_col=3):
    """Turn a 2-D feature matrix into sliding windows for supervised training.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_features), rows in time order.
        lookback: number of consecutive rows in each input window.
        target_col: column index used as the prediction target. The default 3
            is the Close column for the Open/High/Low/Close/SMA30 layout used
            in this notebook.

    Returns:
        (x, y) where x has shape (n_rows - lookback, lookback, n_features) and
        y[k] is dataset[k + lookback, target_col], i.e. the target value of
        the row that immediately follows window k. When there are not more
        than `lookback` rows, both arrays are empty but x keeps its
        (0, lookback, n_features) shape.
    """
    dataset = np.asarray(dataset)
    n_rows = len(dataset)

    if n_rows <= lookback:
        # Too little history for a single window: return well-shaped empties
        # so callers can still inspect x.shape[1:].
        empty_x = np.empty((0, lookback) + dataset.shape[1:], dtype=dataset.dtype)
        return empty_x, np.empty((0,), dtype=dataset.dtype)

    x = np.stack([dataset[i - lookback:i] for i in range(lookback, n_rows)])
    # copy() so the targets do not alias the caller's array.
    y = dataset[lookback:, target_col].copy()
    return x, y

# Train, checkpoint and save one stacked-LSTM model per ticker. Any failure is
# reported for that ticker and the loop moves on to the next one.
#
# NOTE(review): the scaler is fit on the full history before the train/test
# split, and the held-out 20 % is also passed as validation_data (driving
# EarlyStopping and ModelCheckpoint), so val_loss is not an unbiased
# out-of-sample estimate.
for ticker in stocks:
    print(f"🚀 Processing {ticker}...")

    try:
        # Slice this ticker's columns out of the batched download
        # (the ticker symbol is on column level 1).
        df_ticker = data.xs(ticker, level=1, axis=1)

        # Forward-fill gaps, then drop rows that are still NaN. Tickers that
        # started trading after `start` have leading NaN rows that ffill
        # cannot fill; MinMaxScaler passes NaN through, which would put NaN
        # windows into training.
        df_ticker = df_ticker[['Open', 'High', 'Low', 'Close']].ffill().dropna()

        # 30-day simple moving average of Close; the first 29 rows have no
        # full window and are back-filled with the first available value.
        df_ticker["SMA30"] = df_ticker["Close"].rolling(window=30).mean().bfill()

        # Each ticker gets its own scaler, persisted for use at inference time.
        scaler = MinMaxScaler(feature_range=(0,1))
        scaled_data = scaler.fit_transform(df_ticker)

        scalers[ticker] = scaler
        joblib.dump(scaler, f"{ticker}_scaler.save")

        # 60-row input windows; the target is the scaled Close of the next row.
        x, y = create_sequences(scaled_data, 60)

        # Chronological 80/20 split (no shuffling).
        train_size = int(len(x) * 0.8)
        if train_size == 0:
            raise ValueError(
                f"not enough history to build training windows "
                f"({len(scaled_data)} usable rows)"
            )
        x_train, x_test = x[:train_size], x[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]

        print(f"   Data shape for {ticker}: {x_train.shape}")

        # Four stacked LSTM layers with increasing width and dropout,
        # followed by a single linear output unit.
        model = Sequential()
        model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
        model.add(Dropout(0.2))
        model.add(LSTM(60, return_sequences=True))
        model.add(Dropout(0.3))
        model.add(LSTM(80, return_sequences=True))
        model.add(Dropout(0.4))
        model.add(LSTM(120))
        model.add(Dropout(0.5))
        model.add(Dense(1))

        model.compile(optimizer='adam', loss='mean_squared_error')

        # Stop after 10 epochs without val_loss improvement and roll back to
        # the best weights; also checkpoint the best epoch to disk.
        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        checkpoint = ModelCheckpoint(f"{ticker}_best_lstm_model.h5", monitor='val_loss',
                                     save_best_only=True, verbose=1)

        print(f"   Training {ticker} model...")
        history = model.fit(
            x_train, y_train,
            epochs=100,  # upper bound; EarlyStopping may end training sooner
            batch_size=32,
            validation_data=(x_test, y_test),
            callbacks=[early_stop, checkpoint],
            verbose=1
        )

        # Persist the final model (the file the testing cell loads) and keep
        # the in-memory objects for later inspection.
        model.save(f"{ticker}_lstm_model.h5")
        models[ticker] = model
        histories[ticker] = history

        print(f"✅ Completed {ticker}")

    except Exception as e:
        # Best effort: one failing ticker must not abort the whole batch.
        print(f"❌ Error processing {ticker}: {str(e)}")
        continue

print("Training completed!")

  data = yf.download(tickers=stocks, start=start, end=end)
[*****                 10%                       ]  2 of 20 completed

Downloading stock data...


[*********************100%***********************]  20 of 20 completed
  df_ticker = df_ticker[['Open', 'High', 'Low', 'Close']].fillna(method="ffill")
  df_ticker["SMA30"] = df_ticker["Close"].rolling(window=30).mean().fillna(method="bfill")
  super().__init__(**kwargs)


🚀 Processing AAPL...
   Data shape for AAPL: (2096, 60, 5)
   Training AAPL model...
Epoch 1/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - loss: 0.0193
Epoch 1: val_loss improved from None to 0.02137, saving model to AAPL_best_lstm_model.h5




[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 70ms/step - loss: 0.0087 - val_loss: 0.0214
Epoch 2/100
[1m65/66[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 58ms/step - loss: 0.0028
Epoch 2: val_loss improved from 0.02137 to 0.00289, saving model to AAPL_best_lstm_model.h5




[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 64ms/step - loss: 0.0024 - val_loss: 0.0029
Epoch 3/100
[1m65/66[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 58ms/step - loss: 0.0021
Epoch 3: val_loss did not improve from 0.00289
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 64ms/step - loss: 0.0025 - val_loss: 0.0032
Epoch 4/100
[1m65/66[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 60ms/step - loss: 0.0022
Epoch 4: val_loss improved from 0.00289 to 0.00236, saving model to AAPL_best_lstm_model.h5




[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 67ms/step - loss: 0.0021 - val_loss: 0.0024
Epoch 5/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - loss: 0.0020
Epoch 5: val_loss improved from 0.00236 to 0.00223, saving model to AAPL_best_lstm_model.h5




[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 65ms/step - loss: 0.0020 - val_loss: 0.0022
Epoch 6/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.0020
Epoch 6: val_loss did not improve from 0.00223
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 65ms/step - loss: 0.0020 - val_loss: 0.0211
Epoch 7/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.0026
Epoch 7: val_loss did not improve from 0.00223
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 67ms/step - loss: 0.0023 - val_loss: 0.0024
Epoch 8/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 0.0021
Epoch 8: val_loss did not improve from 0.00223
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - loss: 0.0018 - val_loss: 0.0055
Epoch 9/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - loss: 0.0018
Epoch 9: val_loss did 



[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 124ms/step - loss: 0.0016 - val_loss: 0.0018
Epoch 11/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - loss: 0.0015
Epoch 11: val_loss did not improve from 0.00184
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 132ms/step - loss: 0.0015 - val_loss: 0.0062
Epoch 12/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step - loss: 0.0015
Epoch 12: val_loss did not improve from 0.00184
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 132ms/step - loss: 0.0015 - val_loss: 0.0050
Epoch 13/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step - loss: 0.0013
Epoch 13: val_loss did not improve from 0.00184
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 132ms/step - loss: 0.0013 - val_loss: 0.0024
Epoch 14/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step - loss: 0.0018
Epoch 14



[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 127ms/step - loss: 0.0015 - val_loss: 0.0016
Epoch 17/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step - loss: 0.0016
Epoch 17: val_loss did not improve from 0.00157
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 96ms/step - loss: 0.0016 - val_loss: 0.0062
Epoch 18/100
[1m65/66[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 65ms/step - loss: 0.0014
Epoch 18: val_loss did not improve from 0.00157
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 71ms/step - loss: 0.0012 - val_loss: 0.0024
Epoch 19/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - loss: 0.0013
Epoch 19: val_loss did not improve from 0.00157
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 70ms/step - loss: 0.0012 - val_loss: 0.0058
Epoch 20/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 0.0012
Epoch 20: val_l



[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 79ms/step - loss: 0.0011 - val_loss: 0.0013
Epoch 25/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 0.0012
Epoch 25: val_loss did not improve from 0.00130
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - loss: 0.0012 - val_loss: 0.0013
Epoch 26/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 0.0013
Epoch 26: val_loss did not improve from 0.00130
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - loss: 0.0012 - val_loss: 0.0017
Epoch 27/100
[1m65/66[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 70ms/step - loss: 9.5005e-04
Epoch 27: val_loss did not improve from 0.00130
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 77ms/step - loss: 0.0010 - val_loss: 0.0018
Epoch 28/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 0.0011
Epoch 28: va



[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - loss: 0.0011 - val_loss: 0.0012
Epoch 30/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 9.0797e-04
Epoch 30: val_loss did not improve from 0.00124
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - loss: 9.1919e-04 - val_loss: 0.0060
Epoch 31/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 0.0012
Epoch 31: val_loss did not improve from 0.00124
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - loss: 0.0013 - val_loss: 0.0028
Epoch 32/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 0.0012
Epoch 32: val_loss did not improve from 0.00124
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - loss: 0.0011 - val_loss: 0.0013
Epoch 33/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 9.1637e-04
Epoc



[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 77ms/step - loss: 9.7760e-04 - val_loss: 0.0012
Epoch 35/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 9.6780e-04
Epoch 35: val_loss improved from 0.00118 to 0.00104, saving model to AAPL_best_lstm_model.h5




[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 77ms/step - loss: 9.2749e-04 - val_loss: 0.0010
Epoch 36/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 9.2491e-04
Epoch 36: val_loss did not improve from 0.00104
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - loss: 0.0010 - val_loss: 0.0014
Epoch 37/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 0.0012
Epoch 37: val_loss did not improve from 0.00104
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 74ms/step - loss: 0.0012 - val_loss: 0.0011
Epoch 38/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 0.0010
Epoch 38: val_loss did not improve from 0.00104
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - loss: 9.7165e-04 - val_loss: 0.0012
Epoch 39/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 8.8750e-04




[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - loss: 9.3287e-04 - val_loss: 0.0010
Epoch 44/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.0010
Epoch 44: val_loss did not improve from 0.00101
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 74ms/step - loss: 0.0010 - val_loss: 0.0040
Epoch 45/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 8.9607e-04
Epoch 45: val_loss did not improve from 0.00101
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - loss: 8.6544e-04 - val_loss: 0.0018
Epoch 46/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 0.0010
Epoch 46: val_loss did not improve from 0.00101
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 74ms/step - loss: 9.6521e-04 - val_loss: 0.0057
Epoch 47/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 9.8028e



[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - loss: 8.4178e-04 - val_loss: 9.5401e-04
Epoch 51/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - loss: 0.0011
Epoch 51: val_loss did not improve from 0.00095
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 81ms/step - loss: 0.0010 - val_loss: 0.0010
Epoch 52/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - loss: 8.8348e-04
Epoch 52: val_loss did not improve from 0.00095
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 91ms/step - loss: 9.0868e-04 - val_loss: 0.0016
Epoch 53/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - loss: 0.0010
Epoch 53: val_loss did not improve from 0.00095
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 91ms/step - loss: 0.0011 - val_loss: 0.0013
Epoch 54/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - loss: 8.5391e



✅ Completed AAPL
🚀 Processing MSFT...
   Data shape for MSFT: (2096, 60, 5)
   Training MSFT model...
Epoch 1/100


  df_ticker = df_ticker[['Open', 'High', 'Low', 'Close']].fillna(method="ffill")
  df_ticker["SMA30"] = df_ticker["Close"].rolling(window=30).mean().fillna(method="bfill")
  super().__init__(**kwargs)


[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - loss: 0.0149
Epoch 1: val_loss improved from None to 0.00509, saving model to MSFT_best_lstm_model.h5




[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 90ms/step - loss: 0.0063 - val_loss: 0.0051
Epoch 2/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 0.0021
Epoch 2: val_loss did not improve from 0.00509
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - loss: 0.0018 - val_loss: 0.0111
Epoch 3/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 0.0017
Epoch 3: val_loss did not improve from 0.00509
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - loss: 0.0017 - val_loss: 0.0057
Epoch 4/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 0.0016
Epoch 4: val_loss improved from 0.00509 to 0.00505, saving model to MSFT_best_lstm_model.h5




[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 77ms/step - loss: 0.0016 - val_loss: 0.0051
Epoch 5/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 0.0018
Epoch 5: val_loss improved from 0.00505 to 0.00221, saving model to MSFT_best_lstm_model.h5




[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 77ms/step - loss: 0.0016 - val_loss: 0.0022
Epoch 6/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 0.0016
Epoch 6: val_loss did not improve from 0.00221
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - loss: 0.0015 - val_loss: 0.0024
Epoch 7/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 0.0014
Epoch 7: val_loss did not improve from 0.00221
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 77ms/step - loss: 0.0015 - val_loss: 0.0117
Epoch 8/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - loss: 0.0013
Epoch 8: val_loss did not improve from 0.00221
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - loss: 0.0012 - val_loss: 0.0039
Epoch 9/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - loss: 0.0011
Epoch 9: val_loss impro



[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 82ms/step - loss: 0.0013 - val_loss: 0.0015
Epoch 10/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - loss: 0.0011
Epoch 10: val_loss did not improve from 0.00145
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 84ms/step - loss: 0.0012 - val_loss: 0.0073
Epoch 11/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 0.0012
Epoch 11: val_loss did not improve from 0.00145
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 83ms/step - loss: 0.0011 - val_loss: 0.0024
Epoch 12/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 0.0011
Epoch 12: val_loss did not improve from 0.00145
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 83ms/step - loss: 0.0011 - val_loss: 0.0018
Epoch 13/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 0.0011
Epoch 13: val_lo



[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 84ms/step - loss: 0.0011 - val_loss: 0.0012
Epoch 14/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 9.6910e-04
Epoch 14: val_loss did not improve from 0.00121
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 83ms/step - loss: 9.4872e-04 - val_loss: 0.0027
Epoch 15/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 0.0011
Epoch 15: val_loss did not improve from 0.00121
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 82ms/step - loss: 0.0011 - val_loss: 0.0022
Epoch 16/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - loss: 0.0011
Epoch 16: val_loss did not improve from 0.00121
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 85ms/step - loss: 0.0010 - val_loss: 0.0013
Epoch 17/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 9.6504e-04
Epoc



✅ Completed MSFT
🚀 Processing GOOGL...
   Data shape for GOOGL: (2096, 60, 5)


  df_ticker = df_ticker[['Open', 'High', 'Low', 'Close']].fillna(method="ffill")
  df_ticker["SMA30"] = df_ticker["Close"].rolling(window=30).mean().fillna(method="bfill")
  super().__init__(**kwargs)


   Training GOOGL model...
Epoch 1/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - loss: 0.0167
Epoch 1: val_loss improved from None to 0.00433, saving model to GOOGL_best_lstm_model.h5




[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 201ms/step - loss: 0.0078 - val_loss: 0.0043
Epoch 2/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - loss: 0.0023
Epoch 2: val_loss improved from 0.00433 to 0.00204, saving model to GOOGL_best_lstm_model.h5




[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 182ms/step - loss: 0.0022 - val_loss: 0.0020
Epoch 3/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step - loss: 0.0021
Epoch 3: val_loss did not improve from 0.00204
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 177ms/step - loss: 0.0019 - val_loss: 0.0044
Epoch 4/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - loss: 0.0018
Epoch 4: val_loss did not improve from 0.00204
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 179ms/step - loss: 0.0018 - val_loss: 0.0097
Epoch 5/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step - loss: 0.0017
Epoch 5: val_loss did not improve from 0.00204
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 172ms/step - loss: 0.0017 - val_loss: 0.0033
Epoch 6/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step - loss: 0.0017

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from datetime import date, timedelta
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model

# Function to create sequences (same as training)
def create_sequences(dataset, lookback=60, target_col=3):
    """Build sliding input windows and next-row targets from a 2-D matrix.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_features), rows in time order.
        lookback: number of consecutive rows in each input window.
        target_col: column index of the value to predict. The default 3 is
            the Close column for the Open/High/Low/Close/SMA30 layout used in
            this notebook.

    Returns:
        (x, y) where x has shape (n_rows - lookback, lookback, n_features) and
        y[k] is dataset[k + lookback, target_col]. When there are not more
        than `lookback` rows, both arrays are empty but x keeps its
        (0, lookback, n_features) shape.
    """
    dataset = np.asarray(dataset)
    n_rows = len(dataset)

    if n_rows <= lookback:
        # No complete window is available: return correctly shaped empties.
        empty_x = np.empty((0, lookback) + dataset.shape[1:], dtype=dataset.dtype)
        return empty_x, np.empty((0,), dtype=dataset.dtype)

    x = np.stack([dataset[i - lookback:i] for i in range(lookback, n_rows)])
    # copy() so the targets do not alias the caller's array.
    y = dataset[lookback:, target_col].copy()
    return x, y

# Function to test a single model
def test_model(ticker, start_date="2023-01-01", end_date=None):
    """Evaluate one saved ticker model on freshly downloaded price data.

    Loads `{ticker}_lstm_model.h5` and `{ticker}_scaler.save`, rebuilds the
    Open/High/Low/Close/SMA30 features for the requested period, predicts the
    next-day Close for every 60-row window and compares it with the actual
    Close in price units.

    Side effects: writes `{ticker}_prediction_test.png` and
    `{ticker}_test_results.csv` to the working directory.

    Args:
        ticker: ticker symbol whose model and scaler files are loaded.
        start_date: first date (YYYY-MM-DD) of the evaluation window.
        end_date: last date (YYYY-MM-DD); defaults to today.

    Returns:
        (actual_close, predicted_close, metrics) where metrics is a dict with
        keys 'mse', 'mae', 'rmse', 'r2' and 'direction_accuracy'; or
        (None, None, None) when there is too little data or any step fails
        (the error is printed, not raised).
    """
    if end_date is None:
        end_date = date.today().strftime("%Y-%m-%d")

    print(f"Testing {ticker}...")

    try:
        # Load the saved model and scaler
        model = load_model(f"{ticker}_lstm_model.h5")
        scaler = joblib.load(f"{ticker}_scaler.save")

        # Download fresh data for testing period
        test_data = yf.download(ticker, start=start_date, end=end_date)

        # A single-ticker download can come back with (field, ticker)
        # MultiIndex columns. Flatten to plain field names so the frame
        # matches the columns the scaler was fitted on.
        if isinstance(test_data.columns, pd.MultiIndex):
            test_data.columns = test_data.columns.get_level_values(0)

        # Same gap handling as training: forward-fill, then drop leftover NaNs.
        test_data = test_data[['Open', 'High', 'Low', 'Close']].ffill().dropna()

        # Add SMA30 (the first 29 rows are back-filled, as in training)
        test_data["SMA30"] = test_data["Close"].rolling(window=30).mean().bfill()

        if test_data.empty:
            print(f"Not enough data to test {ticker}")
            return None, None, None

        # Scale the test data using the same scaler
        scaled_test_data = scaler.transform(test_data)
        n_features = scaled_test_data.shape[1]

        # Create sequences for testing
        x_test, y_test = create_sequences(scaled_test_data, 60)

        if len(x_test) == 0:
            print(f"Not enough data to test {ticker}")
            return None, None, None

        # Make predictions
        predictions = model.predict(x_test)

        # The scaler works on all feature columns at once, so place the Close
        # values (index 3) into zero-filled placeholder rows, invert, and read
        # the Close column back. Scaling is per column, so the filler values
        # do not affect the result.
        dummy_array_pred = np.zeros((len(predictions), n_features))
        dummy_array_pred[:, 3] = predictions.flatten()
        predictions_actual = scaler.inverse_transform(dummy_array_pred)[:, 3]

        dummy_array_actual = np.zeros((len(y_test), n_features))
        dummy_array_actual[:, 3] = y_test
        y_test_actual = scaler.inverse_transform(dummy_array_actual)[:, 3]

        # Calculate metrics
        mse = mean_squared_error(y_test_actual, predictions_actual)
        mae = mean_absolute_error(y_test_actual, predictions_actual)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test_actual, predictions_actual)

        # Share of days on which the day-over-day change of the predicted
        # series has the same sign as that of the actual series. Needs at
        # least two points; otherwise it is undefined.
        if len(y_test_actual) > 1:
            direction_accuracy = np.mean(
                np.sign(np.diff(predictions_actual)) == np.sign(np.diff(y_test_actual))
            ) * 100
        else:
            direction_accuracy = float("nan")

        print(f" {ticker} Test Results:")
        print(f"   MSE: {mse:.4f}, MAE: {mae:.4f}, RMSE: {rmse:.4f}")
        print(f"   R² Score: {r2:.4f}")
        print(f"   Direction Accuracy: {direction_accuracy:.2f}%")

        # The first 60 rows only serve as input context, so results start at
        # index 60.
        result_dates = test_data.index[60:60+len(y_test_actual)]

        # Plot results
        plt.figure(figsize=(12, 6))
        plt.plot(result_dates, y_test_actual, label='Actual', linewidth=2)
        plt.plot(result_dates, predictions_actual, label='Predicted', linewidth=2)
        plt.title(f'{ticker} Stock Price Prediction')
        plt.xlabel('Date')
        plt.ylabel('Price ($)')
        plt.legend()
        plt.grid(True)
        plt.savefig(f"{ticker}_prediction_test.png")
        plt.close()

        # Save results to CSV
        results_df = pd.DataFrame({
            'Date': result_dates,
            'Actual': y_test_actual,
            'Predicted': predictions_actual
        })
        results_df.to_csv(f"{ticker}_test_results.csv", index=False)

        return y_test_actual, predictions_actual, {
            'mse': mse, 'mae': mae, 'rmse': rmse, 'r2': r2, 'direction_accuracy': direction_accuracy
        }

    except Exception as e:
        # Best effort: report and let the caller continue with other tickers.
        print(f"Error testing {ticker}: {str(e)}")
        return None, None, None

# Function to test all models
def test_all_models(start_date="2023-01-01", end_date=None, tickers=None):
    """Run `test_model` for several tickers and summarise the metrics.

    Side effects (only when at least one ticker produced metrics): prints a
    summary table sorted by RMSE, writes `all_models_rmse_comparison.png` and
    `all_models_test_results.csv`.

    Args:
        start_date: first date (YYYY-MM-DD) of the evaluation window.
        end_date: last date (YYYY-MM-DD); passed through to `test_model`.
        tickers: iterable of ticker symbols to evaluate; defaults to the
            module-level `stocks` list.

    Returns:
        dict mapping ticker -> metrics dict for every ticker that was
        evaluated successfully.
    """
    if tickers is None:
        tickers = stocks

    results = {}

    for ticker in tickers:
        try:
            _, _, metrics = test_model(ticker, start_date, end_date)
        except Exception as e:
            # Report instead of silently skipping; a bare `except` would also
            # swallow KeyboardInterrupt and hide real failures.
            print(f"Error testing {ticker}: {str(e)}")
            continue
        if metrics:
            results[ticker] = metrics

    # Print summary of all results
    if results:
        print("\n" + "="*60)
        print("SUMMARY OF ALL MODEL PERFORMANCES")
        print("="*60)

        # One row per ticker, best (lowest RMSE) first.
        results_df = pd.DataFrame(results).T
        results_df = results_df.sort_values('rmse')

        print(results_df)

        # Plot comparison of RMSE values (already sorted above)
        plt.figure(figsize=(12, 8))
        results_df['rmse'].plot(kind='barh')
        plt.title('RMSE Comparison Across Stocks')
        plt.xlabel('RMSE')
        plt.tight_layout()
        plt.savefig("all_models_rmse_comparison.png")
        plt.close()

        # Save summary results
        results_df.to_csv("all_models_test_results.csv")

    return results

# Function to predict future prices
def predict_future(ticker, days=30):
    """Roll the saved model forward to forecast the next `days` closes.

    Loads `{ticker}_lstm_model.h5` and `{ticker}_scaler.save`, seeds the model
    with the latest 60 rows of Open/High/Low/Close/SMA30 and then feeds each
    prediction back in as a new synthetic row. For synthetic rows only Close
    comes from the model: Open/High/Low are fixed offsets from the predicted
    Close (-1 %, +1 %, -2 %) and SMA30 is recomputed from the latest 30
    closes (historical plus predicted). The offsets are a crude placeholder,
    so errors compound quickly over longer horizons.

    Side effects: writes `{ticker}_future_prediction.png` and
    `{ticker}_future_predictions.csv` to the working directory.

    Args:
        ticker: ticker symbol whose model and scaler files are loaded.
        days: number of future trading days to forecast.

    Returns:
        DataFrame with columns 'Date' and 'Predicted_Close', or None when
        there is too little data or any step fails (the error is printed,
        not raised).
    """
    print(f"🔮 Predicting next {days} days for {ticker}...")

    lookback = 60     # window length the model was trained with
    sma_window = 30   # rolling window of the SMA30 feature

    try:
        # Load the saved model and scaler
        model = load_model(f"{ticker}_lstm_model.h5")
        scaler = joblib.load(f"{ticker}_scaler.save")

        # Fetch enough calendar days that the last 60 trading rows all have a
        # fully populated 30-day moving average (needs about 90 trading rows).
        end_date = date.today()
        start_date = end_date - timedelta(days=200)
        recent_data = yf.download(ticker, start=start_date, end=end_date)

        # A single-ticker download can come back with (field, ticker)
        # MultiIndex columns. Flatten to plain field names so the frame
        # matches the columns the scaler was fitted on.
        if isinstance(recent_data.columns, pd.MultiIndex):
            recent_data.columns = recent_data.columns.get_level_values(0)
        recent_data = recent_data[['Open', 'High', 'Low', 'Close']].ffill().dropna()

        # Add SMA30
        recent_data["SMA30"] = recent_data["Close"].rolling(window=sma_window).mean().bfill()

        if len(recent_data) < lookback:
            print(f"Not enough recent data to predict {ticker}")
            return None

        # Seed window: the last 60 rows, scaled with the training scaler
        last_60_days = recent_data.iloc[-lookback:].copy()
        feature_columns = last_60_days.columns
        current_sequence = scaler.transform(last_60_days)
        n_features = current_sequence.shape[1]

        # Closes in price units, used to keep the moving average up to date.
        close_history = recent_data["Close"].tolist()
        future_closes = []

        for _ in range(days):
            # Reshape the latest window for prediction
            x = current_sequence[-lookback:].reshape(1, lookback, n_features)
            next_pred = model.predict(x, verbose=0)

            # Convert the scaled Close (column 3) back to a price. Scaling is
            # per column, so the zero filler in other columns is irrelevant.
            placeholder = np.zeros((1, n_features))
            placeholder[0, 3] = next_pred[0, 0]
            close_price = float(scaler.inverse_transform(placeholder)[0, 3])

            close_history.append(close_price)
            future_closes.append(close_price)

            # Build the synthetic row in price space, then scale it, so the
            # percentage offsets mean what they say and SMA30 follows the
            # predicted closes instead of staying frozen.
            price_row = pd.DataFrame(
                [[
                    close_price * 0.99,   # Open slightly lower than close
                    close_price * 1.01,   # High slightly higher than close
                    close_price * 0.98,   # Low slightly lower than close
                    close_price,
                    float(np.mean(close_history[-sma_window:])),
                ]],
                columns=feature_columns,
            )
            current_sequence = np.vstack([current_sequence, scaler.transform(price_row)])

        future_predictions_actual = np.array(future_closes)

        # Each step is one trading day, so label the forecasts with business
        # days. NOTE: weekends are skipped, exchange holidays are not.
        last_date = recent_data.index[-1]
        future_dates = pd.bdate_range(start=last_date + pd.offsets.BDay(1), periods=days)

        # Plot historical and future predictions
        plt.figure(figsize=(14, 7))
        plt.plot(recent_data.index[-lookback:], recent_data['Close'].iloc[-lookback:],
                 label='Historical', linewidth=2)
        plt.plot(future_dates, future_predictions_actual, label='Predicted', linewidth=2, color='red')
        plt.title(f'{ticker} Future Price Prediction (Next {days} Days)')
        plt.xlabel('Date')
        plt.ylabel('Price ($)')
        plt.legend()
        plt.grid(True)
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(f"{ticker}_future_prediction.png")
        plt.close()

        # Create results DataFrame
        future_df = pd.DataFrame({
            'Date': future_dates,
            'Predicted_Close': future_predictions_actual
        })

        future_df.to_csv(f"{ticker}_future_predictions.csv", index=False)

        print(f"Future predictions saved for {ticker}")
        return future_df

    except Exception as e:
        # Best effort: report the failure and signal it with None.
        print(f"Error predicting future for {ticker}: {str(e)}")
        return None

# Entry point of the testing cell: banner, full evaluation, one sample forecast.
print("\n" + "=" * 50, "STARTING MODEL TESTING", "=" * 50, sep="\n")

# Evaluate every saved model; keep the per-ticker metrics for inspection.
results = test_all_models()

# Sample 30-day forecast for a single ticker (AAPL).
predict_future("AAPL", days=30)

print("\nTesting completed! Check the generated charts and CSV files for results.")


STARTING MODEL TESTING
Testing AAPL...


  test_data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  test_data["SMA30"] = test_data["Close"].rolling(window=30).mean().fillna(method="bfill")


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
 AAPL Test Results:
   MSE: 91.1733, MAE: 7.4554, RMSE: 9.5485
   R² Score: 0.8478
   Direction Accuracy: 49.01%




Testing MSFT...


  test_data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  test_data["SMA30"] = test_data["Close"].rolling(window=30).mean().fillna(method="bfill")


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
 MSFT Test Results:
   MSE: 222.0809, MAE: 11.7250, RMSE: 14.9024
   R² Score: 0.9325
   Direction Accuracy: 49.01%
Testing GOOGL...


  test_data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  test_data["SMA30"] = test_data["Close"].rolling(window=30).mean().fillna(method="bfill")


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step
 GOOGL Test Results:
   MSE: 54.5105, MAE: 5.8444, RMSE: 7.3831
   R² Score: 0.9161
   Direction Accuracy: 52.48%
Testing AMZN...


  test_data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  test_data["SMA30"] = test_data["Close"].rolling(window=30).mean().fillna(method="bfill")


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step
 AMZN Test Results:
   MSE: 42.3150, MAE: 4.9060, RMSE: 6.5050
   R² Score: 0.9684
   Direction Accuracy: 47.69%

SUMMARY OF ALL MODEL PERFORMANCES
              mse        mae       rmse        r2  direction_accuracy
AMZN    42.315000   4.906004   6.504998  0.968392           47.689769
GOOGL   54.510515   5.844412   7.383124  0.916050           52.475248
AAPL    91.173337   7.455372   9.548473  0.847794           49.009901
MSFT   222.080926  11.725040  14.902380  0.932498           49.009901




🔮 Predicting next 30 days for AAPL...


  recent_data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  recent_data["SMA30"] = recent_data["Close"].rolling(window=30).mean().fillna(method="bfill")


Future predictions saved for AAPL

Testing completed! Check the generated charts and CSV files for results.
