In [7]:
pip install tensorflow keras




In [12]:
import os
import json
import time
import requests
import pickle
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from tqdm import tqdm
from datetime import datetime
from google.colab import userdata

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

warnings.filterwarnings('ignore')

In [14]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
ALPHA_VANTAGE_API_KEY = userdata.get('VANTAGE_API_KEY')

In [16]:
top_50_tickers = ['AAPL', 'MSFT', 'NVDA', 'GOOG', 'GOOGL', 'AMZN', 'META',
                  'AVGO', 'TSLA', 'WMT', 'LLY', 'JPM', 'V', 'UNH', 'MA',
                  'XOM', 'COST', 'NFLX', 'PG', 'ORCL', 'JNJ', 'HD', 'ABBV',
                  'KO', 'TMUS', 'BAC', 'PM', 'CRM', 'CVX', 'PLTR', 'CSCO',
                  'MCD', 'IBM', 'ABT', 'LIN', 'WFC', 'GE', 'T', 'MRK',
                  'PEP', 'VZ', 'AXP', 'ACN', 'MS', 'ISRG', 'RTX', 'NOW',
                  'TMO', 'INTU', 'BX']

os.makedirs("/content/drive/MyDrive/stock_data", exist_ok=True)
os.makedirs("/content/drive/MyDrive/models/LSTM", exist_ok=True)
os.makedirs("/content/drive/MyDrive/models/Scalers", exist_ok=True)
os.makedirs("/content/drive/MyDrive/outputs/plots", exist_ok=True)
os.makedirs("/content/drive/MyDrive/outputs/lstm_predictions", exist_ok=True)
os.makedirs("/content/drive/MyDrive/outputs/metrics", exist_ok=True)

In [40]:
def fetch_monthly_stock_data(ticker):
  url = f"https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol={ticker}&apikey={ALPHA_VANTAGE_API_KEY}&datatype=csv"
  # url = "https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol=IBM&apikey=demo&datatype=csv"
  path = f"/content/drive/MyDrive/stock_data/{ticker}.csv"
  # path = f"/content/drive/MyDrive/stock_data/IBM.csv"

  try:
    response=requests.get(url)
    if response.status_code == 200:
      with open(path, "w") as f:
        f.write(response.text)
      return True
    else:
      print(f"Failed to fetch for {ticker}: HTTP {response.status_code}")
      return False

  except Exception as e:
    print(f"Error for {ticker}: {e}")
    return False



In [28]:
# fetch_monthly_stock_data("IBM")

True

In [41]:
# Preprocessing
def create_dataset(dataset, look_back=30):
    X, Y = [], []
    for i in range(len(dataset) - look_back):
        X.append(dataset[i:(i + look_back), 0])
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

def train_lstm_model(ticker):
    path = f"/content/drive/MyDrive/stock_data/{ticker}.csv"
    if not os.path.exists(path):
        print(f"No CSV for {ticker}")
        return None

    df = pd.read_csv(path)
    LOOK_BACK = 30

    if df.shape[0] < LOOK_BACK + 10:
        print(f"Skipping {ticker} (too little data)")
        return None

    df = df.sort_values("timestamp")
    df["timestamp"] = pd.to_datetime(df["timestamp"])

    dataset = df['close'].dropna().values.astype('float32')
    dataset = np.reshape(dataset, (-1, 1))

    scaler = MinMaxScaler()
    dataset_scaled = scaler.fit_transform(dataset)

    pickle.dump(scaler, open(f"/content/drive/MyDrive/models/Scalers/{ticker}.pkl", "wb"))

    TRAIN_SIZE = int(len(dataset_scaled) * 0.7)
    TEST_SIZE = len(dataset_scaled) - TRAIN_SIZE

    train = dataset_scaled[:TRAIN_SIZE]
    test = dataset_scaled[TRAIN_SIZE - LOOK_BACK:]

    X_train, Y_train = create_dataset(train, LOOK_BACK)
    X_test, Y_test = create_dataset(test, LOOK_BACK)

    X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

    model = Sequential()
    model.add(LSTM(128, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    history = model.fit(X_train, Y_train, epochs=200, batch_size=70,
              validation_data=(X_test, Y_test), verbose=0, shuffle=False)

    model.save(f"/content/drive/MyDrive/models/LSTM/{ticker}.h5")

    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)

    train_pred = scaler.inverse_transform(train_pred)
    Y_train_unscaled = scaler.inverse_transform([Y_train])
    test_pred = scaler.inverse_transform(test_pred)
    Y_test_unscaled = scaler.inverse_transform([Y_test])

    train_mae = mean_absolute_error(Y_train_unscaled[0], train_pred[:,0])
    test_mae = mean_absolute_error(Y_test_unscaled[0], test_pred[:,0])
    train_rmse = np.sqrt(mean_squared_error(Y_train_unscaled[0], train_pred[:,0]))
    test_rmse = np.sqrt(mean_squared_error(Y_test_unscaled[0], test_pred[:,0]))
    train_mape = mean_absolute_percentage_error(Y_train_unscaled[0], train_pred[:,0])
    test_mape = mean_absolute_percentage_error(Y_test_unscaled[0], test_pred[:,0])

    # Plot loss
    plt.figure(figsize=(8, 4))
    plt.plot(history.history['loss'], label='Train')
    plt.plot(history.history['val_loss'], label='Val')
    plt.title(f"{ticker} Loss")
    plt.legend()
    plt.savefig(f"/content/drive/MyDrive/outputs/plots/{ticker}_loss.png")
    plt.close()

    # Plot test predictions
    dates = df["timestamp"].iloc[-TEST_SIZE:]
    plt.figure(figsize=(10, 4))
    plt.plot(dates, Y_test_unscaled[0][:TEST_SIZE], marker='.', label="Actual")
    plt.plot(dates, test_pred[:,0][:TEST_SIZE], 'r', label="Predicted")
    plt.legend()
    plt.title(f"{ticker} Test Predictions")
    plt.savefig(f"/content/drive/MyDrive/outputs/plots/{ticker}_predictions.png")
    plt.close()

    lstm_preds = [-1] * LOOK_BACK
    lstm_preds += [x[0] for x in train_pred]
    lstm_preds += [x[0] for x in test_pred]
    df["lstm_predictions"] = lstm_preds[:len(df)]

    df[["timestamp", "close", "lstm_predictions"]].to_csv(
        f"/content/drive/MyDrive/outputs/lstm_predictions/{ticker}_predictions.csv", index=False)

    return {
        "ticker": ticker,
        "train_mae": train_mae,
        "test_mae": test_mae,
        "train_rmse": train_rmse,
        "test_rmse": test_rmse,
        "train_mape": train_mape,
        "test_mape": test_mape
    }


In [38]:
# train_lstm_model("IBM")



[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


{'ticker': 'IBM',
 'train_mae': 6.39724527912122,
 'test_mae': 8.781804506382247,
 'train_rmse': np.float64(8.08774999645887),
 'test_rmse': np.float64(11.445882253551577),
 'train_mape': 0.05288060572428115,
 'test_mape': 0.059483580236401316}

In [42]:
all_metrics = []

for i in range(25):
    ticker = top_50_tickers[i]
    print(f"Processing {ticker}")
    if not fetch_monthly_stock_data(ticker):
        continue

    print(f"Training LSTM for {ticker}")
    metric = train_lstm_model(ticker)
    if metric:
        all_metrics.append(metric)

Processing AAPL
Training LSTM for AAPL




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Processing MSFT
Training LSTM for MSFT




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Processing NVDA
Training LSTM for NVDA




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 167ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Processing GOOG
Training LSTM for GOOG




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Processing GOOGL
Training LSTM for GOOGL




[1m1/5[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 170ms/step



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Processing AMZN
Training LSTM for AMZN




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Processing META
Training LSTM for META




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Processing AVGO
Training LSTM for AVGO




[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 259ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Processing TSLA
Training LSTM for TSLA




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Processing WMT
Training LSTM for WMT




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Processing LLY
Training LSTM for LLY




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Processing JPM
Training LSTM for JPM




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Processing V
Training LSTM for V




[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Processing UNH
Training LSTM for UNH




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Processing MA
Training LSTM for MA




[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step  
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
Processing XOM
Training LSTM for XOM




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Processing COST
Training LSTM for COST




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Processing NFLX
Training LSTM for NFLX




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Processing PG
Training LSTM for PG




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 58ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Processing ORCL
Training LSTM for ORCL




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Processing JNJ
Training LSTM for JNJ




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Processing HD
Training LSTM for HD




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Processing ABBV
Training LSTM for ABBV




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 139ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Processing KO
Training LSTM for KO




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Processing TMUS
Training LSTM for TMUS
Skipping TMUS (too little data)
