In [1]:
!pip install pandas numpy matplotlib scikit-learn tensorflow keras keras-tuner



In [2]:
import os
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras_tuner.tuners import RandomSearch
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.models import Sequential

# Adjust path to your folder
data_folder = "C:/Users/Utente/Downloads/Data"
# os.listdir() returns entries in arbitrary order; sort so the stocks are
# processed in the same order on every run and on every machine.
file_list = sorted(f for f in os.listdir(data_folder) if f.endswith(".csv"))

In [3]:
def create_sequences(data, sequence_length, target_col=3):
    """Slice a 2-D array into overlapping windows and next-step targets.

    For every start row ``i`` the window is ``data[i:i + sequence_length]``
    and the target is the value of column ``target_col`` in the row that
    immediately follows the window.

    Args:
        data: 2-D array-like of shape (n_rows, n_features).
        sequence_length: Number of consecutive rows in each window.
        target_col: Column index of the value to predict. Defaults to 3,
            which is 'close' in the open/high/low/close/volume column order
            used by this notebook.

    Returns:
        Tuple ``(xs, ys)`` where ``xs`` has shape
        (n_rows - sequence_length, sequence_length, n_features) and ``ys`` has
        shape (n_rows - sequence_length,). When there are not enough rows for
        even one window, both arrays are empty but ``xs`` keeps its 3-D shape
        so callers can still read ``xs.shape[2]``.
    """
    data = np.asarray(data)
    n_samples = len(data) - sequence_length

    if n_samples <= 0:
        # Not enough rows for a single window: return correctly shaped empties
        # instead of the shape-(0,) arrays np.array([]) would produce.
        n_features = data.shape[1] if data.ndim == 2 else 0
        return (
            np.empty((0, sequence_length, n_features), dtype=data.dtype),
            np.empty((0,), dtype=data.dtype),
        )

    xs = np.stack([data[start:start + sequence_length] for start in range(n_samples)])
    # Row (start + sequence_length) for start = 0..n_samples-1 is exactly the
    # tail of the array; copy so the result does not alias the input.
    ys = data[sequence_length:, target_col].copy()
    return xs, ys

def build_model(hp, input_shape):
    """Build and compile a two-layer stacked LSTM regressor for Keras Tuner.

    Hyperparameters sampled from ``hp``:
        units1, units2: LSTM widths, 32 to 128 in steps of 32.
        dropout1, dropout2: Dropout rates, 0.1 to 0.5 in steps of 0.1.
        optimizer: "adam" or "rmsprop".

    Args:
        hp: Hyperparameter container supplied by the tuner.
        input_shape: Shape of one sample, passed to the model's input layer
            (the caller in this notebook passes (sequence_length, n_features)).

    Returns:
        A compiled ``Sequential`` model with a single-unit Dense output and
        mean-squared-error loss.
    """
    model = Sequential()
    # Declare the input with an explicit Input layer instead of passing
    # `input_shape=` to the first LSTM; Keras warns about the keyword form.
    model.add(Input(shape=input_shape))
    # First LSTM returns the full sequence so the second LSTM can consume it.
    model.add(LSTM(hp.Int("units1", 32, 128, step=32), return_sequences=True))
    model.add(Dropout(hp.Float("dropout1", 0.1, 0.5, step=0.1)))
    model.add(LSTM(hp.Int("units2", 32, 128, step=32)))
    model.add(Dropout(hp.Float("dropout2", 0.1, 0.5, step=0.1)))
    model.add(Dense(1))
    model.compile(optimizer=hp.Choice("optimizer", ["adam", "rmsprop"]), loss="mse")
    return model

In [8]:
from keras_tuner import HyperParameters
import shutil

# Number of consecutive rows fed to the LSTM for each prediction.
sequence_length = 30

# For every CSV: clean the price data, split it chronologically, tune an LSTM,
# predict the held-out final year and save one prediction per month.
for file in file_list:
    # splitext keeps dotted tickers intact ("BRK.B.csv" -> "BRK.B"),
    # whereas split(".")[0] would truncate them to "BRK".
    stock_name = os.path.splitext(file)[0]
    print(f"\n📈 Processing {stock_name}...")

    df = pd.read_csv(os.path.join(data_folder, file), parse_dates=['Date'])

    # Clean up columns with $ and commas.
    # Raw string: "\$" in a normal literal is an invalid escape sequence.
    for col in ['Close/Last', 'Open', 'High', 'Low']:
        df[col] = df[col].replace(r'[\$,]', '', regex=True).astype(float)

    df['Volume'] = df['Volume'].replace(',', '', regex=True).astype(float)

    # Rename columns after cleanup
    df = df.rename(columns={
        'Close/Last': 'close',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Volume': 'volume',
        'Date': 'date'
    })

    df = df.set_index('date')
    # Sort oldest -> newest: the sliding windows below and the monthly
    # `.first()` both rely on rows being in chronological order, which the
    # CSV's own row order does not guarantee.
    df = df[['open', 'high', 'low', 'close', 'volume']].dropna().sort_index()

    # Split: last year = test, the six months before it = validation,
    # everything earlier = training.
    test_start = df.index.max() - pd.DateOffset(years=1)
    val_start = test_start - pd.DateOffset(months=6)

    train_data = df[df.index < val_start]
    val_data = df[(df.index >= val_start) & (df.index < test_start)]
    test_data = df[df.index >= test_start]

    # Each split needs more than `sequence_length` rows to yield at least one
    # window; skip the stock instead of crashing later on empty arrays.
    if min(len(train_data), len(val_data), len(test_data)) <= sequence_length:
        print(f"⚠️ Skipping {stock_name}: not enough rows for {sequence_length}-step sequences")
        continue

    # Scale: fit on the training split only, then apply the same transform to
    # validation and test data.
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_data)
    val_scaled = scaler.transform(val_data)
    test_scaled = scaler.transform(test_data)

    # Sequences (already shaped (samples, sequence_length, n_features))
    X_train, y_train = create_sequences(train_scaled, sequence_length)
    X_val, y_val = create_sequences(val_scaled, sequence_length)
    X_test, y_test = create_sequences(test_scaled, sequence_length)

    input_shape = (sequence_length, X_train.shape[2])

    # Tuning: start from a clean directory so stale trials are not reloaded.
    tuner_dir = f"tuner_dir/{stock_name}_lstm"
    shutil.rmtree(tuner_dir, ignore_errors=True)

    tuner = RandomSearch(
        lambda hp: build_model(hp, input_shape),
        objective="val_loss",
        max_trials=5,
        executions_per_trial=1,
        directory=tuner_dir,
        project_name="lstm"
    )

    early_stop = EarlyStopping(monitor="val_loss", patience=3)
    tuner.search(X_train, y_train, validation_data=(X_val, y_val),
                 epochs=20, batch_size=64, callbacks=[early_stop], verbose=0)
    best_model = tuner.get_best_models(num_models=1)[0]

    # Evaluate
    y_pred_scaled = best_model.predict(X_test)
    n_features = scaler.n_features_in_
    close_idx = list(train_data.columns).index('close')

    # The scaler was fitted on all feature columns, so inverse_transform
    # expects that many columns: place the close values in their column of a
    # zero matrix and read only that column back.
    pad_test = np.zeros((len(y_test), n_features))
    pad_pred = np.zeros((len(y_pred_scaled), n_features))
    pad_test[:, close_idx] = y_test
    pad_pred[:, close_idx] = y_pred_scaled[:, 0]

    y_test_rescaled = scaler.inverse_transform(pad_test)[:, close_idx]
    y_pred_rescaled = scaler.inverse_transform(pad_pred)[:, close_idx]

    # The first `sequence_length` test rows only serve as history for the
    # first window, so predictions start after them.
    test_dates = test_data.index[sequence_length:]
    results_df = pd.DataFrame({
        "date": test_dates,
        "true_close": y_test_rescaled,
        "predicted_close": y_pred_rescaled
    }).set_index("date")

    # Keep the first available prediction of each calendar month.
    monthly = results_df.resample("MS").first().dropna().reset_index()
    monthly.insert(0, "stock", stock_name)
    out_file = f"{stock_name.lower()}_lstm_pred.csv"
    monthly.to_csv(out_file, index=False)
    print(f"✅ Saved: {out_file}")

  df[col] = df[col].replace('[\$,]', '', regex=True).astype(float)
  super().__init__(**kwargs)



📈 Processing AVGO...



  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
✅ Saved: avgo_lstm_pred.csv

📈 Processing AXP...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
✅ Saved: axp_lstm_pred.csv

📈 Processing BAC...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
✅ Saved: bac_lstm_pred.csv

📈 Processing CB...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
✅ Saved: cb_lstm_pred.csv

📈 Processing CMG...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
✅ Saved: cmg_lstm_pred.csv

📈 Processing EA...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
✅ Saved: ea_lstm_pred.csv

📈 Processing EBAY...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
✅ Saved: ebay_lstm_pred.csv

📈 Processing GRMN...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
✅ Saved: grmn_lstm_pred.csv

📈 Processing IBM...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
✅ Saved: ibm_lstm_pred.csv

📈 Processing IT...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
✅ Saved: it_lstm_pred.csv

📈 Processing LEG...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
✅ Saved: leg_lstm_pred.csv

📈 Processing MHK...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
✅ Saved: mhk_lstm_pred.csv

📈 Processing MS...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
✅ Saved: ms_lstm_pred.csv

📈 Processing ORLY...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step
✅ Saved: orly_lstm_pred.csv

📈 Processing XL...


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
✅ Saved: xl_lstm_pred.csv
