In [6]:
# Standard Library Imports
import json
import os
from collections import defaultdict
from itertools import product
from pickle import dump

# Third-Party Library Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ta
import yfinance as yf
import xgboost as xgb

# Scikit-learn Imports
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
)
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.tree import DecisionTreeClassifier

# TensorFlow/Keras Imports
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Bidirectional, Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

## Data Preprocessing

In [7]:
def df_for_inp(data, prices, ticker: str, include_sentiment: bool = True) -> pd.DataFrame:
    """
    Build the per-ticker modelling table from price rows and, optionally, daily mean sentiment.

    Parameters:
        data (pd.DataFrame): Tweet-level sentiment rows with 'Stock Name' and 'Day' columns.
            Not accessed when `include_sentiment` is False.
        prices (pd.DataFrame): Price rows with 'Stock Name' and 'Date' columns.
        ticker (str): Value of 'Stock Name' to keep.
        include_sentiment (bool): If True, average the sentiment columns per 'Day' and
            inner-join them to the price rows on the date.

    Returns:
        pd.DataFrame: Rows ordered by date, with 'Date' and 'Stock Name' removed and a
        fresh 0..n-1 index.
    """
    # Price rows for this ticker, with a real datetime column to sort/join on
    ticker_prices = prices.loc[prices['Stock Name'] == ticker].copy()
    ticker_prices['Date'] = pd.to_datetime(ticker_prices['Date'])

    if not include_sentiment:
        # Price-only table
        merged = ticker_prices.copy()
    else:
        # Tweets for this ticker, averaged per calendar day
        ticker_tweets = data.loc[data['Stock Name'] == ticker].drop(columns=['Stock Name'])
        daily_sentiment = ticker_tweets.groupby('Day').mean()
        daily_sentiment['Date'] = pd.to_datetime(daily_sentiment.index)
        daily_sentiment = daily_sentiment.reset_index(drop=True)

        # Inner join: only days that have both tweets and a price row survive
        merged = pd.merge(daily_sentiment, ticker_prices, on='Date', how='inner')

    # Chronological order, then strip the bookkeeping columns
    merged = merged.sort_values(by='Date').reset_index(drop=True)
    return merged.drop(columns=['Date', 'Stock Name'], errors='ignore')

In [8]:
def download_stock_data(ticker, start_date, end_date):
    """
    Download daily price history for one ticker and return it as a flat table.

    Parameters:
        ticker (str): Ticker symbol passed to `yf.download`.
        start_date, end_date: Date range passed through to `yf.download`.

    Returns:
        pd.DataFrame: Columns 'Date', 'Open', 'High', 'Low', 'Close', 'Volume',
        'Stock Name' (the upper-cased ticker), with single-level column labels.
    """
    data = yf.download(ticker, start=start_date, end=end_date)
    data = data.reset_index()

    # yfinance may return two-level columns (field, ticker) even for one ticker.
    # Keep only the field level so the frame has ordinary column names; this
    # replaces the old "write CSV, read back, drop the junk first row" workaround.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = list(data.columns.get_level_values(0))

    data['Stock Name'] = ticker.upper()
    return data[['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Stock Name']]


def get_stock_data(tickers, start_date, end_date, temp_file="clean.csv"):
    """
    Download price history for several tickers and stack it into one DataFrame.

    Parameters:
        tickers (iterable of str): Ticker symbols to download.
        start_date, end_date: Date range passed to `download_stock_data`.
        temp_file (str): Kept only for backward compatibility. It used to name a
            scratch CSV; no file is written or removed any more.

    Returns:
        pd.DataFrame: All tickers concatenated, with float price/volume columns
        and a datetime 'Date' column. Empty (but with the expected columns) when
        `tickers` is empty.
    """
    columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Stock Name']

    # Collect first, concatenate once (avoids re-copying the growing frame each loop)
    frames = [download_stock_data(ticker, start_date, end_date) for ticker in tickers]
    if not frames:
        return pd.DataFrame(columns=columns)

    final_df = pd.concat(frames, ignore_index=True)

    # Uniform dtypes regardless of what the download returned
    cols_to_convert = ['Open', 'High', 'Low', 'Close', 'Volume']
    final_df[cols_to_convert] = final_df[cols_to_convert].astype(float)
    final_df['Date'] = pd.to_datetime(final_df['Date'])

    return final_df

In [9]:
def split_X_y(df, target_col, method='minmax', include_sent=True, n_input=5, n_output=1, add_indicators=True):
    """
    Prepares time-series input and binary target sequences for model training and testing.

    The frame is split chronologically (first 70% of rows for training). The test
    slice starts `n_input + n_output - 1` rows before the split point so that the
    first test label refers to the first row after the training rows.

    Parameters:
        df (pd.DataFrame): Input DataFrame containing stock prices and optional sentiment columns.
            Must contain a 'Close' column when `add_indicators` is True.
        target_col (str): Column used to build the up/down label (typically 'Close').
        method (str): Scaling method to apply. Options are:
                      - 'minmax' for MinMaxScaler
                      - 'standard' for StandardScaler
                      - None for no scaling
        include_sent (bool): If False, the 'negative', 'neutral' and 'positive' columns are dropped.
        n_input (int): Number of time steps in each input sequence.
        n_output (int): Number of time steps ahead used to define the label.
        add_indicators (bool): Whether to add SMA_10, SMA_20, RSI, MACD and MACD_signal columns.

    Returns:
        X_train (np.ndarray): Training sequences of shape (samples, n_input, features).
        y_train (np.ndarray): 1 where the target `n_output` steps after the window's last row
            is greater than the target at the window's last row, else 0.
        X_test (np.ndarray): Test sequences, same layout as X_train.
        y_test (np.ndarray): Test labels.
        scaler_x (sklearn scaler or None): Scaler fitted on the training rows only.

    Raises:
        ValueError: If `method` is not 'minmax', 'standard' or None.
    """

    df = df.copy()

    # Step 1: Optionally drop sentiment columns (missing ones are ignored)
    if not include_sent:
        df = df.drop(columns=['negative', 'neutral', 'positive'], errors='ignore')

    # Step 2: Train-test split (raw, BEFORE any indicators)
    num_in_train_raw = round(len(df) * 0.7)
    # Clamp at 0: on short frames the raw offset goes negative and iloc would
    # silently slice from the END of the frame instead of the beginning.
    test_start = max(num_in_train_raw - n_input - n_output + 1, 0)
    df_train_raw = df.iloc[:num_in_train_raw].copy()
    df_test_raw = df.iloc[test_start:].copy()

    # Step 3: Add indicators SEPARATELY for train and test so no test rows
    # feed into training-side rolling statistics
    if add_indicators:
        for temp_df in [df_train_raw, df_test_raw]:
            temp_df['SMA_10'] = ta.trend.sma_indicator(temp_df['Close'], window=10)
            temp_df['SMA_20'] = ta.trend.sma_indicator(temp_df['Close'], window=20)
            temp_df['RSI'] = ta.momentum.rsi(temp_df['Close'], window=14)

            macd = ta.trend.MACD(temp_df['Close'])
            temp_df['MACD'] = macd.macd()
            temp_df['MACD_signal'] = macd.macd_signal()

    # Step 4: Drop rows with NaN values (e.g. indicator warm-up rows)
    df_train_raw = df_train_raw.dropna().reset_index(drop=True)
    df_test_raw = df_test_raw.dropna().reset_index(drop=True)

    # Step 5: Every remaining column is a feature -- including the target column,
    # whose past values are part of each input window
    feature_cols = df_train_raw.columns.tolist()

    # Step 6: Scaling (fit on train only, then applied to both splits)
    scaler_x = None
    if method is not None:
        if method == 'minmax':
            Scaler = MinMaxScaler
        elif method == 'standard':
            Scaler = StandardScaler
        else:
            raise ValueError("method must be 'minmax', 'standard', or None.")

        scaler_x = Scaler()
        scaler_x.fit(df_train_raw[feature_cols])
        df_train_raw[feature_cols] = scaler_x.transform(df_train_raw[feature_cols])
        df_test_raw[feature_cols] = scaler_x.transform(df_test_raw[feature_cols])

    # Step 7: Sequence creation
    def make_sequences(data):
        n_samples = len(data) - n_input - n_output + 1
        if n_samples <= 0:
            # Too few rows for even one window: return correctly-shaped empties
            return np.empty((0, n_input, len(feature_cols))), np.empty((0,), dtype=int)

        features = data[feature_cols].to_numpy()
        target = data[target_col].to_numpy()

        # Window i covers rows [i, i + n_input)
        X_seq = np.stack([features[i:i + n_input] for i in range(n_samples)])

        # Label i compares the target n_output steps after the window's last row
        # against the target at the window's last row
        last_idx = n_input - 1
        future_idx = n_input + n_output - 1
        last_close = target[last_idx:last_idx + n_samples]
        future_close = target[future_idx:future_idx + n_samples]
        return X_seq, (future_close > last_close).astype(int)

    X_train, y_train = make_sequences(df_train_raw)
    X_test, y_test = make_sequences(df_test_raw)

    return X_train, y_train, X_test, y_test, scaler_x


#### Make the dataset

In [10]:
# Read the tweet-level sentiment table from disk
data_sent = pd.read_csv('data/data_sentiment.csv')

# Keep only the calendar day of each tweet, then drop the columns that are no longer needed
data_sent['Day'] = pd.to_datetime(data_sent['Date']).dt.date
data_sent = data_sent.drop(columns=['Date', 'Tweet', 'Company Name'])

In [11]:
# Historical price table (read by train_and_evaluate_all_models via the global `prices_hist`)
prices_hist = pd.read_csv(filepath_or_buffer='data/stock_yfinance_data.csv')

In [19]:
# Freshly downloaded prices (read by train_and_evaluate_models via the global `final_df`)
tickers = ['TSLA', 'MSFT', 'PG', 'META', 'AMZN', 'GOOG', 'AMD', 'AAPL']
final_df = get_stock_data(tickers, start_date='2024-07-20', end_date='2025-07-20')

  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***

## Trading Simulation

In [12]:
def simulate_intraday_long_short(model, X_test_flat, price_data_open, price_data_close, initial_cash=10000):
    """
    Replay a one-trade-per-step long/short strategy driven by the model's class predictions.

    For each prediction the position is opened at that step's open price and closed at
    the same step's close price:
    - class 1 -> long  (profit = shares * (close - open))
    - class 0 -> short (profit = shares * (open - close))
    Any other predicted value produces no trade. Position size is the whole number of
    shares the current cash buys at the open (floor division). No costs are modelled.

    Parameters:
        model: Fitted classifier exposing `predict`.
        X_test_flat: Feature matrix handed to `model.predict`.
        price_data_open: Open prices, indexable by step number, aligned with the predictions.
        price_data_close: Close prices, indexable by step number, aligned with the predictions.
        initial_cash: Starting capital.

    Returns:
        final_cash: Cash after the last step.
        trades: One dict per executed trade with keys 'day', 'action', 'open_price',
                'close_price', 'shares', 'profit'.
    """
    predictions = model.predict(X_test_flat)
    balance = initial_cash
    trade_log = []

    for day, signal in enumerate(predictions):
        entry_price = price_data_open[day]
        exit_price = price_data_close[day]

        # Translate the predicted class into a side and the per-share gain for that side
        if signal == 1:
            side, gain_per_share = 'long', exit_price - entry_price
        elif signal == 0:
            side, gain_per_share = 'short', entry_price - exit_price
        else:
            continue

        quantity = balance // entry_price
        pnl = quantity * gain_per_share
        balance += pnl
        trade_log.append({
            'day': day,
            'action': side,
            'open_price': entry_price,
            'close_price': exit_price,
            'shares': quantity,
            'profit': pnl
        })

    return balance, trade_log

## Training

In [None]:
def create_lstm_model(n_input, num_features, depth=3):
    """
    Build and compile a stacked-LSTM binary classifier.

    Parameters:
        n_input (int): Sequence length of each input sample.
        num_features (int): Number of features per time step.
        depth (int): Number of LSTM layers. Layer `i` has max(128 // 2**i, 32) units and
            is followed by BatchNormalization and Dropout(0.3).

    Returns:
        A compiled Sequential model (Adam optimizer, binary cross-entropy loss,
        accuracy metric) ending in a single sigmoid unit.
    """
    network = tf.keras.Sequential()
    final_level = depth - 1

    for level in range(depth):
        # Only the last LSTM collapses the sequence; earlier ones pass it on in full
        lstm_options = {'return_sequences': level < final_level}
        if level == 0:
            lstm_options['input_shape'] = (n_input, num_features)

        width = max(128 // (2 ** level), 32)
        network.add(layers.LSTM(width, **lstm_options))
        network.add(layers.BatchNormalization())
        network.add(layers.Dropout(0.3))

    # Dense head narrowing down to one sigmoid output
    for width in (64, 32, 16):
        network.add(layers.Dense(width, activation='relu'))
    network.add(layers.Dense(1, activation='sigmoid'))

    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network



class TransformerBlock(tf.keras.layers.Layer):
    """
    One encoder block: self-attention and a two-layer feed-forward network, each
    followed by dropout, a residual connection and layer normalization.

    Parameters:
        embed_dim (int): Size of the last input dimension; also used as the attention `key_dim`.
        num_heads (int): Number of attention heads.
        ff_dim (int): Hidden width of the feed-forward network.
        rate (float): Dropout rate applied after attention and after the feed-forward network.
        **kwargs: Standard Keras layer arguments (e.g. `name`), forwarded to the base class.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        # Self-attention: the sequence attends to itself (query = value = inputs)
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so a model containing this layer can be saved and rebuilt."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config


class TokenAndPositionEmbedding(tf.keras.layers.Layer):
    """
    Project each time step to `embed_dim` with a Dense layer and add a learned
    embedding of the step's position in the sequence.

    Parameters:
        sequence_length (int): Number of distinct positions the embedding table holds.
        num_features (int): Number of input features per step. Not used to build any
            sub-layer; stored only so the layer config is complete.
        embed_dim (int): Output size of both the projection and the position embedding.
        **kwargs: Standard Keras layer arguments (e.g. `name`), forwarded to the base class.
    """

    def __init__(self, sequence_length, num_features, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them
        self.sequence_length = sequence_length
        self.num_features = num_features
        self.embed_dim = embed_dim

        self.dense_proj = layers.Dense(embed_dim)
        self.pos_emb = layers.Embedding(input_dim=sequence_length, output_dim=embed_dim)

    def call(self, x, training=None):
        x_proj = self.dense_proj(x)
        # Positions 0..T-1, where T is the size of axis 1 of the input
        positions = tf.range(start=0, limit=tf.shape(x)[1], delta=1)
        positions = self.pos_emb(positions)
        return x_proj + positions

    def get_config(self):
        """Return the constructor arguments so a model containing this layer can be saved and rebuilt."""
        config = super().get_config()
        config.update({
            "sequence_length": self.sequence_length,
            "num_features": self.num_features,
            "embed_dim": self.embed_dim,
        })
        return config


def create_transformer_model(n_input, num_features, depth=1):
    """
    Build and compile a transformer-encoder binary classifier for fixed-length sequences.

    Parameters:
        n_input (int): Sequence length of each input sample.
        num_features (int): Number of features per time step.
        depth (int): Number of stacked TransformerBlock layers.

    Returns:
        A compiled functional Keras model (Adam optimizer, binary cross-entropy loss,
        accuracy metric) ending in a single sigmoid unit.
    """
    embed_dim, num_heads, ff_dim = 64, 2, 128

    inputs = layers.Input(shape=(n_input, num_features))
    hidden = TokenAndPositionEmbedding(n_input, num_features, embed_dim)(inputs)

    for _ in range(depth):
        hidden = TransformerBlock(embed_dim, num_heads, ff_dim)(hidden)

    # Average over the time axis, then a dropout-regularised dense head
    hidden = layers.GlobalAveragePooling1D()(hidden)
    for width in (64, 32):
        hidden = layers.Dropout(0.3)(hidden)
        hidden = layers.Dense(width, activation='relu')(hidden)
    outputs = layers.Dense(1, activation='sigmoid')(hidden)

    classifier = models.Model(inputs=inputs, outputs=outputs)
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier



In [None]:
def train_and_evaluate_all_models(tickers, scalers, sent_flags, models_to_run, n_inputs=[5], n_runs=5, technical_indicators=True, depths=[{'LSTM': 3, 'Transformer': 1}]):
    """
    Train and score every requested model over the full grid of data settings.

    Reads the notebook-level globals `data_sent` and `prices_hist` to build each
    ticker's table. After every single run the accumulated results are rewritten
    to "training_results2.csv", so partial progress survives an interruption.

    Parameters:
        tickers (list of str): Tickers to evaluate.
        scalers (list): Scaling methods passed to `split_X_y` ('minmax', 'standard' or None).
        sent_flags (list of bool): Whether sentiment columns are included.
        models_to_run (list of str): Any of 'LSTM', 'Transformer', 'XGBoost', 'LinearRegression'.
        n_inputs (list of int): Lag-window lengths to try.
        n_runs (int): Repetitions per configuration.
        technical_indicators (bool or list of bool): Indicator settings to try. A bare
            bool is treated as a one-element list.
        depths (list of dict): Each dict maps a neural model name to its depth; models
            missing from a dict use depth 1.

    Returns:
        pd.DataFrame: One row per (configuration, model, run). `depth` is None for
        models that have no depth setting (XGBoost, LinearRegression).
    """
    results = []

    # Only the neural models are built through a factory; the others are handled inline
    model_creators = {
        'LSTM': create_lstm_model,
        'Transformer': create_transformer_model,
    }

    # The default is a bare bool, which cannot be iterated -- wrap it
    if isinstance(technical_indicators, (bool, np.bool_)):
        ti_options = [technical_indicators]
    else:
        ti_options = technical_indicators

    # product() iterates in the same order as the equivalent nested loops
    for scaler, ti, n_input, depth, ticker, sent in product(scalers, ti_options, n_inputs, depths, tickers, sent_flags):

        df = df_for_inp(data_sent, prices_hist, ticker, sent)
        X_train, y_train, X_test, y_test, _ = split_X_y(df, 'Close', scaler, include_sent=sent, n_input=n_input, add_indicators=ti)

        for model_name in models_to_run:
            for run in range(n_runs):
                print(f"[{model_name}] {ticker} | Scaler: {scaler} | Sent: {sent} | Run {run + 1} | Technical Indicators {ti} | Lag window {n_input} | Depth {depth}")

                # Reset per run so values from a previous model never leak into this row
                train_loss, val_loss, y_pred_probs = None, None, None
                depth_temp = None

                if model_name == 'XGBoost':
                    # Tree models take 2-D input: flatten (steps, features) per sample
                    X_train_flat = X_train.reshape(X_train.shape[0], -1)
                    X_test_flat = X_test.reshape(X_test.shape[0], -1)
                    model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss', verbosity=0)
                    model.fit(X_train_flat, y_train)
                    y_pred = model.predict(X_test_flat)

                elif model_name == 'LinearRegression':
                    X_train_flat = X_train.reshape(X_train.shape[0], -1)
                    X_test_flat = X_test.reshape(X_test.shape[0], -1)
                    model = LinearRegression()
                    model.fit(X_train_flat, y_train)
                    # Regression output is thresholded at 0.5 to get a class
                    y_pred_probs = model.predict(X_test_flat)
                    y_pred = (y_pred_probs > 0.5).astype(int)

                else:
                    model_creator = model_creators[model_name]
                    depth_temp = depth.get(model_name, 1)  # Default depth is 1
                    model = model_creator(X_train.shape[1], X_train.shape[2], depth=depth_temp)
                    early_stop = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)
                    history = model.fit(X_train, y_train, epochs=500, batch_size=16, validation_split=0.1, callbacks=[early_stop], verbose=0)
                    train_loss = history.history['loss']
                    val_loss = history.history['val_loss']
                    y_pred_probs = model.predict(X_test)
                    y_pred = (y_pred_probs > 0.5).astype(int).flatten()

                acc = accuracy_score(y_test, y_pred)
                f1 = f1_score(y_test, y_pred, zero_division=0)
                class_dist = dict(zip(*np.unique(y_pred, return_counts=True)))

                results.append({
                    'model': model_name,
                    'ticker': ticker,
                    'scaler': scaler,
                    'sentiment': sent,
                    'run': run + 1,
                    'accuracy': acc,
                    'f1_score': f1,
                    'train_loss': train_loss,
                    'val_loss': val_loss,
                    'class_distribution': class_dist,
                    'lag': n_input,
                    'pred_probs': y_pred_probs,
                    'technical_indicators': ti,
                    'depth': depth_temp
                })

                # Checkpoint after every run
                pd.DataFrame(results).to_csv("training_results2.csv", index=False)

    return pd.DataFrame(results)


In [None]:
# Grid for the neural / baseline comparison run
tickers = ['TSLA', 'MSFT', 'PG', 'META', 'AMZN', 'GOOG', 'AMD', 'AAPL']
scalers = ['standard']  # split_X_y also accepts 'minmax' and None
sent_flags = [True, False]
models_to_run = ['LSTM', 'XGBoost', 'LinearRegression', 'Transformer']
technical_indicators = [True, False]
lag_windows = [5, 10, 15]
# Same depth for both neural models: 10, 13, 16 then 19, 26, 33
depths = [
    {'LSTM': d, 'Transformer': d}
    for d in [*range(10, 17, 3), *range(19, 34, 7)]
]

results_df = train_and_evaluate_all_models(
    tickers,
    scalers,
    sent_flags,
    models_to_run,
    n_inputs=lag_windows,
    n_runs=3,
    technical_indicators=technical_indicators,
    depths=depths,
)

## Tree-Based Models (XGBoost and AdaBoost)

In [20]:
def _iter_tree_candidates(model_name, xgb_params, ada_params):
    """Yield (unfitted model, hyperparameter result-columns) for every grid point of `model_name`."""
    if model_name == 'XGBoost':
        grid = product(
            xgb_params['max_depth'],
            xgb_params['min_child_weight'],
            xgb_params['gamma'],
            xgb_params['learning_rate'],
            xgb_params['n_estimators'],
        )
        for max_depth, min_child_weight, gamma, lr, n_estimators in grid:
            model = xgb.XGBClassifier(
                use_label_encoder=False,
                eval_metric='logloss',
                verbosity=0,
                max_depth=max_depth,
                min_child_weight=min_child_weight,
                gamma=gamma,
                learning_rate=lr,
                n_estimators=n_estimators
            )
            yield model, {
                'xgb_max_depth': max_depth,
                'xgb_min_child_weight': min_child_weight,
                'xgb_gamma': gamma,
                'learning_rate': lr,
                'n_estimators': n_estimators,
            }

    elif model_name == 'AdaBoost':
        # Accept the current key and the legacy 'base_estimator__' spelling
        depth_key = 'estimator__max_depth'
        if depth_key not in ada_params:
            depth_key = 'base_estimator__max_depth'
        grid = product(ada_params[depth_key], ada_params['learning_rate'], ada_params['n_estimators'])
        for max_depth, lr, n_estimators in grid:
            model = AdaBoostClassifier(
                estimator=DecisionTreeClassifier(max_depth=max_depth),
                learning_rate=lr,
                n_estimators=n_estimators
            )
            yield model, {
                'base_estimator_max_depth': max_depth,
                'learning_rate': lr,
                'n_estimators': n_estimators,
            }
    # Any other model name yields nothing, so it contributes no result rows


def _fit_score_simulate(model, X_train_flat, y_train, X_test_flat, y_test,
                        test_open_prices, test_close_prices, initial_cash):
    """Fit `model`, score it on the test split and run the long/short simulation; return the metrics."""
    model.fit(X_train_flat, y_train)
    y_pred = model.predict(X_test_flat)

    portfolio_value, _trade_log = simulate_intraday_long_short(
        model=model,
        X_test_flat=X_test_flat,
        price_data_open=test_open_prices,
        price_data_close=test_close_prices,
        initial_cash=initial_cash
    )

    return {
        'accuracy': accuracy_score(y_test, y_pred),
        'f1_score': f1_score(y_test, y_pred, zero_division=0),
        'class_distribution': dict(zip(*np.unique(y_pred, return_counts=True))),
        'profit': portfolio_value - initial_cash,
    }


def train_and_evaluate_models(tickers, scalers, sent_flags, models_to_run,
                               n_inputs=[5], n_runs=5, technical_indicators=True,
                               xgb_params=None, ada_params=None):
    """
    Grid-search XGBoost and/or AdaBoost over data settings and hyperparameters,
    recording classification metrics and simulated trading profit for each fit.

    Reads the notebook-level globals `data_sent` and `final_df` to build each ticker's
    table. After each (scaler, indicators, lag, ticker, sentiment) combination the
    accumulated results are rewritten to "training_results_updated1.csv".

    Parameters:
        tickers (list of str): Tickers to evaluate.
        scalers (list): Scaling methods passed to `split_X_y` ('minmax', 'standard' or None).
        sent_flags (list of bool): Whether sentiment columns are included.
        models_to_run (list of str): 'XGBoost' and/or 'AdaBoost'; other names produce no rows.
        n_inputs (list of int): Lag-window lengths to try.
        n_runs (int): Repetitions per configuration.
        technical_indicators (bool or list of bool): Indicator settings to try. A bare
            bool is treated as a one-element list.
        xgb_params (dict or None): Lists of values for 'max_depth', 'min_child_weight',
            'gamma', 'learning_rate', 'n_estimators'. None uses a single default point.
        ada_params (dict or None): Lists of values for 'estimator__max_depth' (the legacy
            key 'base_estimator__max_depth' is also accepted), 'learning_rate',
            'n_estimators'. None uses a single default point.

    Returns:
        pd.DataFrame: One row per fitted model. 'profit' is the simulated final cash
        minus the 10000 starting cash.
    """
    results = []
    initial_cash = 10000

    # Default hyperparameters if none provided
    if xgb_params is None:
        xgb_params = {
            'max_depth': [3],
            'min_child_weight': [1],
            'gamma': [0],
            'learning_rate': [0.1],
            'n_estimators': [100]
        }

    if ada_params is None:
        ada_params = {
            'estimator__max_depth': [1],
            'learning_rate': [1.0],
            'n_estimators': [50]
        }

    # The default is a bare bool, which cannot be iterated -- wrap it
    if isinstance(technical_indicators, (bool, np.bool_)):
        ti_options = [technical_indicators]
    else:
        ti_options = technical_indicators

    # product() iterates in the same order as the equivalent nested loops
    for scaler, ti, n_input, ticker, sent in product(scalers, ti_options, n_inputs, tickers, sent_flags):

        df = df_for_inp(data_sent, final_df, ticker, sent)
        X_train, y_train, X_test, y_test, _ = split_X_y(df, 'Close', scaler, include_sent=sent, n_input=n_input, add_indicators=ti)

        # Flatten inputs for classical ML
        X_train_flat = X_train.reshape(X_train.shape[0], -1)
        X_test_flat = X_test.reshape(X_test.shape[0], -1)

        # Unscaled prices of the last len(X_test) rows, one per test prediction
        test_rows = df.iloc[len(df) - len(X_test):]
        test_open_prices = test_rows['Open'].values
        test_close_prices = test_rows['Close'].values

        for model_name in models_to_run:
            for run in range(n_runs):

                print(f"[{model_name}] {ticker} | Scaler: {scaler} | Sent: {sent} | Run {run + 1} | TI: {ti} | Lag: {n_input}")

                for model, hyperparams in _iter_tree_candidates(model_name, xgb_params, ada_params):
                    metrics = _fit_score_simulate(
                        model, X_train_flat, y_train, X_test_flat, y_test,
                        test_open_prices, test_close_prices, initial_cash
                    )

                    results.append({
                        'model': model_name,
                        'ticker': ticker,
                        'scaler': scaler,
                        'sentiment': sent,
                        'run': run + 1,
                        'accuracy': metrics['accuracy'],
                        'f1_score': metrics['f1_score'],
                        'train_loss': None,
                        'val_loss': None,
                        'class_distribution': metrics['class_distribution'],
                        'lag': n_input,
                        'pred_probs': None,
                        'technical_indicators': ti,
                        **hyperparams,
                        'profit': metrics['profit']
                    })

        # Checkpoint after each data configuration
        pd.DataFrame(results).to_csv("training_results_updated1.csv", index=False)

    return pd.DataFrame(results)


In [None]:
# Hyperparameter grid search for the tree-based models (price data only, no sentiment)
tickers = ['TSLA', 'MSFT', 'PG', 'META', 'AMZN', 'GOOG', 'AMD', 'AAPL']
scalers = ['standard', 'minmax', None]
sent_flags = [False]
technical_indicators = [True, False]
lag_windows = [5, 10, 15]

models_to_run = ['XGBoost', 'AdaBoost']

xgb_hyperparams = dict(
    max_depth=[3, 5],
    min_child_weight=[1, 3],
    gamma=[0, 0.2],
    learning_rate=[0.05, 0.1],
    n_estimators=[100, 150],
)

ada_hyperparams = dict(
    estimator__max_depth=[1, 3, 4, 5],
    learning_rate=[0.5, 1.0],
    n_estimators=[50, 100],
)

results_df = train_and_evaluate_models(
    tickers,
    scalers,
    sent_flags,
    models_to_run,
    n_inputs=lag_windows,
    n_runs=3,
    technical_indicators=technical_indicators,
    xgb_params=xgb_hyperparams,
    ada_params=ada_hyperparams,
)


In [None]:
# Repeated runs (100 per ticker) of one fixed AdaBoost configuration
tickers = ['TSLA', 'MSFT', 'PG', 'META', 'AMZN', 'GOOG', 'AMD', 'AAPL']
scalers = [None]
sent_flags = [False]
technical_indicators = [False]
lag_windows = [10]

models_to_run = ['AdaBoost']

# Passed through for completeness; not used while models_to_run contains only 'AdaBoost'
xgb_hyperparams = dict(
    max_depth=[3],
    min_child_weight=[1],
    gamma=[0.2],
    learning_rate=[0.10],
    n_estimators=[100],
)

ada_hyperparams = dict(
    estimator__max_depth=[5],
    learning_rate=[1],
    n_estimators=[50],
)

results_df = train_and_evaluate_models(
    tickers,
    scalers,
    sent_flags,
    models_to_run,
    n_inputs=lag_windows,
    n_runs=100,
    technical_indicators=technical_indicators,
    xgb_params=xgb_hyperparams,
    ada_params=ada_hyperparams,
)