In [3]:
!pip3 install seaborn tensorflow pandas tabulate shap

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [None]:
# Updated Transformer pipeline with Saliency Maps (Input Gradient Attribution)

import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from tqdm import tqdm
import time
import atexit
from tabulate import tabulate

# Create log file with timestamp
log_time = datetime.now().strftime("%Y%m%d_%H%M%S")
os.makedirs("log", exist_ok=True)
log_filename = os.path.join("log", f"transformer_log_{log_time}.txt")
file_handler = logging.FileHandler(log_filename, mode='w')
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.WARNING)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(processName)s - %(levelname)s - %(message)s',
    handlers=[file_handler, stream_handler]
)
logger = logging.getLogger(__name__)
atexit.register(lambda: [h.flush() for h in logger.handlers if hasattr(h, 'flush')])

# Configuration
CONFIG = {
    'input_csv': '../../data/daily_stock_price/sp500_top25_technical_indicators.csv',
    'output_csv': 'transformer_evaluation_results_walk_forward.csv',
    'target': 'Close',
    'look_back': 60,
    'forecast_horizon': 1,
    'batch_size': 128,
    'epochs': 1,
    'walkforward_retrain_step': 100,
    'train_start': '2001-01-01',
    'train_end': '2020-12-31',
    'test_start': '2021-01-01'
}

FEATURE_COLUMNS = [
    'Close', 'SMA_20', 'RSI_14', 'MACD', 'MACD_Signal', 'MACD_Hist',
    'BB_Upper', 'BB_Lower', 'ATR_14', 'OBV',
    'Close_Lag_1', 'Close_Lag_2', 'Close_Lag_3', 'Close_Lag_5',
    'Volume_Lag_1', 'Volume_Lag_3', 'Daily_Return', 'Volatility_20',
    'High_Low_Range', 'Open_Close_Range', 'MACD_Hist_Slope'
]

def build_transformer_with_attention(look_back, n_features):
    inputs = tf.keras.Input(shape=(look_back, n_features))
    attn_output = tf.keras.layers.MultiHeadAttention(num_heads=2, key_dim=16)(inputs, inputs)
    x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(attn_output + inputs)
    x = tf.keras.layers.Dense(32, activation='relu')(x)
    x = tf.keras.layers.GlobalAveragePooling1D()(x)
    x = tf.keras.layers.Dense(32, activation='relu')(x)
    prediction = tf.keras.layers.Dense(1)(x)
    model = tf.keras.Model(inputs=inputs, outputs=prediction)
    model.compile(optimizer='adam', loss='mse')
    return model

def create_sequences(data, look_back, forecast_horizon, target_index=0):
    X, y = [], []
    for i in range(len(data) - look_back - forecast_horizon + 1):
        X.append(data[i:i+look_back, :])
        y.append(data[i + look_back:i + look_back + forecast_horizon, target_index])
    return np.array(X), np.array(y)

def process_stock(stock, df):
    os.makedirs("saliency_outputs", exist_ok=True)
    start_time = time.time()

    try:
        logger.info(f"Processing {stock}...")
        stock_df = df[df['symbol'] == stock].copy()
        stock_df['date'] = pd.to_datetime(stock_df['date'])
        stock_df = stock_df[(stock_df['date'] >= CONFIG['train_start'])]
        stock_df.sort_values('date', inplace=True)
        stock_df.set_index('date', inplace=True)

        features = stock_df[FEATURE_COLUMNS].dropna()
        if features.empty:
            logger.warning(f"{stock}: No data after dropping NaNs, skipping.")
            return None

        scaler = MinMaxScaler()
        features_scaled = scaler.fit_transform(features)
        target_index = FEATURE_COLUMNS.index(CONFIG['target'])
        X, y = create_sequences(features_scaled, CONFIG['look_back'], CONFIG['forecast_horizon'], target_index)

        dates = stock_df.index[CONFIG['look_back'] + CONFIG['forecast_horizon'] - 1:]
        date_mask = (dates >= pd.to_datetime(CONFIG['test_start']))
        if not any(date_mask):
            logger.warning(f"{stock}: No test data after {CONFIG['test_start']}, skipping.")
            return None

        split_idx = np.where(date_mask)[0][0]
        X_train_full, y_train_full = X[:split_idx], y[:split_idx]
        X_test, y_test = X[split_idx:], y[split_idx:]

        early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2, restore_best_weights=True)
        model = build_transformer_with_attention(CONFIG['look_back'], X.shape[2])
        model.fit(X_train_full, y_train_full, epochs=CONFIG['epochs'], batch_size=CONFIG['batch_size'], verbose=0, callbacks=[early_stop])

        predictions = []
        history_X, history_y = X_train_full.tolist(), y_train_full.tolist()

        for i in tqdm(range(len(X_test)), desc=f"{stock}: Walk-forward steps", leave=False):
            if i % CONFIG['walkforward_retrain_step'] == 0 and i > 0:
                model = build_transformer_with_attention(CONFIG['look_back'], X.shape[2])
                model.fit(np.array(history_X), np.array(history_y), epochs=CONFIG['epochs'], batch_size=CONFIG['batch_size'], verbose=0, callbacks=[early_stop])

            pred = model.predict(X_test[i:i+1], verbose=0)
            predictions.append(pred[0][0])
            history_X.append(X_test[i].tolist())
            history_y.append([y_test[i][0]])

        preds_2d = np.tile(np.array(predictions)[:, np.newaxis], (1, X.shape[2]))
        y_test_flat = y_test[:, 0]
        y_true_2d = np.tile(y_test_flat[:, np.newaxis], (1, X.shape[2]))

        preds_inv = scaler.inverse_transform(preds_2d)[:, target_index]
        y_true_inv = scaler.inverse_transform(y_true_2d)[:, target_index]

        mse = mean_squared_error(y_true_inv, preds_inv)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_true_inv, preds_inv)
        r2 = r2_score(y_true_inv, preds_inv)

        logger.info(f"{stock} - MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}")
        logger.info(f"{stock}: Done in {time.time() - start_time:.1f}s")

        try:
            input_sample = X_test[-1]
            input_tensor = tf.convert_to_tensor(input_sample[np.newaxis, ...])
            with tf.GradientTape() as tape:
                tape.watch(input_tensor)
                prediction = model(input_tensor)
            grads = tape.gradient(prediction, input_tensor).numpy()[0]
            saliency = np.mean(np.abs(grads), axis=0)

            plt.figure(figsize=(12, 6))
            plt.barh(FEATURE_COLUMNS, saliency)
            plt.xlabel("Average Absolute Gradient")
            plt.title(f"Saliency Map - {stock}")
            plt.tight_layout()
            plt.savefig(os.path.join("saliency_outputs", f"saliency_{stock}.png"))
            plt.close()
        except Exception as grad_err:
            logger.warning(f"Saliency map failed for {stock}: {grad_err}")

        return {'Stock': stock, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'R2': r2}

    except Exception as e:
        logger.error(f"Error processing {stock}: {e}")
        return None

def generate_saliency_dashboard(output_dir="saliency_outputs", output_html="saliency_dashboard.html"):
    saliency_files = [f for f in os.listdir(output_dir) if f.startswith("saliency_") and f.endswith(".png")]
    with open(output_html, "w") as f:
        f.write("<html><head><title>Saliency Map Dashboard</title></head><body>")
        f.write("<h1>Saliency Maps</h1>")
        for img_file in sorted(saliency_files):
            f.write(f"<h2>{img_file.replace('saliency_', '').replace('.png', '')}</h2>")
            f.write(f"<img src='{os.path.join(output_dir, img_file)}' style='width:800px'><hr/>")
        f.write("</body></html>")

def train_and_evaluate_transformer():
    hyperparams = [
        {'batch_size': 64, 'epochs': 1, 'walkforward_retrain_step': 100},
        {'batch_size': 128, 'epochs': 2, 'walkforward_retrain_step': 50},
        {'batch_size': 256, 'epochs': 3, 'walkforward_retrain_step': 25}
    ]

    df = pd.read_csv(CONFIG['input_csv'])
    all_results = []

    subset_stocks = ['AAPL', 'MSFT', 'NVDA']
    stocks = [s for s in df['symbol'].unique() if s in subset_stocks]

    for idx, params in enumerate(hyperparams):
        logger.info(f">>> Running Hyperparameter Set {idx+1}: {params}")
        CONFIG.update(params)
        results = []

        for stock in tqdm(stocks, desc=f"Set {idx+1} - Evaluating stocks"):
            result = process_stock(stock, df)
            if result:
                result.update(params)
                results.append(result)

        result_df = pd.DataFrame(results)
        output_path = CONFIG['output_csv'].replace('.csv', f'_set{idx+1}.csv')
        header = not os.path.exists(output_path) or os.path.getsize(output_path) == 0
        result_df.to_csv(output_path, index=False, mode='a', header=header)
        all_results.append(result_df.assign(Hyperparam_Set=idx+1))

    final_df = pd.concat(all_results, ignore_index=True)
    summary = final_df.groupby("Hyperparam_Set")[['MSE', 'RMSE', 'MAE', 'R2']].mean().reset_index()
    print("Performance Summary:")
    print(tabulate(summary, headers='keys', tablefmt='github', showindex=False))

    for metric in ['MSE', 'RMSE', 'MAE', 'R2']:
        plt.figure()
        sns.barplot(data=summary, x='Hyperparam_Set', y=metric)
        plt.title(f'Mean {metric} by Hyperparameter Set')
        plt.savefig(f'plot_{metric.lower()}.png')
        plt.close()

    generate_saliency_dashboard()
    return final_df

if __name__ == '__main__':
    logger.info("Starting Transformer model training and evaluation...")
    result_df = train_and_evaluate_transformer()
    if result_df is not None:
        logger.info("Evaluation complete.")
        print(result_df)


Set 1 - Evaluating stocks:   0%|          | 0/3 [00:00<?, ?it/s]2025-04-30 23:17:32.326137: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-04-30 23:17:32.326161: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 48.00 GB
2025-04-30 23:17:32.326173: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 18.00 GB
2025-04-30 23:17:32.326187: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-04-30 23:17:32.326197: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-04-30 23:17:32.839140: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin opti

Performance Summary:
|   Hyperparam_Set |     MSE |    RMSE |     MAE |        R2 |
|------------------|---------|---------|---------|-----------|
|                1 | 5691.55 | 63.882  | 51.2802 | -0.619001 |
|                2 | 3309.57 | 54.118  | 41.2375 | -0.25774  |
|                3 | 3078.97 | 50.6133 | 40.0437 | -0.142662 |
  Stock           MSE        RMSE         MAE        R2  batch_size  epochs  \
0  AAPL   1790.528584   42.314638   32.411157 -0.547743          64       1   
1  MSFT  14431.471026  120.131058  101.527092 -1.849819          64       1   
2  NVDA    852.653404   29.200230   19.902350  0.540558          64       1   
3  AAPL   2062.027810   45.409556   36.622950 -0.782428         128       2   
4  MSFT   6585.523961   81.151241   63.522297 -0.300460         128       2   
5  NVDA   1281.147262   35.793118   23.567167  0.309669         128       2   
6  AAPL   2067.124688   45.465643   35.058787 -0.786834         256       3   
7  MSFT   6509.723221   80.68285