In [1]:
!pip3 install seaborn tensorflow pandas tabulate shap

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip' command.


In [2]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from tqdm import tqdm
import time
import atexit
from tabulate import tabulate
import sys
import contextlib
from io import StringIO
from joblib import Parallel, delayed
import gc
import warnings
warnings.filterwarnings("ignore")

# Run settings for the CNN-Transformer hybrid experiment.
# Keys are looked up by name in the functions below, so the spelling of each
# key is part of the contract.
CONFIG = dict(
    # CSV that is loaded and filtered per 'symbol'.
    input_csv='../../data/daily_stock_price/sp500_top25_technical_indicators.csv',
    # NOTE(review): not read by the code visible in this cell - confirm it is still needed.
    output_csv='cnn_transformer_results_walk_forward.csv',
    # Column the model predicts; must be one of FEATURE_COLUMNS.
    target='Close',
    # Number of past rows in each input window / number of future rows predicted.
    look_back=60,
    forecast_horizon=1,
    # Passed straight to model.fit().
    batch_size=128,
    epochs=5,
    # NOTE(review): not read by the code visible in this cell - confirm it is still needed.
    walkforward_retrain_step=200,
    # Rows dated before train_start are dropped; samples whose target date is
    # on or after test_start form the test set.
    train_start='2001-01-01',
    # NOTE(review): not read by the code visible in this cell; the split uses test_start only.
    train_end='2020-12-31',
    test_start='2021-01-01',
    # Tag used in the log file name, the output directory name and the result rows.
    hyperparam_set=1,
)

# Columns fed to the model, in input-channel order.
# The order is significant: the target's channel index is derived from its
# position in this list, and the per-feature saliency CSV uses this list as
# its column header.
FEATURE_COLUMNS = [
    'Close',
    'SMA_20',
    'RSI_14',
    'MACD',
    'MACD_Signal',
    'MACD_Hist',
    'BB_Upper',
    'BB_Lower',
    'ATR_14',
    'OBV',
    'Close_Lag_1',
    'Close_Lag_2',
    'Close_Lag_3',
    'Close_Lag_5',
    'Volume_Lag_1',
    'Volume_Lag_3',
    'Daily_Return',
    'Volatility_20',
    'High_Low_Range',
    'Open_Close_Range',
    'MACD_Hist_Slope',
]

# Logging setup: INFO and above go to a timestamped file under ./log, while
# only ERROR and above are echoed to the console stream.
log_time = datetime.now().strftime("%Y%m%d_%H%M%S")
os.makedirs("log", exist_ok=True)
log_filename = os.path.join("log", f"cnn_transformer_log_set_{CONFIG['hyperparam_set']}_{log_time}.txt")
file_handler = logging.FileHandler(log_filename, mode='w')
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.ERROR)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(processName)s - %(levelname)s - %(message)s',
    handlers=[file_handler, stream_handler],
    # basicConfig() does nothing when the root logger already has handlers
    # (e.g. when this cell is re-run); force=True (Python 3.8+) removes and
    # closes the old handlers so the freshly created log file is really used.
    force=True,
)
logger = logging.getLogger(__name__)


def _flush_log_handlers():
    """Flush every handler attached to the root logger.

    basicConfig() installs the handlers on the root logger, not on `logger`,
    so iterating `logger.handlers` would find nothing to flush.
    """
    for handler in logging.getLogger().handlers:
        handler.flush()


atexit.register(_flush_log_handlers)

def create_sequences(data, look_back, forecast_horizon, target_index=0):
    """Slice a 2-D series into overlapping (window, target) training pairs.

    Args:
        data: array-like of shape (n_rows, n_features), ordered in time.
        look_back: number of consecutive rows in each input window.
        forecast_horizon: number of rows following the window used as target.
        target_index: column of `data` from which the targets are taken.

    Returns:
        Tuple ``(X, y)`` of float32 arrays with shapes
        ``(n_samples, look_back, n_features)`` and
        ``(n_samples, forecast_horizon)``, where
        ``n_samples = n_rows - look_back - forecast_horizon + 1``.
        When `data` is too short to form a single pair, correctly shaped
        empty arrays (``n_samples == 0``) are returned so that callers can
        still read ``X.shape[2]``.
    """
    data = np.asarray(data)
    n_samples = len(data) - look_back - forecast_horizon + 1

    if n_samples <= 0:
        # np.array([]) would have shape (0,), which loses the window/feature axes.
        n_features = data.shape[1] if data.ndim > 1 else 0
        return (
            np.empty((0, look_back, n_features), dtype=np.float32),
            np.empty((0, forecast_horizon), dtype=np.float32),
        )

    X = [data[i:i + look_back, :] for i in range(n_samples)]
    y = [
        data[i + look_back:i + look_back + forecast_horizon, target_index]
        for i in range(n_samples)
    ]
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

def build_cnn_transformer_model(look_back, n_features):
    """Build and compile the CNN + self-attention regression model.

    Architecture: two causal Conv1D layers -> LayerNorm -> multi-head
    self-attention with a residual connection -> LayerNorm -> learned
    attention pooling over the time axis -> Dense(32) -> Dense(1).

    Args:
        look_back: number of time steps in each input window.
        n_features: number of feature channels per time step.

    Returns:
        A compiled ``tf.keras.Model`` (Adam optimizer, MSE loss) mapping
        inputs of shape ``(batch, look_back, n_features)`` to ``(batch, 1)``.
    """
    inputs = tf.keras.Input(shape=(look_back, n_features))

    # Causal padding keeps the time dimension at `look_back` and prevents a
    # step from seeing later steps inside the convolution.
    x = tf.keras.layers.Conv1D(filters=64, kernel_size=3, padding='causal', activation='relu')(inputs)
    x = tf.keras.layers.Conv1D(filters=64, kernel_size=5, padding='causal', activation='relu')(x)
    x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)

    # Self-attention block with residual connection.
    attn_output = tf.keras.layers.MultiHeadAttention(num_heads=2, key_dim=16)(x, x)
    x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(attn_output + x)

    # Temporal attention for aggregation instead of GlobalAveragePooling.
    # The softmax has to normalise across the TIME axis (axis=1). Applying it
    # as the Dense activation normalises over the last axis, which has size 1,
    # so every weight would be exactly 1.0 and the pooling would degenerate
    # into a plain sum over time.
    time_scores = tf.keras.layers.Dense(1)(x)  # shape (batch, time, 1)
    time_weights = tf.keras.layers.Softmax(axis=1)(time_scores)  # sums to 1 over time
    x = tf.keras.layers.Multiply()([x, time_weights])
    x = tf.keras.layers.Lambda(lambda z: tf.reduce_sum(z, axis=1))(x)  # weighted sum

    x = tf.keras.layers.Dense(32, activation='relu')(x)
    outputs = tf.keras.layers.Dense(1)(x)
    model = tf.keras.Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse')
    return model

def process_stock(stock, df, hyperparam_set, output_dir, config):
    """Train, evaluate and explain the CNN-Transformer model for one stock.

    Steps: filter `df` to `stock`, scale the FEATURE_COLUMNS, build sliding
    windows, split chronologically at ``config['test_start']``, fit a fresh
    model on the earlier samples, score it on the later ones and write
    gradient-saliency CSVs/plots for the last test window.

    Args:
        stock: value of the 'symbol' column to process.
        df: DataFrame with 'symbol', 'date' and all FEATURE_COLUMNS.
        hyperparam_set: accepted for call compatibility; not used in the body
            (the value stored in the result comes from `config`).
        output_dir: existing directory that receives the saliency files.
        config: mapping with 'train_start', 'test_start', 'target',
            'look_back', 'forecast_horizon', 'epochs', 'batch_size' and
            'hyperparam_set'.

    Returns:
        Dict with 'Stock', 'MSE', 'RMSE', 'MAE', 'R2' and 'hyperparam_set',
        or None when there is no usable data or any step raises. Metrics are
        computed on the MinMax-scaled target, not on raw prices.
    """
    try:
        logger.info(f"Processing {stock} with CNN-Transformer hybrid...")
        stock_df = df[df['symbol'] == stock].copy()
        stock_df['date'] = pd.to_datetime(stock_df['date'])
        stock_df = stock_df[stock_df['date'] >= config['train_start']]
        stock_df = stock_df.sort_values('date').set_index('date')

        features = stock_df[FEATURE_COLUMNS].dropna()
        if features.empty:
            return None

        test_start = pd.to_datetime(config['test_start'])

        # Fit the scaler on pre-test rows only. Fitting on the full history
        # would leak the min/max of the test period into training
        # (look-ahead bias). Test-period values may therefore fall outside
        # the [0, 1] range, which is expected.
        train_rows = features.loc[features.index < test_start]
        if train_rows.empty:
            logger.warning(f"No rows before test_start for {stock}; skipping.")
            return None
        scaler = MinMaxScaler()
        scaler.fit(train_rows)
        features_scaled = scaler.transform(features)

        target_index = FEATURE_COLUMNS.index(config['target'])
        X, y = create_sequences(features_scaled, config['look_back'], config['forecast_horizon'], target_index)

        # dates[i] is the date of the last target row of sample i.
        dates = features.index[config['look_back'] + config['forecast_horizon'] - 1:]
        test_mask = np.asarray(dates >= test_start)
        if not test_mask.any():
            return None

        # Rows are sorted by date, so the first True marks the train/test boundary.
        split_idx = int(np.argmax(test_mask))
        if split_idx == 0:
            logger.warning(f"No training samples before test_start for {stock}; skipping.")
            return None

        X_train, y_train = X[:split_idx], y[:split_idx]
        X_test, y_test = X[split_idx:], y[split_idx:]

        model = build_cnn_transformer_model(config['look_back'], X.shape[2])
        model.fit(X_train, y_train, epochs=config['epochs'], batch_size=config['batch_size'], verbose=0)

        predictions = model.predict(X_test, verbose=0).flatten()
        y_true = y_test.flatten()

        mse = mean_squared_error(y_true, predictions)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_true, predictions)
        r2 = r2_score(y_true, predictions)

        logger.info(f"{stock} - MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}")

        # Saliency analysis: gradient of the prediction w.r.t. the last test
        # window. Best-effort only - a failure here must not discard the metrics.
        try:
            input_sample = X_test[-1][np.newaxis, ...]
            input_tensor = tf.convert_to_tensor(input_sample, dtype=tf.float32)
            with tf.GradientTape() as tape:
                tape.watch(input_tensor)
                pred = model(input_tensor)
            grads = tape.gradient(pred, input_tensor).numpy()[0]  # (look_back, n_features)
            saliency_feature = np.mean(np.abs(grads), axis=0)  # one value per feature
            saliency_time = np.mean(np.abs(grads), axis=1)  # one value per time step

            pd.DataFrame([saliency_feature], columns=FEATURE_COLUMNS).assign(ticker=stock).to_csv(
                os.path.join(output_dir, f"saliency_{stock}.csv"), index=False)
            pd.DataFrame(saliency_time, columns=['time_saliency']).to_csv(
                os.path.join(output_dir, f"saliency_time_{stock}.csv"), index=False)

            plt.figure(figsize=(12, 6))
            sns.barplot(x=saliency_feature, y=FEATURE_COLUMNS)
            plt.title(f"Feature Saliency - {stock}")
            plt.xlabel("Average Absolute Gradient")
            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, f"saliency_{stock}.png"))
            plt.close()

            plt.figure(figsize=(10, 3))
            plt.plot(saliency_time, marker='o')
            plt.title(f"Temporal Saliency - {stock}")
            plt.xlabel("Time Step")
            plt.ylabel("Importance")
            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, f"saliency_time_{stock}.png"))
            plt.close()
        except Exception as grad_err:
            # Do not leave a half-drawn figure open when plotting/saving failed.
            plt.close('all')
            logger.warning(f"Saliency extraction failed for {stock}: {grad_err}")

        return {
            'Stock': stock,
            'MSE': mse,
            'RMSE': rmse,
            'MAE': mae,
            'R2': r2,
            'hyperparam_set': config['hyperparam_set']
        }

    except Exception as e:
        logger.error(f"Error processing {stock}: {e}")
        return None

def train_and_evaluate_hybrid():
    """Run the per-stock pipeline in parallel and aggregate its outputs.

    Reads ``CONFIG['input_csv']``, calls ``process_stock`` for every distinct
    'symbol' using 4 loky workers, then writes into
    ``saliency_outputs/cnn_transformer_set_<hyperparam_set>/``:

    * ``cnn_transformer_metrics.csv``  - one row per successfully processed stock
    * ``cnn_transformer_summary.csv``  - mean of MSE/RMSE/MAE/R2 over stocks
    * ``cnn_transformer_saliency_combined.{csv,png}`` - mean feature saliency
    * ``cnn_transformer_saliency_time_combined.{csv,png}`` - mean saliency
      per time step across stocks

    Raises:
        RuntimeError: if no stock produced a result.
    """
    logger.info("Loading input CSV")
    df = pd.read_csv(CONFIG['input_csv'])
    stocks = df['symbol'].unique()
    logger.info(f"Processing {len(stocks)} stocks")

    output_dir = os.path.join("saliency_outputs", f"cnn_transformer_set_{CONFIG['hyperparam_set']}")
    os.makedirs(output_dir, exist_ok=True)

    results = Parallel(n_jobs=4, backend='loky', verbose=1)(
        delayed(process_stock)(stock, df, CONFIG['hyperparam_set'], output_dir, CONFIG) for stock in stocks
    )

    metrics_records = [r for r in results if r is not None]
    if not metrics_records:
        # An empty DataFrame has no 'MSE'/'RMSE'/... columns, so the summary
        # below would fail with an opaque KeyError. Fail with a clear message.
        raise RuntimeError("No stock produced metrics; check the input data and the date configuration.")

    metrics_df = pd.DataFrame(metrics_records)
    metrics_path = os.path.join(output_dir, "cnn_transformer_metrics.csv")
    metrics_df.to_csv(metrics_path, index=False)

    summary = metrics_df[['MSE', 'RMSE', 'MAE', 'R2']].mean().to_frame().T
    summary['hyperparam_set'] = CONFIG['hyperparam_set']
    summary_csv_path = os.path.join(output_dir, "cnn_transformer_summary.csv")
    summary.to_csv(summary_csv_path, index=False)
    logger.info("Performance Summary:")
    logger.info("\n" + tabulate(summary, headers='keys', tablefmt='github', showindex=False))

    # Only aggregate files that belong to stocks processed in THIS run.
    # Listing the directory would also pick up stale files left by earlier runs.
    processed_stocks = [record['Stock'] for record in metrics_records]

    # Combine all feature saliency files
    candidate_paths = (os.path.join(output_dir, f"saliency_{s}.csv") for s in processed_stocks)
    saliency_files = [path for path in candidate_paths if os.path.exists(path)]
    if saliency_files:
        all_saliencies = pd.concat([pd.read_csv(f) for f in saliency_files], ignore_index=True)
        saliency_avg = all_saliencies.drop(columns=['ticker']).mean().to_frame().T
        saliency_avg.to_csv(os.path.join(output_dir, "cnn_transformer_saliency_combined.csv"), index=False)

        plt.figure(figsize=(12, 6))
        sns.barplot(x=saliency_avg.values.flatten(), y=saliency_avg.columns)
        plt.title("Combined Saliency (Average Absolute Gradient Across Stocks)")
        plt.xlabel("Average Absolute Gradient")
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, "cnn_transformer_saliency_combined.png"))
        plt.close()

    # Combine all temporal saliency files
    candidate_paths = (os.path.join(output_dir, f"saliency_time_{s}.csv") for s in processed_stocks)
    time_saliency_files = [path for path in candidate_paths if os.path.exists(path)]
    if time_saliency_files:
        # Put each stock in its own column and average across columns, giving
        # one value per time step. Stacking the files row-wise and calling
        # .mean() would collapse everything into a single scalar.
        per_stock_time = [pd.read_csv(f)['time_saliency'] for f in time_saliency_files]
        mean_time_saliency = pd.concat(per_stock_time, axis=1, ignore_index=True).mean(axis=1)
        mean_time_saliency.to_frame(name='avg_time_saliency').to_csv(
            os.path.join(output_dir, "cnn_transformer_saliency_time_combined.csv"),
            index_label='time_step')

        plt.figure(figsize=(10, 3))
        plt.plot(mean_time_saliency.values, marker='o')
        plt.title("Combined Temporal Saliency Across Stocks")
        plt.xlabel("Time Step")
        plt.ylabel("Average Importance")
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, "cnn_transformer_saliency_time_combined.png"))
        plt.close()

# Entry point of the notebook: run the full pipeline and record the outcome.
logger.info("Starting CNN-Transformer hybrid model training and evaluation...")
try:
    train_and_evaluate_hybrid()
except Exception as e:
    # logger.exception logs at ERROR level like logger.error, but also writes
    # the traceback, so the log file shows where the failure happened.
    logger.exception(f"Training failed: {e}")
    raise
else:
    logger.info("Evaluation complete.")


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
2025-05-01 12:41:21.294939: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-05-01 12:41:21.294958: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-05-01 12:41:21.294939: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-05-01 12:41:21.294940: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-05-01 12:41:21.294968: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 48.00 GB
2025-05-01 12:41:21.294974: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 18.00 GB
2025-05-01 12:41:21.294976: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 48.00 GB
2025-05-01 12:41:21.294981: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 18.00 GB
2025-05-01 12:41:21.294982: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 48.0