In [None]:
!pip3 install seaborn tensorflow pandas

In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from tqdm import tqdm
import atexit
import math

# Create the log directory and a per-run log file named after the start time.
log_time = datetime.now().strftime("%Y%m%d_%H%M%S")
log_dir = "log"
os.makedirs(log_dir, exist_ok=True)
log_filename = os.path.join(log_dir, f"transformer_log_{log_time}.txt")

# basicConfig attaches the file handler to the ROOT logger; the module-level
# logger below has no handlers of its own and relies on propagation.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(processName)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_filename, mode='w')
    ]
)
logger = logging.getLogger(__name__)


def _flush_log_handlers():
    """Flush every handler attached to the root logger."""
    # The handlers live on the root logger (see basicConfig above), so
    # iterating `logger.handlers` would find nothing to flush.
    for handler in logging.getLogger().handlers:
        handler.flush()


# Ensure log handlers are flushed on exit
atexit.register(_flush_log_handlers)

# Run configuration shared by every function in this script.
CONFIG = {
    # File locations: the CSV that is read and the metrics CSV that is written.
    'input_csv': '../../data/daily_stock_price/sp500_top25_technical_indicators.csv',
    'output_csv': 'transformer_evaluation_results_walk_forward.csv',

    # Column to predict; must be one of FEATURE_COLUMNS.
    'target': 'Close',

    # Sequence geometry: rows per input window and rows per target.
    'look_back': 60,
    'forecast_horizon': 1,

    # Values forwarded to model.fit.
    'batch_size': 128,
    'epochs': 1,

    # During walk-forward testing the model is rebuilt every this many steps.
    'walkforward_retrain_step': 100,

    # Date boundaries. Rows before 'train_start' are discarded and sequences
    # dated on or after 'test_start' form the test set.
    # NOTE(review): 'train_end' is not read anywhere in the visible code.
    'train_start': '2001-01-01',
    'train_end': '2020-12-31',
    'test_start': '2021-01-01',
}

# Input columns fed to the model, in the order they appear in each sequence.
# The order matters: the target's position is looked up with
# FEATURE_COLUMNS.index(CONFIG['target']).
FEATURE_COLUMNS = [
    'Close',
    'SMA_20',
    'RSI_14',
    'MACD',
    'MACD_Signal',
    'MACD_Hist',
    'BB_Upper',
    'BB_Lower',
    'ATR_14',
    'OBV',
    'Close_Lag_1',
    'Close_Lag_2',
    'Close_Lag_3',
    'Close_Lag_5',
    'Volume_Lag_1',
    'Volume_Lag_3',
    'Daily_Return',
    'Volatility_20',
    'High_Low_Range',
    'Open_Close_Range',
    'MACD_Hist_Slope',
]

def build_transformer_with_attention(look_back, n_features):
    """Build and compile a small self-attention regression model.

    The network applies one multi-head self-attention layer to the input
    sequence, adds the input back (residual), layer-normalises, then runs a
    Dense -> global-average-pool -> Dense stack down to a single value.

    Args:
        look_back: Number of time steps in each input sequence.
        n_features: Number of features per time step.

    Returns:
        A compiled Keras model with two outputs: the scalar prediction
        (layer name 'prediction') and the attention scores produced by the
        layer named 'attention'. Only the prediction carries a loss (MSE).
    """
    layers = tf.keras.layers
    sequence_in = tf.keras.Input(shape=(look_back, n_features))

    # Self-attention: the input sequence is used as both query and value.
    self_attention = layers.MultiHeadAttention(num_heads=2, key_dim=16, name='attention')
    attended, attention_weights = self_attention(
        sequence_in, sequence_in, return_attention_scores=True
    )

    # Residual connection followed by layer normalisation.
    hidden = layers.LayerNormalization(epsilon=1e-6)(attended + sequence_in)
    hidden = layers.Dense(32, activation='relu')(hidden)
    # Collapse the time axis so the head sees one vector per sample.
    hidden = layers.GlobalAveragePooling1D()(hidden)
    hidden = layers.Dense(32, activation='relu')(hidden)
    prediction = layers.Dense(1, name='prediction')(hidden)

    model = tf.keras.Model(inputs=sequence_in, outputs=[prediction, attention_weights])

    # The attention output is exposed for inspection only: no loss, zero weight.
    losses = {'prediction': 'mse', 'attention': None}
    weights = {'prediction': 1.0, 'attention': 0.0}
    model.compile(optimizer='adam', loss=losses, loss_weights=weights)
    return model

def create_sequences(data, look_back, forecast_horizon, target_index=0):
    """Slice a 2-D feature matrix into sliding input windows and targets.

    Args:
        data: Array-like of shape (n_rows, n_features), ordered in time.
        look_back: Number of consecutive rows in each input window.
        forecast_horizon: Number of rows following the window used as target.
        target_index: Column of ``data`` that holds the target variable.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        (n_samples, look_back, n_features) and ``y`` has shape
        (n_samples, forecast_horizon). When ``data`` has too few rows for a
        single window, both arrays are empty but keep those trailing
        dimensions, so callers can still read ``X.shape[2]``.
    """
    data = np.asarray(data)
    n_samples = len(data) - look_back - forecast_horizon + 1

    if n_samples <= 0:
        # Not enough rows for one window: return correctly shaped empties
        # instead of the shape-(0,) arrays np.array([]) would give.
        n_features = data.shape[1] if data.ndim == 2 else 0
        return (
            np.empty((0, look_back, n_features), dtype=float),
            np.empty((0, forecast_horizon), dtype=float),
        )

    X = np.array([data[start:start + look_back, :] for start in range(n_samples)])
    y = np.array([
        data[start + look_back:start + look_back + forecast_horizon, target_index]
        for start in range(n_samples)
    ])
    return X, y

def _fit_model(model, X_fit, y_fit):
    """Fit ``model`` on the given windows using CONFIG's epochs and batch size."""
    # The model has a second (attention-score) output compiled with no loss;
    # an all-zero placeholder keeps the target list aligned with both outputs.
    # The 2 matches num_heads in build_transformer_with_attention.
    placeholder = np.zeros((len(y_fit), 2, CONFIG['look_back'], CONFIG['look_back']))
    model.fit(
        X_fit,
        [y_fit, placeholder],
        epochs=CONFIG['epochs'],
        batch_size=CONFIG['batch_size'],
        verbose=0
    )


def _inverse_scale_target(scaler, values, n_features, target_index):
    """Convert scaled target values (1-D) back to the target's original units."""
    flat = np.asarray(values, dtype=float).reshape(-1)
    # inverse_transform expects a 2-D (n_samples, n_features) array, so the
    # values are replicated across all columns and the target column is read.
    padded = np.repeat(flat[:, np.newaxis], n_features, axis=1)
    return scaler.inverse_transform(padded)[:, target_index]


def process_stock(stock, df):
    """Train, walk-forward test and score the model for a single stock.

    Args:
        stock: Symbol to select from ``df['symbol']``.
        df: DataFrame holding at least the columns 'symbol', 'date' and every
            name in FEATURE_COLUMNS.

    Returns:
        A dict with keys 'Stock', 'MSE', 'RMSE', 'MAE' and 'R2' (metrics in
        the target's original units), or None when the stock has no usable
        data, no training or test sequences, or processing raised an error
        (the error is logged with its traceback).
    """
    try:
        logger.info(f"Processing {stock}...")
        print(f"Starting {stock}...")  # Indicate stock start in cell output
        stock_df = df[df['symbol'] == stock].copy()
        stock_df['date'] = pd.to_datetime(stock_df['date'])
        stock_df = stock_df[stock_df['date'] >= CONFIG['train_start']]
        stock_df = stock_df.sort_values('date').set_index('date')

        features = stock_df[FEATURE_COLUMNS].dropna()
        if features.empty:
            logger.warning(f"No valid data for {stock} after preprocessing.")
            return None

        look_back = CONFIG['look_back']
        horizon = CONFIG['forecast_horizon']
        test_start = pd.to_datetime(CONFIG['test_start'])
        target_index = FEATURE_COLUMNS.index(CONFIG['target'])
        n_features = len(FEATURE_COLUMNS)

        # Fit the scaler on pre-test rows only so the test period's min/max do
        # not leak into training; fall back to all rows if none precede it.
        is_train_row = features.index < test_start
        scaler = MinMaxScaler()
        scaler.fit(features[is_train_row] if is_train_row.any() else features)
        features_scaled = scaler.transform(features)

        X, y = create_sequences(features_scaled, look_back, horizon, target_index)
        if len(X) == 0:
            logger.warning(f"Not enough rows for {stock} to build a single sequence.")
            return None

        # Date of the last target row of each sequence. This must come from
        # `features` (after dropna), which is what the sequences were built on.
        dates = features.index[look_back + horizon - 1:]
        date_mask = np.asarray(dates >= test_start)
        split_idx = np.where(date_mask)[0][0] if date_mask.any() else len(X)

        X_train_full, y_train_full = X[:split_idx], y[:split_idx]
        X_test, y_test = X[split_idx:], y[split_idx:]

        if len(X_train_full) == 0:
            logger.warning(f"No training sequences for {stock} before {CONFIG['test_start']}.")
            return None

        # Calculate total work units: training (fixed) + test steps
        training_work = 100  # Arbitrary units for training
        test_work = len(X_test)  # One unit per test step
        total_work = training_work + test_work
        current_work = 0

        model = build_transformer_with_attention(look_back, n_features)
        _fit_model(model, X_train_full, y_train_full)
        current_work += training_work
        progress_percent = (current_work / total_work) * 100
        print(f"{stock}: {progress_percent:.0f}%", end='\r')

        os.makedirs("model", exist_ok=True)
        model_path = os.path.join("model", f"{stock}.keras")
        model.save(model_path)
        logger.info(f"Saved model for {stock} to {model_path}")

        if len(X_test) == 0:
            logger.warning(f"No test sequences for {stock} on or after {CONFIG['test_start']}.")
            return None

        predictions = []
        for i in range(len(X_test)):
            if i % 10 == 0:
                logger.info(f"Completed {i}/{len(X_test)} test steps for {stock}")
            if i % CONFIG['walkforward_retrain_step'] == 0 and i > 0:
                logger.info(f"Retraining at step {i}/{len(X_test)} for {stock}")
                model = build_transformer_with_attention(look_back, n_features)
                # Everything seen so far is the training set plus the first i
                # test windows, i.e. a prefix of X / y.
                seen = split_idx + i
                _fit_model(model, X[:seen], y[:seen])
            pred, _ = model.predict(X_test[i:i+1], verbose=0)
            predictions.append(pred[0][0])
            current_work += 1
            progress_percent = (current_work / total_work) * 100
            print(f"{stock}: {progress_percent:.0f}%", end='\r')

        # The model emits one value per window; compare it with the first
        # forecast step (the only step when forecast_horizon is 1).
        preds_inv = _inverse_scale_target(scaler, predictions, n_features, target_index)
        y_true_inv = _inverse_scale_target(scaler, y_test[:, 0], n_features, target_index)

        mse = mean_squared_error(y_true_inv, preds_inv)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_true_inv, preds_inv)
        r2 = r2_score(y_true_inv, preds_inv)

        logger.info(f"{stock} - MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}")
        print()  # Move to next line
        print(f"{stock}: Completed")
        return {'Stock': stock, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'R2': r2}

    except Exception as e:
        # Best-effort per stock: log with traceback and let the caller continue.
        logger.exception(f"Error processing {stock}: {e}")
        return None

def train_and_evaluate_transformer():
    """Evaluate every stock in the input CSV and persist the metrics.

    Reads CONFIG['input_csv'], runs process_stock for each unique value of the
    'symbol' column, writes the collected metrics to CONFIG['output_csv'],
    logs them, and renders them as a table image in 'transformer_results.png'.

    Returns:
        A DataFrame with one row per successfully processed stock. When no
        stock produced a result the DataFrame is empty (with the metric
        column names) and no table image is written.
    """
    df = pd.read_csv(CONFIG['input_csv'])
    stocks = df['symbol'].unique()
    results = []

    for stock in tqdm(stocks, desc="Evaluating stocks", disable=True):  # Disable tqdm output
        result = process_stock(stock, df)
        if result is not None:
            results.append(result)

    if results:
        result_df = pd.DataFrame(results)
    else:
        # Keep the column header so the CSV stays readable when empty.
        result_df = pd.DataFrame(columns=['Stock', 'MSE', 'RMSE', 'MAE', 'R2'])
    result_df.to_csv(CONFIG['output_csv'], index=False)

    if result_df.empty:
        # plt.table cannot render a table without rows, so stop here.
        logger.warning("No stock produced evaluation results; skipping the summary table.")
        return result_df

    logger.info("\nEvaluation Results:\n" + result_df.to_string(index=False))

    sns.set_style("whitegrid")
    fig = plt.figure(figsize=(12, 6))
    try:
        table = plt.table(cellText=result_df.round(4).values, colLabels=result_df.columns, cellLoc='center', loc='center')
        table.auto_set_font_size(False)
        table.set_fontsize(10)
        table.scale(1.2, 1.2)
        plt.axis('off')
        plt.title('Transformer Model Performance Metrics (Walk-Forward)', fontsize=14, pad=20)
        plt.tight_layout()
        plt.savefig('transformer_results.png')
    finally:
        # Release the figure even if rendering or saving fails.
        plt.close(fig)

    return result_df

if __name__ == '__main__':
    # Script entry point: run the full evaluation, then echo the metrics table.
    logger.info("Starting Transformer model training and evaluation...")
    result_df = train_and_evaluate_transformer()
    # Only report completion when a result table was actually returned.
    if not (result_df is None):
        logger.info("Evaluation complete.")
        print(result_df)