In [1]:
import pandas as pd
import numpy as np
import os
import joblib
from tensorflow.keras.models import load_model
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from contextlib import redirect_stdout
import io

# Evaluation settings; these must mirror the values used when the models were
# trained so that windowing and the train/test split line up.
CONFIG = dict(
    # Locations of the input data and of the artefacts read/written here.
    input_csv='../../data/daily_stock_price/filtered_top25sp_stocks.csv',
    models_folder='stock_models',
    output_csv='evaluation_results.csv',
    saliency_folder='lstm_saliency_outputs',
    # Model inputs, in column order, and the column being predicted.
    features=['open', 'high', 'low', 'close', 'volume'],
    target='close',
    # Windowing and the chronological train/test split.
    sequence_length=60,
    train_split=0.8,
    # Training-time settings kept for parity with the training configuration;
    # none of the evaluation code visible here reads them.
    random_seed=42,
    lstm_epochs=50,
    batch_size=32,
    validation_split=0.1,
)

# The metrics CSV is stored alongside the saved models.
output_csv_path = os.path.join(CONFIG['models_folder'], CONFIG['output_csv'])

def create_sequences(data, seq_length, target_index=None):
    """Slice a 2-D series into overlapping windows and next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is the value of
    the target column in the row immediately after that window.

    Args:
        data: Array-like of shape ``(n_rows, n_features)``.
        seq_length: Number of consecutive rows in each window.
        target_index: Column index of the target within ``data``. When
            ``None`` (the default), it is the position of ``CONFIG['target']``
            in ``CONFIG['features']``.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_windows, seq_length, n_features)``
        and ``y`` has shape ``(n_windows,)``, with
        ``n_windows = max(len(data) - seq_length, 0)``. When there are too few
        rows for a single window, both arrays are empty but keep those shapes.
    """
    # Resolve the target column once instead of on every loop iteration.
    if target_index is None:
        target_index = CONFIG['features'].index(CONFIG['target'])

    data = np.asarray(data)
    n_windows = len(data) - seq_length
    if n_windows <= 0:
        # Keep the trailing dimensions so callers still get a well-formed
        # (empty) batch rather than a shapeless 1-D array.
        empty_X = np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[i:i + seq_length] for i in range(n_windows)])
    # The target for window i is row i + seq_length, i.e. every row from
    # seq_length onwards; copy so the result does not alias the input.
    y = data[seq_length:, target_index].copy()
    return X, y

def compute_saliency_and_hidden(stock):
    """Compute gradient-based feature saliency for one stock's saved model.

    Loads ``<stock>_lstm_model.h5`` and ``<stock>_scaler.pkl`` from
    ``CONFIG['models_folder']``, rebuilds the held-out windows from
    ``CONFIG['input_csv']`` and takes the gradient of the model output with
    respect to those input windows. The absolute gradient, averaged over
    samples and time steps, gives one saliency value per feature. A bar chart
    of these values is saved as ``<stock>_saliency_plot.png`` in
    ``CONFIG['saliency_folder']`` (created if missing).

    Args:
        stock: Ticker symbol, matched against the CSV's ``symbol`` column and
            used in the model, scaler and plot file names.

    Returns:
        ``(saliency_df, hidden_df)``. ``saliency_df`` is a one-row DataFrame
        with one column per feature plus ``'Stock'``. ``hidden_df`` is always
        ``None`` because hidden-activation extraction is disabled. Returns
        ``(None, None)`` when the model or scaler file is missing, there are
        no test windows, the model cannot be initialised, or no gradient is
        available.
    """
    model_path = os.path.join(CONFIG['models_folder'], f"{stock}_lstm_model.h5")
    scaler_path = os.path.join(CONFIG['models_folder'], f"{stock}_scaler.pkl")
    if not os.path.exists(model_path) or not os.path.exists(scaler_path):
        print(f"Model or scaler not found for {stock}")
        return None, None

    # Load model and scaler
    model = load_model(model_path, custom_objects={'mse': tf.keras.losses.MeanSquaredError()})
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])  # Match training compilation
    scaler = joblib.load(scaler_path)

    # Rebuild the test windows with the same windowing and split ratio that
    # evaluate_only() uses.
    df = pd.read_csv(CONFIG['input_csv'])
    stock_data = df[df['symbol'] == stock][CONFIG['features']].values
    scaled_data = scaler.transform(stock_data)
    X, _ = create_sequences(scaled_data, CONFIG['sequence_length'])
    train_size = int(len(X) * CONFIG['train_split'])
    X_test = X[train_size:]
    if len(X_test) == 0:
        # Nothing to differentiate against; report it explicitly instead of
        # relying on a downstream failure.
        print(f"No test sequences available for {stock}")
        return None, None

    # A plain tensor is enough here: it is watched explicitly on the tape below.
    X_tensor = tf.convert_to_tensor(X_test, dtype=tf.float32)

    # Initialize model: build it and run one sample through it so that any
    # shape or loading problem surfaces here, before the gradient pass.
    try:
        input_shape = (None, CONFIG['sequence_length'], len(CONFIG['features']))
        model.build(input_shape)
        print(f"Built model for {stock} with input shape: {input_shape}")
        model.predict(X_tensor[:1], verbose=0)  # Single batch prediction
        print(f"Model evaluated for {stock}, input shape: {model.input_shape}")
    except Exception as e:
        print(f"Failed to initialize model for {stock}: {e}")
        return None, None

    # Compute saliency: gradient of the predictions w.r.t. the input windows.
    with tf.GradientTape() as tape:
        tape.watch(X_tensor)
        preds = model(X_tensor)
    grads = tape.gradient(preds, X_tensor)
    if grads is None:
        # tape.gradient returns None when the output is not connected to the input.
        print(f"No gradient available for {stock}")
        return None, None

    # Average |gradient| over samples (axis 0) and time steps (axis 1),
    # leaving one value per feature.
    saliency = np.abs(grads.numpy()).mean(axis=(0, 1))
    saliency_df = pd.DataFrame([saliency], columns=CONFIG['features'])
    saliency_df['Stock'] = stock

    # Visualize saliency
    os.makedirs(CONFIG['saliency_folder'], exist_ok=True)
    plt.figure(figsize=(10, 6))
    try:
        saliency_df.drop(columns='Stock').iloc[0].sort_values(ascending=False).plot(kind='barh')
        plt.title(f"Saliency for {stock}")
        plt.xlabel("Saliency Value")
        plt.ylabel("Features")
        plt.tight_layout()
        plt.savefig(os.path.join(CONFIG['saliency_folder'], f"{stock}_saliency_plot.png"))
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close()

    # Hidden-activation extraction (outputs of the "lstm_1" / "lstm_2" layers)
    # is intentionally disabled to avoid errors, so no hidden-state frame is
    # produced.
    hidden_df = None

    return saliency_df, hidden_df

def evaluate_only():
    """Evaluate every saved per-stock LSTM model and collect saliency results.

    For each symbol in ``CONFIG['input_csv']`` that has both a saved model and
    a saved scaler in ``CONFIG['models_folder']``, this scales the feature
    columns, builds the windows, predicts on the trailing
    ``1 - CONFIG['train_split']`` share of them, and computes MSE, RMSE, MAE
    and R2 on the target after inverting the scaling. It then calls
    ``compute_saliency_and_hidden`` for the same stock.

    Side effects:
        Writes the metrics table to ``output_csv_path``. When saliency results
        exist, writes ``all_saliency.csv``, ``model_saliency_average.csv`` and
        ``combined_saliency_plot.png`` to ``CONFIG['saliency_folder']``. When
        hidden-activation results exist, writes
        ``hidden_activation_summary.csv`` there as well. Output folders are
        created if missing.

    Returns:
        ``(results_df, all_saliency, all_hidden)``. The last two are ``None``
        when no stock produced the corresponding output.
    """
    df = pd.read_csv(CONFIG['input_csv'])
    stocks = df['symbol'].unique()
    n_features = len(CONFIG['features'])
    idx = CONFIG['features'].index(CONFIG['target'])
    results = []
    saliency_all = []
    hidden_all = []

    for stock in stocks:
        print(f"Evaluating {stock}...")
        model_path = os.path.join(CONFIG['models_folder'], f"{stock}_lstm_model.h5")
        scaler_path = os.path.join(CONFIG['models_folder'], f"{stock}_scaler.pkl")
        if not os.path.exists(model_path) or not os.path.exists(scaler_path):
            print(f"Skipping {stock}: Model or scaler not found")
            continue

        try:
            model = load_model(model_path, custom_objects={'mse': tf.keras.losses.MeanSquaredError()})
            model.compile(optimizer='adam', loss='mse', metrics=['mae'])
            scaler = joblib.load(scaler_path)
        except Exception as e:
            print(f"Error loading model/scaler for {stock}: {e}")
            continue

        stock_data = df[df['symbol'] == stock][CONFIG['features']].values
        scaled_data = scaler.transform(stock_data)
        X, y = create_sequences(scaled_data, CONFIG['sequence_length'])
        train_size = int(len(X) * CONFIG['train_split'])
        X_test, y_test = X[train_size:], y[train_size:]
        if len(X_test) == 0:
            # Prediction and the metrics below both need at least one sample.
            print(f"Skipping {stock}: not enough data for a test split")
            continue
        y_pred_scaled = model.predict(X_test, verbose=0)

        # The scaler was fitted on all feature columns, so place the target
        # values in their column of a zero-filled matrix, invert the scaling,
        # and read that column back.
        y_test_full = np.zeros((len(y_test), n_features))
        y_pred_full = np.zeros((len(y_pred_scaled), n_features))
        y_test_full[:, idx] = y_test
        y_pred_full[:, idx] = y_pred_scaled.flatten()
        y_test_inv = scaler.inverse_transform(y_test_full)[:, idx]
        y_pred_inv = scaler.inverse_transform(y_pred_full)[:, idx]

        mse = mean_squared_error(y_test_inv, y_pred_inv)
        results.append({
            'Stock': stock,
            'MSE': mse,
            'RMSE': np.sqrt(mse),
            'MAE': mean_absolute_error(y_test_inv, y_pred_inv),
            'R2': r2_score(y_test_inv, y_pred_inv)
        })

        saliency_df, hidden_df = compute_saliency_and_hidden(stock)
        if saliency_df is not None:
            saliency_all.append(saliency_df)
        if hidden_df is not None:
            hidden_all.append(hidden_df)

    # Fixing the columns keeps the CSV header even when no stock was evaluated.
    results_df = pd.DataFrame(results, columns=['Stock', 'MSE', 'RMSE', 'MAE', 'R2'])
    os.makedirs(os.path.dirname(output_csv_path) or '.', exist_ok=True)
    results_df.to_csv(output_csv_path, index=False)
    print("Evaluation Results:")
    print(results_df)

    # Bound up front so the return statement never touches an undefined name.
    all_saliency = None
    all_hidden = None

    if saliency_all:
        os.makedirs(CONFIG['saliency_folder'], exist_ok=True)
        all_saliency = pd.concat(saliency_all, ignore_index=True)
        all_saliency.to_csv(os.path.join(CONFIG['saliency_folder'], 'all_saliency.csv'), index=False)

        mean_saliency = all_saliency.drop(columns='Stock').mean().sort_values(ascending=False)
        mean_saliency_df = mean_saliency.to_frame(name='Mean Saliency')
        mean_saliency_df.to_csv(os.path.join(CONFIG['saliency_folder'], 'model_saliency_average.csv'))

        plt.figure(figsize=(10, 6))
        try:
            mean_saliency.plot(kind='barh')
            plt.title("Mean Saliency Across All Stocks")
            plt.xlabel("Mean Saliency")
            plt.ylabel("Features")
            plt.tight_layout()
            plt.savefig(os.path.join(CONFIG['saliency_folder'], 'combined_saliency_plot.png'))
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close()

    if hidden_all:
        os.makedirs(CONFIG['saliency_folder'], exist_ok=True)
        all_hidden = pd.concat(hidden_all, ignore_index=True)
        all_hidden.to_csv(os.path.join(CONFIG['saliency_folder'], 'hidden_activation_summary.csv'), index=False)

    return results_df, all_saliency, all_hidden

# Entry point: run the full evaluation when this module is executed directly.
# The three return values are bound at module level.
if __name__ == "__main__":
    results_df, all_saliency, all_hidden = evaluate_only()

2025-05-02 04:44:38.311291: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-05-02 04:44:38.311311: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 48.00 GB
2025-05-02 04:44:38.311315: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 18.00 GB
2025-05-02 04:44:38.311329: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-05-02 04:44:38.311339: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Evaluating ABBV...


2025-05-02 04:44:38.609692: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Built model for ABBV with input shape: (None, 60, 5)
Model evaluated for ABBV, input shape: (None, 60, 5)




Evaluating GOOGL...




Built model for GOOGL with input shape: (None, 60, 5)
Model evaluated for GOOGL, input shape: (None, 60, 5)




Evaluating GOOG...




Built model for GOOG with input shape: (None, 60, 5)
Model evaluated for GOOG, input shape: (None, 60, 5)




Evaluating AMZN...




Built model for AMZN with input shape: (None, 60, 5)
Model evaluated for AMZN, input shape: (None, 60, 5)




Evaluating AAPL...




Built model for AAPL with input shape: (None, 60, 5)
Model evaluated for AAPL, input shape: (None, 60, 5)




Evaluating BAC...




Built model for BAC with input shape: (None, 60, 5)
Model evaluated for BAC, input shape: (None, 60, 5)




Evaluating BRK-B...




Built model for BRK-B with input shape: (None, 60, 5)
Model evaluated for BRK-B, input shape: (None, 60, 5)




Evaluating AVGO...




Built model for AVGO with input shape: (None, 60, 5)
Model evaluated for AVGO, input shape: (None, 60, 5)




Evaluating COST...




Built model for COST with input shape: (None, 60, 5)
Model evaluated for COST, input shape: (None, 60, 5)




Evaluating XOM...




Built model for XOM with input shape: (None, 60, 5)
Model evaluated for XOM, input shape: (None, 60, 5)




Evaluating HD...




Built model for HD with input shape: (None, 60, 5)
Model evaluated for HD, input shape: (None, 60, 5)




Evaluating JNJ...




Built model for JNJ with input shape: (None, 60, 5)
Model evaluated for JNJ, input shape: (None, 60, 5)




Evaluating JPM...




Built model for JPM with input shape: (None, 60, 5)
Model evaluated for JPM, input shape: (None, 60, 5)




Evaluating LLY...




Built model for LLY with input shape: (None, 60, 5)
Model evaluated for LLY, input shape: (None, 60, 5)




Evaluating MA...




Built model for MA with input shape: (None, 60, 5)
Model evaluated for MA, input shape: (None, 60, 5)




Evaluating META...




Built model for META with input shape: (None, 60, 5)
Model evaluated for META, input shape: (None, 60, 5)




Evaluating MSFT...




Built model for MSFT with input shape: (None, 60, 5)
Model evaluated for MSFT, input shape: (None, 60, 5)




Evaluating NFLX...




Built model for NFLX with input shape: (None, 60, 5)
Model evaluated for NFLX, input shape: (None, 60, 5)




Evaluating NVDA...




Built model for NVDA with input shape: (None, 60, 5)
Model evaluated for NVDA, input shape: (None, 60, 5)




Evaluating ORCL...




Built model for ORCL with input shape: (None, 60, 5)
Model evaluated for ORCL, input shape: (None, 60, 5)




Evaluating PG...




Built model for PG with input shape: (None, 60, 5)
Model evaluated for PG, input shape: (None, 60, 5)




Evaluating TSLA...




Built model for TSLA with input shape: (None, 60, 5)
Model evaluated for TSLA, input shape: (None, 60, 5)




Evaluating UNH...




Built model for UNH with input shape: (None, 60, 5)
Model evaluated for UNH, input shape: (None, 60, 5)




Evaluating V...




Built model for V with input shape: (None, 60, 5)
Model evaluated for V, input shape: (None, 60, 5)




Evaluating WMT...




Built model for WMT with input shape: (None, 60, 5)
Model evaluated for WMT, input shape: (None, 60, 5)
Evaluation Results:
    Stock           MSE        RMSE        MAE        R2
0    ABBV     14.969746    3.869076   2.834885  0.967233
1   GOOGL     24.130472    4.912278   3.678896  0.967547
2    GOOG    265.232886   16.285972  14.652526  0.650809
3    AMZN    633.073856   25.160959  22.100684  0.523061
4    AAPL    716.394607   26.765549  17.178600  0.837244
5     BAC      0.948593    0.973957   0.796138  0.989163
6   BRK-B    171.352905   13.090184   9.359313  0.975302
7    AVGO    118.186201   10.871348   6.293096  0.960161
8    COST   4922.394531   70.159779  43.457655  0.903523
9     XOM     95.748739    9.785128   5.155087  0.801611
10     HD   1701.470078   41.248880  32.403153  0.773679
11    JNJ   2907.209483   53.918545  45.303745 -1.437958
12    JPM    193.034295   13.893678   9.645972  0.912996
13    LLY  13893.837713  117.872124  55.618070  0.760966
14     MA   1283.5824