In [1]:
import os
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# =====================================================
# Config
# =====================================================
PROCESSED_DIR = "./Processed"
MODEL_SAVE_DIR = "./Models"
RESULTS_DIR = "./Results"
os.makedirs(RESULTS_DIR, exist_ok=True)

SEQ_LEN = 8
BATCH_SIZE = 16
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🚀 Using device: {DEVICE.upper()}")

# =====================================================
# Dataset
# =====================================================
class StockDataset(Dataset):
    def __init__(self, df, seq_len=7):
        self.seq_len = seq_len
        self.features = df[["negative", "neutral", "positive", "Open", "Close", "EMA_7", "EMA_21"]].values
        self.targets = df["Close"].values
        self.samples = []
        for i in range(len(df) - seq_len - 1):
            x = self.features[i:i + seq_len]
            y = self.targets[i + seq_len]
            self.samples.append((x, y))
    def __len__(self):
        return len(self.samples)
    def __getitem__(self, idx):
        x, y = self.samples[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# =====================================================
# Model
# =====================================================
class StockLSTM(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim=64, num_layers=2):
        super().__init__()
        self.lstm = torch.nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.2)
        self.fc = torch.nn.Linear(hidden_dim, 1)
    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]
        out = self.fc(out)
        return out.squeeze(-1)

# =====================================================
# Evaluation
# =====================================================
def evaluate_model(model, loader):
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for x, y in loader:
            x = x.to(DEVICE)
            preds = model(x)
            y_true.extend(y.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mask = ~np.isnan(y_true) & ~np.isnan(y_pred)
    y_true, y_pred = y_true[mask], y_pred[mask]

    abs_error = np.abs(y_true - y_pred)
    pct_error = abs_error / (np.abs(y_true) + 1e-8) * 100

    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    direction_acc = np.mean(np.sign(np.diff(y_true)) == np.sign(np.diff(y_pred)))

    return y_true, y_pred, abs_error, pct_error, rmse, mae, r2, direction_acc

🚀 Using device: CUDA


In [3]:
import os
import gc
import torch
import joblib
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler
# =====================================================
# LOOP THROUGH ALL TEST FILES
# =====================================================
summary_records = []

for file in os.listdir(PROCESSED_DIR):
    if not file.endswith("_test.csv"):
        continue

    COMPANY_NAME = file.replace("_test.csv", "")
    TEST_FILE = os.path.join(PROCESSED_DIR, file)

    print(f"\n🏢 Evaluating model for {COMPANY_NAME}...")

    model_path = os.path.join(MODEL_SAVE_DIR, f"{COMPANY_NAME}_model.pt")
    scaler_path = os.path.join(MODEL_SAVE_DIR, f"{COMPANY_NAME}_scaler.pkl")

    # Check existence
    if not os.path.exists(model_path) or not os.path.exists(scaler_path):
        print(f"⚠️ Skipping {COMPANY_NAME} — model or scaler missing.")
        continue

    # Clean memory before each company
    gc.collect()
    torch.cuda.empty_cache()

    # =====================================================
    # Load and prepare test data
    # =====================================================
    df = pd.read_csv(TEST_FILE)
    df = df.sort_values("Date").reset_index(drop=True)

    scaler: StandardScaler = joblib.load(scaler_path)
    df[["Close", "EMA_7", "EMA_21", "Open"]] = scaler.transform(
        df[["Close", "EMA_7", "EMA_21", "Open"]]
    )

    if len(df) < SEQ_LEN + 1:
        print(f"⚠️ Not enough rows in {COMPANY_NAME} for evaluation.")
        continue

    # Assuming StockDataset and evaluate_model are defined elsewhere
    val_dataset = StockDataset(df, seq_len=SEQ_LEN)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    # =====================================================
    # Load model
    # =====================================================
    try:
        model = StockLSTM(input_dim=7).to(DEVICE)
        model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    except RuntimeError:
        print(f"❌ GPU OOM, switching to CPU...")
        DEVICE = torch.device("cpu")
        torch.cuda.empty_cache()
        model = StockLSTM(input_dim=7).to(DEVICE)
        model.load_state_dict(torch.load(model_path, map_location=DEVICE))

    # =====================================================
    # Evaluate model
    # =====================================================
    y_true, y_pred, abs_err, pct_err, rmse, mae, r2, dir_acc = evaluate_model(model, val_loader)

    # =====================================================
    # Inverse-transform predictions and actuals
    # =====================================================
    y_true_unscaled = scaler.inverse_transform(np.hstack([
        np.zeros((len(y_true), 3)),  # placeholders
        y_true.reshape(-1, 1)
    ]))[:, -1]

    y_pred_unscaled = scaler.inverse_transform(np.hstack([
        np.zeros((len(y_pred), 3)),
        y_pred.reshape(-1, 1)
    ]))[:, -1]

    abs_err_real = np.abs(y_true_unscaled - y_pred_unscaled)
    pct_err_real = (abs_err_real / (np.abs(y_true_unscaled) + 1e-8)) * 100

    # =====================================================
    # Save per-row predictions
    # =====================================================
    df_out = pd.DataFrame({
        "Actual": y_true_unscaled,
        "Predicted": y_pred_unscaled,
        "Abs_Error": abs_err_real,
        "Percentage_Error": pct_err_real
    })
    df_out.to_csv(os.path.join(RESULTS_DIR, f"{COMPANY_NAME}_predictions.csv"), index=False)

    # =====================================================
    # Save metrics summary
    # =====================================================
    summary_records.append({
        "Company": COMPANY_NAME,
        "RMSE": rmse,
        "MAE": mae,
        "R²": r2,
        "Direction_Acc(%)": dir_acc * 100
    })

    print(f"✅ {COMPANY_NAME}: RMSE={rmse:.4f}, MAE={mae:.4f}, R²={r2:.4f}, DirAcc={dir_acc*100:.2f}%")

# =====================================================
# SAVE SUMMARY FOR ALL COMPANIES
# =====================================================
summary_df = pd.DataFrame(summary_records)
summary_path = os.path.join(RESULTS_DIR, "summary_metrics.csv")
summary_df.to_csv(summary_path, index=False)

print(f"\n📊 Saved summary metrics to {summary_path}")
print("🎯 Evaluation complete for all companies!")


🏢 Evaluating model for MSFT_stock_gdelt_final...
❌ GPU OOM, switching to CPU...
✅ MSFT_stock_gdelt_final: RMSE=1.2405, MAE=1.2195, R²=-13.7504, DirAcc=52.88%

🏢 Evaluating model for JPM_stock_gdelt_final...
✅ JPM_stock_gdelt_final: RMSE=3.6429, MAE=3.5194, R²=-12.6567, DirAcc=45.55%

🏢 Evaluating model for AAPL_stock_gdelt_final...
✅ AAPL_stock_gdelt_final: RMSE=1.3667, MAE=1.1938, R²=-0.3473, DirAcc=52.88%

🏢 Evaluating model for NVDA_stock_gdelt_final...
✅ NVDA_stock_gdelt_final: RMSE=2.6554, MAE=2.5114, R²=-3.5381, DirAcc=45.03%

🏢 Evaluating model for V_stock_gdelt_final...
✅ V_stock_gdelt_final: RMSE=1.3611, MAE=1.2466, R²=-1.7724, DirAcc=49.21%

🏢 Evaluating model for XOM_stock_gdelt_final...
✅ XOM_stock_gdelt_final: RMSE=0.4140, MAE=0.3947, R²=-4.3345, DirAcc=55.50%

🏢 Evaluating model for PFE_stock_gdelt_final...
✅ PFE_stock_gdelt_final: RMSE=1.1117, MAE=1.1038, R²=-35.3331, DirAcc=50.79%

🏢 Evaluating model for GOOG_stock_gdelt_final...
✅ GOOG_stock_gdelt_final: RMSE=1.2213, 