# Sentiment Analysis

In [None]:
# train_sentiment.py (Public-safe version)

# ============ Section 1: Setup & Augmentation Utilities ============

import time
global_start_time = time.time()

# Ensure necessary nltk resources are installed (run once)
# import nltk
# nltk.download('wordnet')
# nltk.download('omw-1.4')
# nltk.download('stopwords')

import os, random
from collections import Counter
from datetime import datetime

import numpy as np
import pandas as pd
import joblib
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    TrainerCallback
)

from nltk.corpus import wordnet, stopwords
# English stop-word set. Loading it needs the NLTK 'stopwords' corpus to be
# available locally (see the commented-out nltk.download calls above).
stop_words = set(stopwords.words("english"))

# ------------ Text Augmentation (EDA) ------------
def get_synonyms(word):
    """Return the WordNet synonyms of ``word`` as a sorted list.

    Every lemma of every synset found for ``word`` is collected, with
    underscores turned into spaces and the text lower-cased. The word itself
    (compared case-insensitively) is excluded.

    Args:
        word: The word to look up in WordNet.

    Returns:
        list[str]: Unique synonyms in alphabetical order; empty when WordNet
        has no synsets for ``word``.
    """
    synonyms = {
        lemma.name().replace('_', ' ').lower()
        for syn in wordnet.synsets(word)
        for lemma in syn.lemmas()
    }
    synonyms.discard(word.lower())
    # A bare list(set) has an order that depends on string-hash randomisation,
    # so random picks from it would differ between runs even with the seeds
    # fixed. Sorting makes the result stable.
    return sorted(synonyms)

def synonym_replacement(sentence, n):
    # Placeholder: the body is elided (`...`) in this public-safe version, so
    # calling it as written returns None.
    # NOTE(review): presumably swaps up to `n` words of `sentence` for WordNet
    # synonyms — confirm against the full source.
    ...

def random_insertion(sentence, n):
    # Placeholder: the body is elided (`...`) in this public-safe version, so
    # calling it as written returns None.
    # NOTE(review): presumably inserts `n` extra words into `sentence` at
    # random positions — confirm against the full source.
    ...

def random_swap(sentence, n):
    # Placeholder: the body is elided (`...`) in this public-safe version, so
    # calling it as written returns None.
    # NOTE(review): presumably swaps two random words of `sentence`, `n`
    # times — confirm against the full source.
    ...

def random_deletion(sentence, p):
    # Placeholder: the body is elided (`...`) in this public-safe version, so
    # calling it as written returns None.
    # NOTE(review): presumably drops each word of `sentence` with probability
    # `p` — confirm against the full source.
    ...

def eda(sentence, alpha_sr=0.1, alpha_ri=0.1, alpha_rs=0.1, p_rd=0.1):
    # Placeholder: the body is elided (`...`) in this public-safe version, so
    # calling it as written returns None.
    # eda_augment passes this function's return value to list.extend, so the
    # real implementation has to return an iterable of augmented sentences.
    # NOTE(review): the alpha_*/p_rd arguments presumably set the strength of
    # the four operations defined above — confirm against the full source.
    ...

def eda_augment(sentence, num_aug=16):
    """Collect up to ``num_aug`` augmented variants of ``sentence``.

    ``eda`` is called repeatedly and its results are accumulated until enough
    variants exist; any surplus from the last call is cut off.

    Args:
        sentence: Text to augment.
        num_aug: Number of variants wanted. Integral floats are accepted and
            converted to int.

    Returns:
        list: At most ``num_aug`` augmented sentences. Fewer are returned when
        ``eda`` stops producing output.
    """
    target = int(num_aug)  # a float would break the final slice
    augmented_sentences = []
    while len(augmented_sentences) < target:
        batch = list(eda(sentence))
        if not batch:
            # Nothing was produced; repeating the call would spin forever.
            break
        augmented_sentences.extend(batch)
    return augmented_sentences[:target]

# ------------ Custom Callback ------------
class CustomEarlyStoppingCallback(TrainerCallback):
    """Stop training on a large eval/train loss gap or a stagnant weighted F1.

    Two independent stop conditions are checked after every evaluation:

    * ``eval_loss - last logged training loss`` exceeds
      ``loss_diff_threshold``;
    * ``eval_weighted_f1`` has not improved for ``patience`` consecutive
      evaluations.

    Args:
        patience: Number of non-improving evaluations tolerated before
            stopping.
        loss_diff_threshold: Largest tolerated gap between evaluation loss and
            the most recently logged training loss.
    """

    def __init__(self, patience=5, loss_diff_threshold=1e8):
        self.patience = patience
        self.loss_diff_threshold = loss_diff_threshold
        self.wait = 0                     # evaluations since the last F1 improvement
        self.best_f1 = None               # best eval_weighted_f1 seen so far
        self.last_train_loss = None       # latest "loss" value seen in on_log
        self.last_good_checkpoint = None  # state.best_model_checkpoint at the last save

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Remember the most recent training loss reported in ``logs``."""
        if logs and "loss" in logs:
            self.last_train_loss = logs["loss"]
        return control

    def on_save(self, args, state, control, **kwargs):
        """Remember the trainer's best checkpoint path as of this save."""
        self.last_good_checkpoint = state.best_model_checkpoint
        return control

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        """Apply both stop conditions to the latest evaluation metrics.

        Returns ``control``, with ``should_training_stop`` set to True when
        either condition fires.
        """
        if not metrics:
            # No metrics were supplied, so there is nothing to judge.
            return control

        eval_loss = metrics.get("eval_loss")
        eval_f1 = metrics.get("eval_weighted_f1")

        if self.last_train_loss is not None and eval_loss is not None:
            if (eval_loss - self.last_train_loss) > self.loss_diff_threshold:
                print(f"📉 Loss difference exceeded threshold")
                control.should_training_stop = True
                # NOTE(review): best_model_checkpoint looks like a TrainerState
                # field rather than a TrainerControl one, so this assignment
                # probably has no effect on which checkpoint is reloaded —
                # confirm the intent before relying on it.
                control.best_model_checkpoint = self.last_good_checkpoint
                return control

        if eval_f1 is None:
            # The metric is missing; comparing None with a float would raise,
            # and the patience counter cannot be updated meaningfully.
            return control

        if self.best_f1 is None or eval_f1 > self.best_f1:
            self.best_f1 = eval_f1
            self.wait = 0
        else:
            self.wait += 1
            if self.wait >= self.patience:
                print(f"📌 Early stopping triggered due to stagnant F1")
                control.should_training_stop = True

        return control


## ✅ Hyperparameters

In [None]:
# Grid-search value lists for the sentiment model.
# Every 1e8 is a masked placeholder from the public-safe release, not a real
# setting.
eda_mix_ratios = [1e8]        # EDA augmentation ratio (masked)
learning_rates_sent = [1e8]   # Learning rate (masked)
batch_sizes_sent = [1e8]      # Batch size (masked)
num_epochs_list_sent = [1e8]  # Max number of epochs (masked)
weight_decays_sent = [1e8]    # L2 weight decay (masked)
recent_days_list = [1e8]      # Lookback window for data filtering (masked)

In [None]:
# ============ Section 2: Path Configuration ============

# Relative working folders: input data, saved models, result files
base_dir, models_dir, results_dir = './data', './models', './results'

# Best-effort creation: a folder that cannot be created for lack of
# permission is reported and skipped rather than aborting the run.
for directory in (base_dir, models_dir, results_dir):
    try:
        os.makedirs(directory, exist_ok=True)
    except PermissionError:
        print(f"[Warning] No permission to create: {directory} (skipped)")

print("Section 2 complete: Paths set and directories prepared.")

In [None]:
# ============ Section 3: Load Comments and Apply EDA Augmentation ============

# glob is used below but is not among the imports of the setup cell.
import glob

# Load comment CSV files. glob returns paths in arbitrary order, so they are
# sorted to keep the concatenated row order the same from run to run.
comment_pattern = os.path.join(base_dir, "your_comment_file_*.csv")
comment_files = sorted(glob.glob(comment_pattern))

if not comment_files:
    raise FileNotFoundError("No comment files found matching the pattern.")

print(f"Loaded {len(comment_files)} comment file(s).")

# Combine all CSVs into a single DataFrame
df_original = pd.concat([pd.read_csv(f) for f in comment_files], ignore_index=True)

# Parse timestamp column. The infer_datetime_format argument is deprecated in
# pandas 2.x (format inference is the default there), so it is not passed.
df_original['timestamp'] = pd.to_datetime(df_original['timestamp'])

print(f"Original samples loaded: {df_original.shape[0]}")

# Keep only rows whose sentiment label is one of the three valid classes
valid_sentiment_values = [1, 2, 3]
df_original = df_original[df_original['sentimental'].isin(valid_sentiment_values)].copy()

# Remove rows with empty or null text
df_original = df_original[df_original['text'].notna()]
df_original = df_original[df_original['text'].str.strip() != ""]

print(f"Cleaned dataset size: {df_original.shape[0]} samples")

# Fix random seed for reproducibility
random.seed(42)
np.random.seed(42)

# Pre-generate EDA augmented samples (num_aug per row). Each augmented row
# keeps the coin, timestamp and label of the row it was derived from.
augmented_data = []
num_aug = 1e8  # masked placeholder from the public-safe release

for _, row in df_original.iterrows():
    aug_texts = eda_augment(row['text'], num_aug=num_aug)
    for aug in aug_texts:
        augmented_data.append({
            "coin":        row["coin"],
            "timestamp":   row["timestamp"],
            "text":        aug,
            "sentimental": row["sentimental"]
        })

aug_df_all = pd.DataFrame(augmented_data)

print(f"EDA augmentation completed: {aug_df_all.shape[0]} samples generated")
print("Section 3 complete: Original and augmented datasets prepared.")

In [None]:
# ============ Section 4: Dataset Splitting & Grid Search Loop ============

from datetime import timedelta
import itertools

# Store best results across grid search
tuning_results_sent = []
best_val_metric_sent = None
best_model_dir_sent = None
best_tokenizer_sent = None
best_recent_days_sent = None

# Grid search across all hyperparameter combinations
for eda_mix_ratio, lr, batch_size, num_epochs, weight_decay, recent_days in itertools.product(
    eda_mix_ratios,
    learning_rates_sent,
    batch_sizes_sent,
    num_epochs_list_sent,
    weight_decays_sent,
    recent_days_list
):
    print(f"\n▶️ Grid search combination: eda={1e8}, lr={1e8}, bs={1e8}, ep={1e8}, wd={1e8}, days={1e8}")

    # --- 1) Split into Train/Val/Test using timestamp cutoff ---
    # The most recent `recent_days` days form the test set; everything older
    # is split 80/20 by time into train and validation.
    last_ts = df_original['timestamp'].max()
    cutoff_ts = last_ts - timedelta(days=recent_days)

    test_df = df_original[df_original['timestamp'] >= cutoff_ts].copy()
    train_val_df = df_original[df_original['timestamp'] < cutoff_ts].copy()

    split_point = train_val_df['timestamp'].quantile(0.8)
    train_df = train_val_df[train_val_df['timestamp'] < split_point].copy()
    val_df   = train_val_df[train_val_df['timestamp'] >= split_point].copy()

    # --- 2) Apply EDA-augmented samples to training set ---
    # aug_df_all was generated from every row of df_original, including rows
    # that now sit in the validation and test periods. Only augmentations whose
    # source timestamp falls in the training period are eligible; otherwise
    # paraphrases of held-out text would leak into training.
    if aug_df_all.empty:
        aug_pool = aug_df_all
    else:
        aug_pool = aug_df_all[aug_df_all['timestamp'] < split_point]

    # Cap the request at the pool size: sampling without replacement raises
    # when asked for more rows than exist.
    n_aug = min(int(len(train_df) * eda_mix_ratio), len(aug_pool))
    aug_sampled = aug_pool.sample(n=n_aug, random_state=42)
    train_combined = pd.concat([train_df, aug_sampled], ignore_index=True)

    # Prepare labels (convert 1..3 to 0-indexed 0..2)
    train_combined['label'] = train_combined['sentimental'].astype(int) - 1
    val_df['label'] = val_df['sentimental'].astype(int) - 1
    test_df['label'] = test_df['sentimental'].astype(int) - 1

    print(f"Train: {train_combined.shape}, Val: {val_df.shape}, Test: {test_df.shape}")
    print("Train label distribution:\n", train_combined['label'].value_counts())
    print("Val label distribution:\n", val_df['label'].value_counts())

# (Section 5 continues with tokenizer setup and model training using Trainer)
print("Section 4 complete: Data split and grid search loop initialized.")


In [None]:
# ============ Section 5: Tokenizer, Datasets, and EarlyStopping ============

MAX_LEN = 128  # Token budget per text; default max_length of the datasets below

# --- Load Tokenizer ---
# BERTweet tokenizer, explicitly requesting the non-fast implementation
tokenizer = AutoTokenizer.from_pretrained(
    "vinai/bertweet-base",
    use_fast=False,
)

# --- PyTorch Dataset for training ---
class TextDataset(Dataset):
    """Map-style dataset that tokenizes one row's text and attaches its label.

    Args:
        df: DataFrame with a "text" and a "label" column; its index is reset.
        tokenizer: Callable tokenizer applied to each text.
        max_length: Length every text is truncated or padded to.
    """

    def __init__(self, df, tokenizer, max_length=MAX_LEN):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the tokenizer outputs for row ``idx`` plus a "labels" tensor."""
        record = self.df.iloc[idx]
        tokens = self.tokenizer(
            record["text"],
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        # return_tensors="pt" adds a leading dimension of size 1; drop it so
        # each item is a single example.
        item = {}
        for key, value in tokens.items():
            item[key] = value.squeeze(0)
        item["labels"] = torch.tensor(record["label"])
        return item

# --- Create Datasets ---
# Same tokenizer for both splits; training frame first, validation second
train_dataset, val_dataset = (
    TextDataset(frame, tokenizer) for frame in (train_combined, val_df)
)

# --- Optional: Dataset for inference/testing ---
class SimpleSentimentDataset(Dataset):
    """Map-style dataset that tokenizes one row's text, without any label.

    Args:
        df: DataFrame with a "text" column; its index is reset.
        tokenizer: Callable tokenizer applied to each text.
        max_length: Length every text is truncated or padded to.
    """

    def __init__(self, df, tokenizer, max_length=MAX_LEN):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the tokenizer outputs for row ``idx``."""
        record = self.df.iloc[idx]
        tokens = self.tokenizer(
            record['text'],
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors='pt',
        )
        # Drop the leading size-1 dimension added by return_tensors='pt'
        item = {}
        for key, value in tokens.items():
            item[key] = value.squeeze(0)
        return item

print("Section 5 complete: Tokenizer and Dataset classes initialized.")


In [None]:
# ============ Section 6: Trainer Setup and Training ============

from transformers import TrainingArguments, Trainer, AutoModelForSequenceClassification
from sklearn.metrics import f1_score, confusion_matrix
from transformers import TextClassificationPipeline

output_dir = "./tmp_sentiment_model"


def _sentiment_eval_metrics(p):
    """Weighted F1 of the arg-max predictions against the reference labels."""
    predicted = np.argmax(p.predictions, axis=1)
    return {
        "eval_weighted_f1": f1_score(p.label_ids, predicted, average="weighted")
    }


# Evaluate and checkpoint once per epoch; the checkpoint with the best
# eval_weighted_f1 is reloaded when training ends. The 1e8 values are masked
# placeholders from the public-safe release.
training_args = TrainingArguments(
    output_dir=output_dir,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=1e8,
    weight_decay=1e8,
    num_train_epochs=int(1e8),
    per_device_train_batch_size=int(1e8),
    per_device_eval_batch_size=int(1e8),
    metric_for_best_model="eval_weighted_f1",
    load_best_model_at_end=True,
    logging_steps=50,
    report_to=[],
    seed=42,
)

# BERTweet encoder with a 3-class classification head
model_sent = AutoModelForSequenceClassification.from_pretrained(
    "vinai/bertweet-base",
    num_labels=3,
)

trainer_sent = Trainer(
    model=model_sent,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=_sentiment_eval_metrics,
    callbacks=[CustomEarlyStoppingCallback(patience=1e8, loss_diff_threshold=1e8)],
)

# Train model
trainer_sent.train()

# Evaluate on validation set
val_metrics = trainer_sent.evaluate(eval_dataset=val_dataset)
val_f1 = val_metrics.get("eval_weighted_f1", None)
print(f"Validation Weighted F1: {val_f1:.4f}")

# Record this run. The summary cell later picks the best entry with
# x["val_f1"], so every entry must be a dict carrying that key; the previous
# `{...}` literal was a set containing Ellipsis and could not be indexed.
tuning_results_sent.append({
    "eda_mix_ratio": eda_mix_ratio,
    "learning_rate": lr,
    "batch_size": batch_size,
    "num_epochs": num_epochs,
    "weight_decay": weight_decay,
    "recent_days": recent_days,
    "val_f1": val_f1,
})

# Save best model if improved
if best_val_metric_sent is None or val_f1 > best_val_metric_sent:
    best_val_metric_sent = val_f1
    best_model_dir_sent = output_dir
    best_tokenizer_sent = tokenizer
    best_recent_days_sent = recent_days
    trainer_sent.save_model(output_dir)

# ============ Save Final Best Model and Tokenizer ============

from datetime import datetime

# Date stamp (YYYYMMDD) shared by both export folder names
today_str = datetime.today().strftime("%Y%m%d")
sentiment_model_dir = os.path.join(models_dir, f"sentiment_model_hf_{today_str}")
sentiment_tokenizer_dir = os.path.join(models_dir, f"sentiment_tokenizer_hf_{today_str}")

# Reload the winning weights from the grid-search folder and export them
os.makedirs(sentiment_model_dir, exist_ok=True)
best_model = AutoModelForSequenceClassification.from_pretrained(best_model_dir_sent)
best_model.save_pretrained(sentiment_model_dir)

# Export the matching tokenizer next to the model
os.makedirs(sentiment_tokenizer_dir, exist_ok=True)
best_tokenizer_sent.save_pretrained(sentiment_tokenizer_dir)

print(f"Best model saved to {sentiment_model_dir}")
print(f"Tokenizer saved to {sentiment_tokenizer_dir}")

# ============ Evaluate on Test Set and Generate Sentiment Score ============

# Use the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model.to(device)
best_model.eval()

test_dataset = TextDataset(test_df, best_tokenizer_sent, max_length=MAX_LEN)
test_loader = DataLoader(test_dataset, batch_size=32)

# Batch-wise inference without gradient tracking; predictions and reference
# labels are gathered on the CPU.
all_preds = []
all_labels = []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = best_model(input_ids=input_ids, attention_mask=attention_mask)
        preds = outputs.logits.argmax(dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

test_f1 = f1_score(all_labels, all_preds, average="weighted")
test_cm = confusion_matrix(all_labels, all_preds)

print("\n[Test Set Evaluation]")
print(f"Test Weighted F1: {test_f1:.4f}")
print(f"Confusion Matrix:\n{test_cm}")

# Generate daily sentiment score
sentiment_pipe = TextClassificationPipeline(
    model=best_model,
    tokenizer=best_tokenizer_sent,
    top_k=1,
    device=0 if torch.cuda.is_available() else -1
)

test_texts = test_df["text"].tolist()
test_preds = sentiment_pipe(
    test_texts,
    batch_size=32,
    truncation=True,
    padding=True,
    max_length=MAX_LEN
)

# Each prediction is a one-element list; the class id is the number after the
# last underscore of its label string.
# NOTE(review): this assumes labels shaped like "LABEL_<id>" — confirm the
# saved model config does not define custom label names.
test_label_ids = [int(pred[0]['label'].split('_')[-1]) for pred in test_preds]
# Overwrites the reference labels in test_df with the predicted class ids.
test_df["label"] = test_label_ids

# Average predicted class per coin and calendar day. The date key is named
# "date" explicitly: left unnamed it inherits the name "timestamp", and the
# later merge with the technical indicators joins on ["coin", "date"].
test_coin_daily_sentiment = (
    test_df.groupby(["coin", test_df["timestamp"].dt.date.rename("date")])["label"]
    .mean()
    .reset_index()
    .rename(columns={"label": "sentiment_score"})
)

print("Daily sentiment scores generated.")

In [None]:
# Pick the grid-search entry with the highest validation F1 and list its fields
best_result = max(tuning_results_sent, key=lambda entry: entry["val_f1"])
print("🏆 Best Hyperparameter Combination:")
for field_name in best_result:
    print(f"{field_name}: {best_result[field_name]}")

## ⬆️Training

# Crypto Price Forecasting

In [None]:
# ============ Section 7: Load Technical Indicators, Merge Sentiment, Scale Features ============

# glob is used below but is not among the imports of the setup cell.
import glob

# Load technical indicator CSVs. glob returns paths in arbitrary order, so
# they are sorted to keep the concatenated row order the same from run to run.
ti_pattern = os.path.join(base_dir, "your_technical_file_*.csv")
ti_files = sorted(glob.glob(ti_pattern))

if not ti_files:
    raise FileNotFoundError("No technical indicator files found.")

print(f"Found {len(ti_files)} technical indicator file(s).")

ti_df = pd.concat([pd.read_csv(f) for f in ti_files], ignore_index=True)
ti_df["timestamp"] = pd.to_datetime(ti_df["timestamp"])
# Calendar date of each row, used as the join key for daily sentiment scores
ti_df["date"] = ti_df["timestamp"].dt.date

# Split by timestamp
# NOTE(review): cutoff_ts is not assigned in this cell; it is set inside the
# Section 4 grid-search loop, so this uses the value left by its last iteration.
train_index_df = ti_df[ti_df["timestamp"] < cutoff_ts].copy()
test_index_df = ti_df[ti_df["timestamp"] >= cutoff_ts].copy()

# Merge sentiment scores
# NOTE(review): train_coin_daily_sentiment is never assigned in the visible
# source (only test_coin_daily_sentiment is) — confirm where it comes from,
# otherwise the first merge raises NameError.
# how="inner" keeps only (coin, date) pairs that exist on both sides.
train_index_df = pd.merge(
    train_index_df, train_coin_daily_sentiment,
    on=["coin", "date"], how="inner"
)
test_index_df = pd.merge(
    test_index_df, test_coin_daily_sentiment,
    on=["coin", "date"], how="inner"
)

# Optional sentiment score boosting
# NOTE(review): 1e8 is presumably a masked placeholder for the real
# multiplier, like the other 1e8 literals in this file — confirm.
train_index_df["sentiment_score"] *= 1e8
test_index_df["sentiment_score"] *= 1e8

# Model inputs: nine technical indicators followed by the sentiment score
feature_cols = [
    "sma",
    "ema",
    "macd",
    "macd_signal",
    "macd_diff",
    "rsi",
    "stochastic",
    "mfi",
    "cci",
    "sentiment_score",
]

# In-place clean-up of both frames: missing MFI becomes 0 and an "id" column,
# when present, is removed.
for df in (train_index_df, test_index_df):
    present = df.columns
    if "mfi" in present:
        df["mfi"] = df["mfi"].fillna(0)
    if "id" in present:
        df.drop(columns=["id"], inplace=True)

# Standardize every feature except sentiment_score. The scaler is fitted on
# the training frame only and then applied unchanged to the test frame.
numeric_cols = [name for name in feature_cols if name != "sentiment_score"]
scaler = StandardScaler()

scaled_train = scaler.fit_transform(train_index_df[numeric_cols])
scaled_test = scaler.transform(test_index_df[numeric_cols])

# Wrap the scaled arrays with the original row index so they line up again
scaled_train_df = pd.DataFrame(
    scaled_train, columns=numeric_cols, index=train_index_df.index
)
scaled_test_df = pd.DataFrame(
    scaled_test, columns=numeric_cols, index=test_index_df.index
)

# Swap the raw numeric columns for their scaled versions
final_train_df = pd.concat(
    [train_index_df.drop(columns=numeric_cols), scaled_train_df], axis=1
)
final_test_df = pd.concat(
    [test_index_df.drop(columns=numeric_cols), scaled_test_df], axis=1
)

# Binary target from price_trend: "down" -> 0, "up" -> 1.
# Rows with any missing value, or with any other trend value, are dropped.
train_complete = final_train_df.dropna()
train_keep = train_complete["price_trend"].isin(["down", "up"])
final_train_df = train_complete[train_keep].copy()
final_train_df["label"] = final_train_df["price_trend"].map({"down": 0, "up": 1})

test_complete = final_test_df.dropna()
test_keep = test_complete["price_trend"].isin(["down", "up"])
final_test_df = test_complete[test_keep].copy()
final_test_df["label"] = final_test_df["price_trend"].map({"down": 0, "up": 1})

print("✅ PatchTST-compatible datasets prepared.")

# Persist the fitted scaler under a date-stamped (YYYYMMDD) file name
today_str = datetime.today().strftime("%Y%m%d")
scaler_path = os.path.join(
    models_dir,
    f"scaler_standard_{today_str}.pkl",
)
joblib.dump(scaler, scaler_path)
print(f"✅ Scaler saved to: {scaler_path}")


In [None]:
# ============ Section 8: PatchTST Dataset & Model Definition ============

# Use the GPU when one is available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# --- Sliding-window-based time series dataset for coin indicators ---
class CoinTimeSeriesDataset(Dataset):
    """Fixed-length windows over each coin's time-ordered rows.

    Rows are grouped by "coin" and sorted by "timestamp". Every run of
    ``window_size`` consecutive rows becomes one sample, labelled with the
    "label" value of the window's last row. Coins with fewer than
    ``window_size`` rows contribute no samples.

    Args:
        df: DataFrame with "coin", "timestamp", "label" and the feature columns.
        window_size: Number of consecutive rows per sample.
        feature_cols: Column names used as model inputs, in order.
    """

    def __init__(self, df, window_size, feature_cols):
        self.samples = []
        for coin_id, coin_rows in df.groupby('coin'):
            ordered = coin_rows.sort_values('timestamp')
            features = ordered[feature_cols].values.astype(np.float32)
            targets = ordered['label'].values.astype(np.int64)
            n_windows = len(features) - window_size + 1
            self.samples.extend(
                (
                    features[start:start + window_size],
                    targets[start + window_size - 1],
                    coin_id,
                )
                for start in range(n_windows)
            )

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return the window as tensor "x", its label as "labels", and its coin."""
        window, target, coin_id = self.samples[idx]
        return {
            "x": torch.tensor(window),
            "labels": torch.tensor(target),
            "coin": coin_id,
        }

# --- PatchTST model (Transformer-based time series classifier) ---
class PatchTST(nn.Module):
    """Patch-based Transformer encoder followed by a linear classifier.

    The input window is cut into consecutive patches of ``patch_size`` steps.
    Each patch is flattened and projected to ``d_model``, the patch sequence
    goes through a Transformer encoder, the encoder outputs are averaged over
    patches, and a linear layer produces the class logits.

    Args:
        input_size: Number of features per time step.
        d_model: Width of the Transformer encoder.
        num_layers: Number of encoder layers.
        num_heads: Number of attention heads per layer.
        patch_size: Time steps per patch; must divide ``window_size``.
        window_size: Time steps per input window.
        num_classes: Number of output classes.
        dropout: Dropout rate inside the encoder layers.
    """

    def __init__(self, input_size, d_model, num_layers, num_heads,
                 patch_size, window_size, num_classes, dropout=0.3):
        super().__init__()
        assert window_size % patch_size == 0, "patch_size must divide window_size exactly."
        self.patch_size = patch_size

        # One flattened patch (patch_size steps x input_size features) -> d_model
        self.input_proj = nn.Linear(input_size * patch_size, d_model)

        layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=num_heads,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(layer, num_layers=num_layers)

        self.fc = nn.Linear(d_model, num_classes)
        self.loss_fct = nn.CrossEntropyLoss()

    def forward(self, x, labels=None):
        """Classify a batch of windows.

        Args:
            x: Tensor of shape (batch, window, features).
            labels: Optional class indices; when given, the cross-entropy
                loss is returned as well.

        Returns:
            dict: {"logits": ...}, preceded by "loss" when ``labels`` is given.
        """
        n_batch, n_steps, n_features = x.shape
        assert n_steps % self.patch_size == 0, "window_size must be divisible by patch_size"

        # (batch, window, features) -> (batch, num_patches, patch_size * features)
        patches = x.view(
            n_batch,
            n_steps // self.patch_size,
            n_features * self.patch_size,
        )
        encoded = self.transformer(self.input_proj(patches))
        logits = self.fc(encoded.mean(dim=1))  # average over the patch axis

        if labels is None:
            return {"logits": logits}
        return {"loss": self.loss_fct(logits, labels), "logits": logits}

## ✅ Hyperparameters

In [None]:
# ============ PatchTST Hyperparameters ============

# Grid-search value lists. Every 1e8 is a masked placeholder from the
# portfolio release.
tunable_params_ts = dict(
    patch_lens=[1e8],       # Patch size
    window_sizes=[1e8],     # Sliding window size
    d_models=[1e8],         # Transformer model dimension
    dropouts=[1e8],         # Dropout rate
    learning_rates=[1e8],   # Learning rate
    weight_decays=[1e8],    # L2 regularization
)

# Settings shared by every run (can be adjusted as needed)
fixed_params_ts = dict(
    num_layers=1e8,
    num_heads=1e8,
    num_classes=2,
    input_size=len(feature_cols),  # follows the feature list
    batch_size=1e8,
    num_train_epochs=1e8,  # EarlyStopping will override
)

In [None]:
# ============ Section 9: PatchTST Grid Search Training & Evaluation ============

# Both names are used below but are not among the imports of the setup cell.
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

grid_search_results_ts = []
best_val_f1_ts = None
best_test_f1_ts = None
best_ts_model = None
best_ts_model_dir = None
best_ts_hyperparams = None
best_test_ds = None  # test windows built with the best run's window size

today_str = datetime.now().strftime('%Y%m%d')

for pl, dm, do, ws, lr_ts, wd_ts in itertools.product(
    tunable_params_ts["patch_lens"],
    tunable_params_ts["d_models"],
    tunable_params_ts["dropouts"],
    tunable_params_ts["window_sizes"],
    tunable_params_ts["learning_rates"],
    tunable_params_ts["weight_decays"]
):
    print(f"\n▶️ Experiment: patch=1e8, d_model=1e8, dropout=1e8, window=1e8, lr=1e8, wd=1e8")

    # Dataset creation
    train_ds = CoinTimeSeriesDataset(final_train_df, window_size=ws, feature_cols=feature_cols)
    test_ds = CoinTimeSeriesDataset(final_test_df, window_size=ws, feature_cols=feature_cols)

    # Stratified train/val split. Classes with a single sample cannot be
    # stratified, so their windows are left out. Counting once with Counter
    # replaces a list.count call per sample (quadratic in the sample count).
    labels = [int(s[1]) for s in train_ds.samples]
    label_counts = Counter(labels)
    valid_idx = [i for i, lab in enumerate(labels) if label_counts[lab] >= 2]
    strat_labels = [labels[i] for i in valid_idx]
    tr_idx, vl_idx = train_test_split(valid_idx, test_size=1e8, stratify=strat_labels, random_state=42)
    tr_ds = Subset(train_ds, tr_idx)
    vl_ds = Subset(train_ds, vl_idx)

    # Training arguments (1e8 values are masked placeholders)
    ts_output_dir = f"./tmp_ts_model"

    training_args_ts = TrainingArguments(
        output_dir=ts_output_dir,
        eval_strategy="epoch",
        save_strategy="epoch",
        per_device_train_batch_size=int(1e8),
        per_device_eval_batch_size=int(1e8),
        num_train_epochs=int(1e8),
        learning_rate=1e8,
        weight_decay=1e8,
        load_best_model_at_end=True,
        metric_for_best_model="weighted_f1",
        seed=42,
        logging_steps=50,
        report_to=[]
    )

    # Model initialization
    patch_model = PatchTST(
        input_size=fixed_params_ts["input_size"],
        d_model=dm,
        num_layers=int(fixed_params_ts["num_layers"]),
        num_heads=int(fixed_params_ts["num_heads"]),
        patch_size=pl,
        window_size=ws,
        num_classes=fixed_params_ts["num_classes"],
        dropout=do
    )

    ts_trainer = Trainer(
        model=patch_model,
        args=training_args_ts,
        train_dataset=tr_ds,
        eval_dataset=vl_ds,
        compute_metrics=lambda p: {
            "weighted_f1": f1_score(p.label_ids, np.argmax(p.predictions, axis=1), average="weighted")
        },
        callbacks=[CustomEarlyStoppingCallback(patience=1e8, loss_diff_threshold=1e8)]
    )

    ts_trainer.train()

    val_metrics = ts_trainer.evaluate(eval_dataset=vl_ds)
    val_f1 = val_metrics.get("eval_weighted_f1", None)
    # Test for None explicitly: a legitimate score of 0.0 is falsy and would
    # otherwise be reported as a failed validation.
    print(f"Validation Weighted F1: {val_f1:.4f}" if val_f1 is not None else "Validation failed.")

    if best_val_f1_ts is None or (val_f1 is not None and val_f1 > best_val_f1_ts):
        best_val_f1_ts = val_f1
        best_ts_model = ts_trainer.model
        best_ts_model_dir = ts_output_dir
        # Keep the test windows that match this model's window size. Using
        # test_ds after the loop would pick up the last iteration's windows,
        # which may have a different length.
        best_test_ds = test_ds
        best_ts_hyperparams = {
            "patch_len": 1e8, "d_model": 1e8, "dropout": 1e8,
            "window_size": 1e8, "learning_rate": 1e8, "weight_decay": 1e8
        }

# Evaluate best model on test set
if best_ts_model is not None:
    best_ts_model.to(device)
    best_ts_model.eval()

    if len(best_test_ds) > 0:
        test_loader = DataLoader(best_test_ds, batch_size=int(1e8), shuffle=False)

        all_logits, all_labels = [], []
        with torch.no_grad():
            for batch in test_loader:
                x = batch["x"].to(device)
                y = batch["labels"].to(device)
                outputs = best_ts_model(x)
                all_logits.append(outputs["logits"])
                all_labels.append(y)

        final_logits = torch.cat(all_logits, dim=0)
        final_labels = torch.cat(all_labels, dim=0)
        all_preds = torch.argmax(final_logits, dim=1)

        test_f1 = f1_score(final_labels.cpu(), all_preds.cpu(), average="weighted")
        test_cm = confusion_matrix(final_labels.cpu(), all_preds.cpu())
        test_loss = nn.CrossEntropyLoss()(final_logits, final_labels).item()
        best_test_f1_ts = test_f1

        print("\n🏆 Best PatchTST Model (hyperparameters masked)")
        print(f"Test Weighted F1: {test_f1:.4f}")
        print(f"Test Loss: {test_loss:.4f}")
        print(f"Test Confusion Matrix:\n{test_cm}")
    else:
        # torch.cat fails on an empty list, so skip scoring when the test
        # frame is too short to form a single window.
        print("⚠️ Test set produced no windows; skipping test evaluation.")

    save_ts_path = os.path.join(models_dir, f"timeseries_model_{today_str}.pt")
    torch.save({
        'model_state_dict': best_ts_model.state_dict(),
        'hyperparams': best_ts_hyperparams
    }, save_ts_path)
    print(f"✅ PatchTST model saved to: {save_ts_path}")
else:
    print("⚠️ No valid PatchTST model to save.")

# Wall-clock time since global_start_time was taken
total_elapsed = -(global_start_time - time.time())
print(f"\n⏱️ Total pipeline runtime: {total_elapsed:.2f} seconds")


## ⬆️Training