In [1]:
import pandas as pd

In [2]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import AutoConfig
from tsfm_public.models.tspulse.modeling_tspulse import TSPulseForClassification
from collections import OrderedDict
from safetensors.torch import load_file
import os
import logging
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Configure logging to show all info
logging.basicConfig(level=logging.INFO, format='INFO:%(name)s:%(message)s')
logger = logging.getLogger(__name__)


In [3]:

# --- Functions ---
def load_tspulse_model(ckpt: str, model_class, config=None):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if config is None:
        config = AutoConfig.from_pretrained(ckpt, trust_remote_code=True)
    
    st_path = os.path.join(ckpt, "model.safetensors")
    bin_path = os.path.join(ckpt, "pytorch_model.bin")
    
    state_dict = None
    if os.path.exists(st_path):
        state_dict = load_file(st_path, device="cpu")
    elif os.path.exists(bin_path):
        state_dict = torch.load(bin_path, map_location="cpu")
    
    if state_dict is not None:
        new_sd = OrderedDict()
        for k, v in state_dict.items():
            nk = k.replace("_orig_mod.", "", 1) if k.startswith("_orig_mod.") else k
            new_sd[nk] = v
        model = model_class(config).to(device)
        model.load_state_dict(new_sd, strict=False)
    else:
        raise FileNotFoundError(f"No model file found in checkpoint directory: {ckpt}")
    
    return model

In [4]:
class ClassificationDataset(Dataset):
    def __init__(self, X: np.ndarray, y: np.ndarray):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return { "past_values": self.X[idx], "target_values": self.y[idx] }

def evaluate_model(model, dataloader, device, X_test_raw, label_name, num_labels):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            inputs = {k: v.to(device) for k, v in batch.items() if k != 'target_values'}
            labels = batch['target_values'].to(device)
            
            inputs['past_observed_mask'] = (inputs['past_values'] != 0.0)
            outputs = model(**inputs) 

            logits = outputs.prediction_outputs
            if logits.dim() > 2:
                logits = logits.mean(dim=1)
            
            if labels.dim() > 1:
                labels = labels[:, 0]
            labels = labels.view(-1)
            predictions = torch.argmax(logits, dim=-1)
            all_preds.extend(predictions.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    f1_macro = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    
    if np.any(all_labels == 1):
        true_mean_label_1 = X_test_raw[all_labels == 1].mean()
    else:
        true_mean_label_1 = np.nan

    if np.any(all_preds == 1):
        predicted_mean_label_1 = X_test_raw[all_preds == 1].mean()
    else:
        predicted_mean_label_1 = np.nan

    predicted_positives_count = np.sum(all_preds == 1)

    print("\n" + "="*40)
    print(f"Classification Report for: {label_name}")
    print("="*40)
    all_possible_labels = list(range(num_labels))
    target_names = [f"class_{i}" for i in all_possible_labels]
    report_dict = classification_report(
    all_labels,
    all_preds,
    labels=all_possible_labels,
    target_names=target_names,
    zero_division=0,
    output_dict=True
)

    report_text = classification_report(
        all_labels,
        all_preds,
        labels=all_possible_labels,
        target_names=target_names,
        zero_division=0
    )
    print(report_text)
    print("="*40 + "\n")

    # Extract macro precision and recall
    precision_macro = report_dict["macro avg"]["precision"]
    recall_macro = report_dict["macro avg"]["recall"]

    return {
        "accuracy": accuracy,
        "f1_macro": f1_macro,
        "precision_macro": precision_macro,
        "recall_macro": recall_macro,
        "true_mean_label_1": true_mean_label_1,
        "predicted_mean_label_1": predicted_mean_label_1,
        "predicted_positives_count": predicted_positives_count
    }


In [5]:


# --- Data Loading and Preprocessing ---
df = pd.read_csv('../Dataset/Classification/comstock_60min_small.csv')
labels_df = pd.read_csv('../Dataset/Classification/comstock_60min_labels.csv')

X_raw = df.drop(columns=[ "Timestamp"]).T.values  # "Unnamed: 0",
context_length = 512
num_series, series_length = X_raw.shape
X_processed = np.zeros((num_series, context_length))

for i in tqdm(range(num_series), desc="Processing Series"):
    series = X_raw[i, :]
    series = np.nan_to_num(series, nan=0.0)
    if len(series) > context_length:
        X_processed[i, :] = series[:context_length]
    else:
        X_processed[i, :len(series)] = series
X_processed = X_processed[:, :, np.newaxis]

# --- Main Training Loop ---
target_labels = ["cooling_ON", "fans_ON", "heat_rejection_ON", "heating_ON", "refrigeration_ON", "water_systems_ON"]
pretrained_model_path = "../Energy-TSPulse/Pretraining/Checkpoint"
base_output_dir = "./finetuned_classification_manual_loop"
os.makedirs(base_output_dir, exist_ok=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
all_results = {}


Processing Series:   0%|          | 0/1000 [00:00<?, ?it/s]

In [6]:
config = AutoConfig.from_pretrained(pretrained_model_path, trust_remote_code=True)


In [7]:
config

TSPulseConfig {
  "architectures": [
    "TSPulseForReconstruction"
  ],
  "batch_aware_masking": false,
  "categorical_vocab_size_list": null,
  "channel_consistent_masking": true,
  "channel_mix_init": "identity",
  "channel_register_tokens": null,
  "channel_virtual_expand_scale": 1,
  "classification_mode": "full_embedding",
  "context_length": 512,
  "d_model": 16,
  "d_model_layerwise": [
    16,
    16,
    16
  ],
  "d_model_layerwise_scale": [
    1,
    1,
    1
  ],
  "data_actual_context_length": null,
  "decoder_d_model": 16,
  "decoder_d_model_layerwise": [
    16,
    16,
    16
  ],
  "decoder_d_model_layerwise_scale": [
    1,
    1,
    1
  ],
  "decoder_mode": "common_channel",
  "decoder_num_channels_layerwise": [
    1,
    1,
    1
  ],
  "decoder_num_channels_layerwise_scale": [
    1,
    1,
    1
  ],
  "decoder_num_layers": 3,
  "decoder_num_patches_layerwise": [
    64,
    64,
    64
  ],
  "decoder_num_patches_layerwise_scale": [
    1,
    1,
    1
  ],
  

In [8]:
all_results = {}

for label_name in target_labels:
    logger.info("=" * 80)
    logger.info(f"Zero-shot evaluation for label: {label_name}")

    if label_name not in labels_df.columns:
        logger.warning(f"Label '{label_name}' not found in labels file. Skipping.")
        continue

    # Encode labels (ONLY for evaluation)
    y_current = labels_df[label_name].values
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y_current)
    num_labels = len(np.unique(y_encoded))

    logger.info(
        f"Label '{label_name}' has {num_labels} unique classes: {np.unique(y_encoded)}"
    )

    if num_labels <= 1:
        logger.warning(f"Skipping label '{label_name}' as it has only one class.")
        continue

    # Use the full dataset for zero-shot evaluation
    dataset = ClassificationDataset(X_processed, y_encoded)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=False)

    X_raw_for_eval = X_processed.squeeze()

    # Load pretrained Energy-TSPulse (NO head resizing)
    config = AutoConfig.from_pretrained(
        pretrained_model_path,
        trust_remote_code=True,
    )

    model = TSPulseForClassification.from_pretrained(
        pretrained_model_path,
        config=config,
    )
    model.to(device)
    model.eval()

    logger.info("Running zero-shot inference...")

    with torch.no_grad():
        eval_results = evaluate_model(
            model,
            dataloader,
            device,
            X_raw_for_eval,
            label_name,
            num_labels,
        )

    all_results[label_name] = eval_results

    logger.info(
        f"Zero-shot results for '{label_name}' | "
        f"Accuracy: {eval_results['accuracy']:.4f} | "
        f"Macro F1: {eval_results['f1_macro']:.4f}"
    )

# --- Final Summary ---
logger.info("=" * 80)
logger.info("Final Zero-Shot Classification Summary")
logger.info("=" * 80)

results_df = pd.DataFrame(all_results).T
print(results_df)

summary_filename = "final_classification_summary_zeroshot.txt"
summary_string = results_df.to_string()

try:
    with open(summary_filename, "w") as f:
        f.write("Final Zero-Shot Classification Summary\n")
        f.write("=====================================\n\n")
        f.write(summary_string)
    logger.info(f"Final summary successfully saved to: {summary_filename}")
except Exception as e:
    logger.error(f"Failed to save summary file. Error: {e}")


INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot evaluation for label: cooling_ON
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Label 'cooling_ON' has 2 unique classes: [0 1]


INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch
INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch
Some weights of TSPulseForClassification were not initialized from the model checkpoint at ../Energy-TSPulse/Pretraining/Checkpoint and are newly initialized: ['decoder_with_head.head.head_norm.norm.bias', 'decoder_with_head.head.head_norm.norm.weight', 'decoder_with_head.head.loc_scale_norm.bias', 'decoder_with_head.head.loc_scale_norm.weight', 'decoder_with_head.head.projection.bias', 'decoder_with_head.head.projection.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Running zero-shot inference...


Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot results for 'cooling_ON' | Accuracy: 0.2870 | Macro F1: 0.2101
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot evaluation for label: fans_ON
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Label 'fans_ON' has 2 unique classes: [0 1]
INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch
INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch
Some weights of TSPulseForClassification were not initialized from the model checkpoint at ../Energy-TSPulse/Pretraining/Checkpoint and are newly initialized: ['decoder_with_head.head.head_norm.norm.bias', 'decoder_with_head.head.head_norm.norm.weight', 'decoder_with_head.head.loc_scale_norm.bias', 'decoder_with_head.head.loc_scale_norm.weight', 'decoder_with_head.head.projection.bias', 'decoder_with_head.head.projection.weight']
You should 


Classification Report for: cooling_ON
              precision    recall  f1-score   support

     class_0       0.18      0.33      0.24       236
     class_1       0.71      0.27      0.39       764

   micro avg       0.40      0.29      0.33      1000
   macro avg       0.44      0.30      0.32      1000
weighted avg       0.58      0.29      0.36      1000




Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot results for 'fans_ON' | Accuracy: 0.3410 | Macro F1: 0.1737
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot evaluation for label: heat_rejection_ON
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Label 'heat_rejection_ON' has 2 unique classes: [0 1]
INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch
INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch
Some weights of TSPulseForClassification were not initialized from the model checkpoint at ../Energy-TSPulse/Pretraining/Checkpoint and are newly initialized: ['decoder_with_head.head.head_norm.norm.bias', 'decoder_with_head.head.head_norm.norm.weight', 'decoder_with_head.head.loc_scale_norm.bias', 'decoder_with_head.head.loc_scale_norm.weight', 'decoder_with_head.head.projection.bias', 'decoder_with_head.head.projection.wei


Classification Report for: fans_ON
              precision    recall  f1-score   support

     class_0       0.00      0.00      0.00        98
     class_1       0.84      0.38      0.52       902

   micro avg       0.83      0.34      0.48      1000
   macro avg       0.42      0.19      0.26      1000
weighted avg       0.76      0.34      0.47      1000




Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot results for 'heat_rejection_ON' | Accuracy: 0.2320 | Macro F1: 0.1777
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot evaluation for label: heating_ON
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Label 'heating_ON' has 2 unique classes: [0 1]
INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch
INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch
Some weights of TSPulseForClassification were not initialized from the model checkpoint at ../Energy-TSPulse/Pretraining/Checkpoint and are newly initialized: ['decoder_with_head.head.head_norm.norm.bias', 'decoder_with_head.head.head_norm.norm.weight', 'decoder_with_head.head.loc_scale_norm.bias', 'decoder_with_head.head.loc_scale_norm.weight', 'decoder_with_head.head.projection.bias', 'decoder_with_head.head.projection.weight'


Classification Report for: heat_rejection_ON
              precision    recall  f1-score   support

     class_0       0.93      0.23      0.36       883
     class_1       0.12      0.27      0.17       117

   micro avg       0.49      0.23      0.31      1000
   macro avg       0.53      0.25      0.27      1000
weighted avg       0.84      0.23      0.34      1000




Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot results for 'heating_ON' | Accuracy: 0.5530 | Macro F1: 0.2407
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot evaluation for label: refrigeration_ON
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Label 'refrigeration_ON' has 2 unique classes: [0 1]
INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch
INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch
Some weights of TSPulseForClassification were not initialized from the model checkpoint at ../Energy-TSPulse/Pretraining/Checkpoint and are newly initialized: ['decoder_with_head.head.head_norm.norm.bias', 'decoder_with_head.head.head_norm.norm.weight', 'decoder_with_head.head.loc_scale_norm.bias', 'decoder_with_head.head.loc_scale_norm.weight', 'decoder_with_head.head.projection.bias', 'decoder_with_head.head.projection.we


Classification Report for: heating_ON
              precision    recall  f1-score   support

     class_0       0.55      1.00      0.71       551
     class_1       1.00      0.00      0.01       449

   micro avg       0.56      0.55      0.55      1000
   macro avg       0.78      0.50      0.36      1000
weighted avg       0.75      0.55      0.40      1000




INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Running zero-shot inference...


Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot results for 'refrigeration_ON' | Accuracy: 0.5090 | Macro F1: 0.2910
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot evaluation for label: water_systems_ON
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Label 'water_systems_ON' has 2 unique classes: [0 1]
INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch
INFO:p-1783847:t-133191251789632:modeling_tspulse.py:_init_weights:Initializing Linear layers with method: pytorch



Classification Report for: refrigeration_ON
              precision    recall  f1-score   support

     class_0       0.66      0.68      0.67       684
     class_1       0.34      0.15      0.20       316

   micro avg       0.61      0.51      0.55      1000
   macro avg       0.50      0.41      0.44      1000
weighted avg       0.56      0.51      0.52      1000




Some weights of TSPulseForClassification were not initialized from the model checkpoint at ../Energy-TSPulse/Pretraining/Checkpoint and are newly initialized: ['decoder_with_head.head.head_norm.norm.bias', 'decoder_with_head.head.head_norm.norm.weight', 'decoder_with_head.head.loc_scale_norm.bias', 'decoder_with_head.head.loc_scale_norm.weight', 'decoder_with_head.head.projection.bias', 'decoder_with_head.head.projection.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Running zero-shot inference...


Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Zero-shot results for 'water_systems_ON' | Accuracy: 0.0330 | Macro F1: 0.0433
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Final Zero-Shot Classification Summary
INFO:p-1783847:t-133191251789632:3931918699.py:<module>:Final summary successfully saved to: final_classification_summary_zeroshot.txt



Classification Report for: water_systems_ON
              precision    recall  f1-score   support

     class_0       0.15      0.06      0.08       274
     class_1       0.71      0.02      0.05       726

   micro avg       0.26      0.03      0.06      1000
   macro avg       0.43      0.04      0.06      1000
weighted avg       0.56      0.03      0.06      1000


                   accuracy  f1_macro  precision_macro  recall_macro  \
cooling_ON            0.287  0.210115         0.444805      0.302034   
fans_ON               0.341  0.173669         0.418919      0.189024   
heat_rejection_ON     0.232  0.177684         0.528126      0.250002   
heating_ON            0.553  0.240713         0.777163      0.502227   
refrigeration_ON      0.509  0.291022         0.501085      0.411235   
water_systems_ON      0.033  0.043255         0.430357      0.040905   

                   true_mean_label_1  predicted_mean_label_1  \
cooling_ON                  7.496923                5.8074