## Imports

In [1]:
import math
import os
import tempfile
import warnings

import numpy as np
import torch
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
from torch.utils.data import DataLoader, random_split
from transformers import EarlyStoppingCallback, Trainer, TrainingArguments, set_seed
from transformers.data.data_collator import default_data_collator
from transformers.trainer_utils import RemoveColumnsCollator


warnings.filterwarnings("ignore")

In [2]:
from tsfm_public.models.tspulse import TSPulseForClassification
from tsfm_public.toolkit.dataset import ClassificationDFDataset
from tsfm_public.toolkit.lr_finder import optimal_lr_finder
from tsfm_public.toolkit.time_series_classification_preprocessor import TimeSeriesClassificationPreprocessor
from tsfm_public.toolkit.util import convert_tsfile_to_dataframe

## Data Preprocessing

In [3]:
seed = 42
set_seed(seed)

In [4]:
dataset_name = "BasicMotions"

In [5]:
path = f"/datasets/{dataset_name}/{dataset_name}_TRAIN.ts"

df_base = convert_tsfile_to_dataframe(
    path,
    return_separate_X_and_y=False,
)
label_column = "class_vals"
input_columns = [f"dim_{i}" for i in range(df_base.shape[1] - 1)]

tsp = TimeSeriesClassificationPreprocessor(
    input_columns=input_columns,
    label_column=label_column,
    scaling=True,
)

tsp.train(df_base)
df_prep = tsp.preprocess(df_base)
base_dataset = ClassificationDFDataset(
    df_prep,
    id_columns=[],
    timestamp_column=None,
    input_columns=input_columns,
    label_column=label_column,
    context_length=512,
    static_categorical_columns=[],
    stride=1,
    enable_padding=False,
    full_series=True,
)

path = f"/datasets/{dataset_name}/{dataset_name}_TRAIN.ts"

df_test = convert_tsfile_to_dataframe(
    path,
    return_separate_X_and_y=False,
)
label_column = "class_vals"
input_columns = [f"dim_{i}" for i in range(df_test.shape[1] - 1)]

tsp = TimeSeriesClassificationPreprocessor(
    input_columns=input_columns,
    label_column=label_column,
    scaling=True,
)

tsp.train(df_test)
df_prep = tsp.preprocess(df_test)

test_dataset = ClassificationDFDataset(
    df_prep,
    id_columns=[],
    timestamp_column=None,
    input_columns=input_columns,
    label_column=label_column,
    context_length=512,
    static_categorical_columns=[],
    stride=1,
    enable_padding=False,
    full_series=True,
)

dataset_size = len(base_dataset)
print(dataset_size)
split_valid_ratio = 0.1
val_size = int(split_valid_ratio * dataset_size)  # 10% valid split
train_size = dataset_size - val_size
train_dataset, valid_dataset = random_split(base_dataset, [train_size, val_size])

40


## Configs for the TSPulse model

In [6]:
config_dict = {
    "head_gated_attention_activation": "softmax",
    "channel_virtual_expand_scale": 2,
    "mask_ratio": 0.3,
    "head_reduce_d_model": 1,
    "disable_mask_in_classification_eval": True,
    "fft_time_consistent_masking": True,
    "decoder_mode": "mix_channel",
    "head_aggregation_dim": "patch",
    "head_aggregation": None,
    "loss": "cross_entropy",
    "ignore_mismatched_sizes": True,
}

config_dict["num_input_channels"] = tsp.num_input_channels
config_dict["num_targets"] = df_base["class_vals"].nunique()

## Getting the Pretrained Model with above configs

In [7]:
model_path = "/tspulse/tspulse_classification/tspulse_model"
model = TSPulseForClassification.from_pretrained(model_path, **config_dict)

Some weights of TSPulseForClassification were not initialized from the model checkpoint at /dccstor/tsfm23/vj_share/models/tspulse_neurips/models/vela/apr_20_block_mask/fft_mix_learn_mask_with_registers_v20_scaled_consistent/tspulse_consistent_masking_var_hybrid_e20_scaled_p16_sign_w20-20250418-0024-1/models/tspulse_model and are newly initialized: ['decoder_with_head.decoder.decoder_block.mixers.0.channel_feature_mixer.gating_block.attn_layer.bias', 'decoder_with_head.decoder.decoder_block.mixers.0.channel_feature_mixer.gating_block.attn_layer.weight', 'decoder_with_head.decoder.decoder_block.mixers.0.channel_feature_mixer.mlp.fc1.bias', 'decoder_with_head.decoder.decoder_block.mixers.0.channel_feature_mixer.mlp.fc1.weight', 'decoder_with_head.decoder.decoder_block.mixers.0.channel_feature_mixer.mlp.fc2.bias', 'decoder_with_head.decoder.decoder_block.mixers.0.channel_feature_mixer.mlp.fc2.weight', 'decoder_with_head.decoder.decoder_block.mixers.0.channel_feature_mixer.norm.norm.bias',

Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: pytorch
Initializing Linear layers with method: 

In [8]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device).float()

In [9]:
# Freezing Backbone except patch embedding layer....

for param in model.backbone.parameters():
    param.requires_grad = False

for param in model.backbone.time_encoding.parameters():
    param.requires_grad = True
for param in model.backbone.fft_encoding.parameters():
    param.requires_grad = True

## Finetuning the classifier head and patch embedding layer

In [10]:
OUT_DIR = "tspulse_finetuned_models/"

In [11]:
temp_dir = tempfile.mkdtemp()

suggested_lr = None

train_dict = {"per_device_train_batch_size": 32, "num_train_epochs": 200, "eval_accumulation_steps": None}

EPOCHS = train_dict["num_train_epochs"]
BATCH_SIZE = train_dict["per_device_train_batch_size"]
eval_accumulation_steps = train_dict["eval_accumulation_steps"]
NUM_WORKERS = 1
NUM_GPUS = 1

set_seed(42)
if suggested_lr is None:
    lr, model = optimal_lr_finder(
        model,
        train_dataset,
        batch_size=BATCH_SIZE,
    )
    suggested_lr = lr
print("Suggested LR : ", suggested_lr)
finetune_args = TrainingArguments(
    output_dir=temp_dir,
    overwrite_output_dir=True,
    learning_rate=suggested_lr,
    num_train_epochs=EPOCHS,
    do_eval=True,
    eval_strategy="epoch",
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    eval_accumulation_steps=eval_accumulation_steps,
    dataloader_num_workers=NUM_WORKERS,
    report_to="tensorboard",
    save_strategy="epoch",
    logging_strategy="epoch",
    save_total_limit=1,
    logging_dir=os.path.join(OUT_DIR, "output"),  # Make sure to specify a logging directory
    load_best_model_at_end=True,  # Load the best model when training ends
    metric_for_best_model="eval_loss",  # Metric to monitor for early stopping
    greater_is_better=False,  # For loss
)

# Create the early stopping callback
early_stopping_callback = EarlyStoppingCallback(
    early_stopping_patience=100,  # Number of epochs with no improvement after which to stop
    early_stopping_threshold=0.0001,  # Minimum improvement required to consider as improvement
)

# Optimizer and scheduler
optimizer = AdamW(model.parameters(), lr=suggested_lr)
scheduler = OneCycleLR(
    optimizer,
    suggested_lr,
    epochs=EPOCHS,
    steps_per_epoch=math.ceil(len(train_dataset) / (BATCH_SIZE * NUM_GPUS)),
)

finetune_trainer = Trainer(
    model=model,
    args=finetune_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    callbacks=[early_stopping_callback],
    optimizers=(optimizer, scheduler),
)

# Fine tune
finetune_trainer.train()

INFO:p-2776908:t-23180912518144:lr_finder.py:optimal_lr_finder:LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
INFO:p-2776908:t-23180912518144:lr_finder.py:optimal_lr_finder:LR Finder: Using cuda:0.
INFO:p-2776908:t-23180912518144:lr_finder.py:optimal_lr_finder:LR Finder: Suggested learning rate = 0.007054802310718645


Suggested LR :  0.007054802310718645


Epoch,Training Loss,Validation Loss
1,1.5569,1.442582
2,1.427,1.427761
3,1.2568,1.41231
4,1.3812,1.396338
5,1.3275,1.383629
6,1.3363,1.375756
7,1.3537,1.373112
8,1.3863,1.377008
9,1.3128,1.382742
10,1.2802,1.387058


TrainOutput(global_step=400, training_loss=0.14310260522610407, metrics={'train_runtime': 64.902, 'train_samples_per_second': 110.937, 'train_steps_per_second': 6.163, 'total_flos': 49402375372800.0, 'train_loss': 0.14310260522610407, 'epoch': 200.0})

## Classification Scores

In [12]:
predictions_dict = finetune_trainer.predict(test_dataset)
preds_np = predictions_dict.predictions[0]

remove_columns_collator = RemoveColumnsCollator(
    data_collator=default_data_collator,
    signature_columns=["target_values"],
    logger=None,
    description=None,
    model_name="temp",
)

test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=remove_columns_collator)
target_list = []
for batch in test_dataloader:
    batch_labels = batch["target_values"].numpy()
    target_list.append(batch_labels)
targets_np = np.concatenate(target_list, axis=0)
test_accuracy = np.mean(targets_np == np.argmax(preds_np, axis=1))
print("test_accuracy : ", test_accuracy)

test_accuracy :  1.0
