In [1]:
!pip install -U datasets huggingface_hub fsspec

Collecting fsspec
  Using cached fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)


In [2]:
import pandas as pd
import torch
from chronos import BaseChronosPipeline, ChronosTokenizer
import numpy as np
from datasets import load_dataset, Dataset
# from torch.utils.data import DataLoader
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling, pipeline
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, DataCollatorForSeq2Seq
import accelerate
from torch.utils.data import Dataset, DataLoader

In [3]:
# return cls(
#             tokenizer=chronos_config.create_tokenizer(),
#             model=ChronosModel(config=chronos_config, model=inner_model),
#         )


pipeline = BaseChronosPipeline.from_pretrained(
    "amazon/chronos-t5-small",  # use "amazon/chronos-bolt-small" for the corresponding Chronos-Bolt model
    device_map="auto",  # use "cpu" for CPU inference
    torch_dtype=torch.bfloat16,
)

In [45]:
model = pipeline.model.to(torch.device("mps"))
tokenizer = pipeline.tokenizer

In [5]:
# from huggingface_hub import notebook_login
# notebook_login()

In [6]:
from datasets import load_dataset

# Login using e.g. `huggingface-cli login` to access this dataset
dataset = load_dataset("shaddie/thrust_curves_dataset")

Repo card metadata block was not found. Setting CardData to empty.


In [7]:
dataset

DatasetDict({
    train: Dataset({
        features: ['time', 'thrust', 'motorId', 'motor_name', 'impulse_class', 'id'],
        num_rows: 6008
    })
})

In [8]:
def generate_windows(values, context_length, prediction_length):
    inputs, targets = [], []
    total_length = context_length + prediction_length
    for i in range(len(values) - total_length + 1):
        context = values[i : i + context_length]
        target = values[i + context_length : i + total_length]
        inputs.append(context)
        targets.append(target)
    return inputs, targets

In [9]:
from collections import defaultdict
from datasets import Dataset

context_length = 24
prediction_length = 12

# Step 1: Group values by 'motorId'
groups = defaultdict(list)
for example in dataset['train']:
    id = example['id']
    time = example['time']
    motor_id = example['motorId']
    value = example['thrust']
    impulse_class = example['impulse_class']
    groups[motor_id].append({'id': id, 'time': time, 'thrust': value, 'motor_id': motor_id, 'impulse_class': impulse_class})

In [10]:
groups['5f4294d200023100000000f7']

[{'id': 0,
  'time': 0.0,
  'thrust': 0.0,
  'motor_id': '5f4294d200023100000000f7',
  'impulse_class': 'J'},
 {'id': 1,
  'time': 0.0,
  'thrust': 1.0,
  'motor_id': '5f4294d200023100000000f7',
  'impulse_class': 'J'},
 {'id': 2,
  'time': 0.0533667938931297,
  'thrust': 0.800000899236099,
  'motor_id': '5f4294d200023100000000f7',
  'impulse_class': 'J'},
 {'id': 3,
  'time': 0.4013167938931297,
  'thrust': 0.6923083840277684,
  'motor_id': '5f4294d200023100000000f7',
  'impulse_class': 'J'},
 {'id': 4,
  'time': 0.7876946564885495,
  'thrust': 0.4153841311805621,
  'motor_id': '5f4294d200023100000000f7',
  'impulse_class': 'J'},
 {'id': 5,
  'time': 0.8239847328244275,
  'thrust': 0.2307687119791736,
  'motor_id': '5f4294d200023100000000f7',
  'impulse_class': 'J'},
 {'id': 6,
  'time': 0.8709465648854962,
  'thrust': 0.1538462576041652,
  'motor_id': '5f4294d200023100000000f7',
  'impulse_class': 'J'},
 {'id': 7,
  'time': 1.0,
  'thrust': 0.0,
  'motor_id': '5f4294d200023100000000f

In [11]:
group_keys = []
for key, value in groups.items():
    group_keys.append({'id': [v['id'] for v in value][0] 
                       ,'time':[(v['time']) for v in value]
                       ,'thrust':[(v['thrust']) for v in value]
                       ,'motor_id':key
                       ,'impulse_class': [(v['impulse_class']) for v in value][0]
                      })
    # print(f'key, value {key} -> {len(value)}')

In [14]:
# Step 2: Slide windows within each group
def generate_windows(values, context_length, prediction_length):
    total_length = context_length + prediction_length
    windows = []
    for i in range(len(values) - total_length + 1):
        context = values[i : i + context_length]
        target = values[i + context_length : i + total_length]
        windows.append((context, target))
    return windows

In [15]:
# Step 3: Aggregate all windows into a flat dataset
windowed_data = []
for motor_id, values in groups.items():
    windows = generate_windows(values, context_length, prediction_length)
    # print(f'motor id {windows}')
    for context, target in windows:
        windowed_data.append({
                    'input':{'id': [c['id'] for c in context][0] 
                            ,'time':[(c['time']) for c in context]
                            ,'thrust':[(c['thrust']) for c in context]
                            ,'motor_id':[(c['motor_id']) for c in context][0]
                            ,'impulse_class': [(c['impulse_class']) for c in context][0]
                            }
                    , 'target':{'id': [t['id'] for t in target][0] 
                            ,'time':[(t['time']) for t in target]
                            ,'thrust':[(t['thrust']) for t in target]
                            ,'motor_id':[(t['motor_id']) for t in target][0]
                            ,'impulse_class': [(t['impulse_class']) for t in target][0]
                            }
        })

In [16]:
# Step 4: Build a Dataset
windowed_dataset = Dataset.from_list(windowed_data)

In [17]:
windowed_dataset

Dataset({
    features: ['input', 'target'],
    num_rows: 191
})

In [18]:
windowed_dataset['input'][0]

{'id': 23,
 'impulse_class': 'J',
 'motor_id': '5f4294d20002310000000256',
 'thrust': [0.0,
  0.9454233664183034,
  0.7429588165385054,
  1.0,
  0.7183106043750306,
  0.711268064230788,
  0.5633801479326868,
  0.7429588165385054,
  0.8327467941586668,
  0.7376758939182626,
  0.7235921703125255,
  0.7411978423317578,
  0.7411978423317578,
  0.7447184340625052,
  0.7429588165385054,
  0.7605631318749899,
  0.7834517265144649,
  0.8028170160576971,
  0.8133801479326868,
  0.8116205304086872,
  0.8098595562019396,
  0.8045779902644447,
  0.7975354501202021,
  0.7799297781009698],
 'time': [0.0,
  0.0143426294820717,
  0.0191235059760956,
  0.0247011952191235,
  0.0310756972111553,
  0.0406374501992031,
  0.0430278884462151,
  0.050199203187251,
  0.0629482071713147,
  0.0749003984063745,
  0.0844621513944223,
  0.1011952191235059,
  0.1163346613545816,
  0.1521912350597609,
  0.2231075697211156,
  0.30199203187251,
  0.3768924302788844,
  0.4446215139442231,
  0.5011952191235061,
  0.54900

In [19]:
# windowed_dataset['target'][0]

In [20]:
def preprocess_windowed(example):
    # Include motorId in input prompt if helpful (optional)
    # input_str = f"forecast: " + " ".join(map(str, example["input"]))
    # target_str = " ".join(map(str, example["target"]))
    # print(example['input']['thrust'])
    t = torch.from_numpy(np.asarray(example['input']['thrust']))
    t_shape = t.shape
    t = t.reshape(1, t_shape[0])
    # print(t.shape)

    token_ids, attention_mask, scale = tokenizer.context_input_transform(t)
    token_ids, attention_mask = tokenizer._append_eos_token(token_ids, attention_mask)
    
    # tokenizer.pad_token = tokenizer.eos_token
    return {
            "token_ids": token_ids
            , "attention_mask": attention_mask
            , "scale": scale
    }

tokenized_dataset = windowed_dataset.map(preprocess_windowed, remove_columns=windowed_dataset.column_names)

Map:   0%|          | 0/191 [00:00<?, ? examples/s]

In [21]:
tokenized_dataset

Dataset({
    features: ['token_ids', 'attention_mask', 'scale'],
    num_rows: 191
})

In [22]:
def preprocess_windowed(example):
    input_str = "forecast: " + " ".join(map(str, example["token_ids"]))
    # target_str = " ".join(map(str, example["target"]))
    return tokenizer.context_input_transform(input_str)

In [23]:
prep_tokenized_dataset = tokenized_dataset.map(preprocess_windowed, remove_columns=tokenized_dataset.column_names)

Map:   0%|          | 0/191 [00:00<?, ? examples/s]

AttributeError: 'str' object has no attribute 'shape'

In [93]:
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, padding=False)

In [81]:
import os
work_dir = os.getcwd()
model_dir = os.getcwd().replace('GitHub/chronos-forecasting','DL_Models')

In [82]:
training_args = TrainingArguments(
    output_dir=model_dir,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    num_train_epochs=5,
    logging_steps=50,
    save_steps=500,
    save_total_limit=2,
    learning_rate=2e-4,
    bf16=torch.cuda.is_bf16_supported(),
    fp16=torch.cuda.is_bf16_supported(),
    report_to="none"
)

In [83]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

NameError: name 'data_collator' is not defined

In [84]:
class TimeSeriesDataset(Dataset):
    def __init__(self, examples, tokenizer):
        self.examples = examples  # ✅ renamed from 'data'
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        example = self.examples[idx]
        input_str = f"forecast: {' '.join(map(str, example['input']))}"
        target_str = " ".join(map(str, example["target"]))

        tokenized = self.tokenizer(
            input_str,
            text_target=target_str,
            truncation=True,
            padding="max_length",
            max_length=128,
            return_tensors="pt"
        )

        return {
            "input_ids": tokenized["input_ids"].squeeze(),
            "attention_mask": tokenized["attention_mask"].squeeze(),
            "labels": tokenized["labels"].squeeze()
        }


In [85]:
# # Step 5: Train
# trainer.train()

In [86]:
# # Create a PyTorch Dataset
# class TimeSeriesDataset(Dataset):
#     def __init__(self, data, tokenizer):
#         self.data = data
#         self.tokenizer = tokenizer

#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):
#         example = self.data[idx]
#         input_str = f"forecast: {' '.join(map(str, example['input']))}"
#         target_str = " ".join(map(str, example["target"]))

#         tokenized = self.tokenizer(
#             input_str,
#             text_target=target_str,
#             truncation=True,
#             padding="max_length",
#             max_length=128,
#             return_tensors="pt"
#         )

#         # Squeeze to remove batch dimension
#         return {
#             "input_ids": tokenized["input_ids"].squeeze(),
#             "attention_mask": tokenized["attention_mask"].squeeze(),
#             "labels": tokenized["labels"].squeeze()
#         }

# Create DataLoader
train_dataset = TimeSeriesDataset(windowed_dataset, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)


In [87]:
## copied from https://github.com/amazon-science/chronos-forecasting/blob/main/scripts/training/train.py
# def __init__(
#         self,
#         datasets: list,
#         probabilities: List[float],
#         tokenizer: ChronosTokenizer,
#         context_length: int = 512,
#         prediction_length: int = 64,
#         drop_prob: float = 0.2,
#         min_past: Optional[int] = None,
#         model_type: str = "seq2seq",
#         imputation_method: Optional[MissingValueImputation] = None,
#         mode: str = "training",
#         np_dtype=np.float32,



def to_hf_format(entry: dict, tokenizer: ChronosTokenizer, model_type: str) -> dict:
        print((entry["target"]))
        past_target = torch.from_numpy(np.asarray(entry["input"]["thrust"])).unsqueeze(0)
        
        input_ids, attention_mask, scale = tokenizer.context_input_transform(
            past_target
        )
        print(f'scale {scale}')
        future_target = torch.tensor(entry["target"]["thrust"]).unsqueeze(0)
        print(f'shape past target {future_target.shape} {future_target.shape[-1]}')
        labels, labels_mask = tokenizer.label_input_transform(future_target, scale)
        labels[labels_mask == 0] = -100

        if model_type == "causal":
            # The InstanceSplitter pads time series on the left to be equal to the
            # context_length. However, certain models (e.g., GPT2) with absolute
            # position embeddings should not be trained with left padding.
            # The following piece of code moves padding from left to right.

            assert input_ids.shape[-1] == entry["past_is_pad"].shape[0]

            # Find the index where padding starts
            pad_start_idx = np.searchsorted(1 - entry["past_is_pad"], 1)
            padded_input_ids, obs_input_ids = torch.tensor_split(
                input_ids, [pad_start_idx], dim=-1
            )
            padded_attention_mask, obs_attention_mask = torch.tensor_split(
                attention_mask, [pad_start_idx], dim=-1
            )

            # Move padding to the right
            input_ids = torch.cat(
                [
                    obs_input_ids,
                    labels,
                    padded_input_ids,
                ],
                axis=-1,
            )
            attention_mask = torch.cat(
                [
                    obs_attention_mask,
                    labels_mask,
                    padded_attention_mask,
                ],
                axis=-1,
            )

            # labels for causal models are same as the input_ids.
            # Internally transformers shifts the labels by one during training.
            labels = input_ids.clone()
            input_ids[~attention_mask] = self.tokenizer.config.pad_token_id
            labels[~attention_mask] = -100

        return {
            "input_ids": input_ids.squeeze(0),
            "attention_mask": attention_mask.squeeze(0),
            "labels": labels.squeeze(0),
        }

    

In [88]:
tokenizer.config.prediction_length=12

In [89]:
hf_format_dataset = to_hf_format(windowed_dataset[1], tokenizer, "seq2seq")

{'id': 48, 'impulse_class': 'J', 'motor_id': '5f4294d20002310000000256', 'thrust': [0.7816907523077173, 0.7904929099759597, 0.7904929099759597, 0.75, 0.6021134403846466, 0.4489439581490506, 0.3732397041346263, 0.3450709002404041, 0.3045779902644446, 0.1830992603365658, 0.105634032115394, 0.0440135017067069], 'time': [0.7944223107569722, 0.8087649402390438, 0.8175298804780877, 0.8278884462151395, 0.851792828685259, 0.8764940239043826, 0.8924302788844624, 0.9123505976095618, 0.9250996015936256, 0.9505976095617532, 0.9633466135458169, 0.9776892430278886]}
scale tensor([0.7762])
shape past target torch.Size([1, 12]) 12


In [90]:
hf_format_dataset

{'input_ids': tensor([2215, 2180, 2225, 2175, 2174, 2148, 2180, 2195, 2179, 2176, 2179, 2179,
         2180, 2180, 2183, 2187, 2190, 2192, 2192, 2191, 2190, 2189, 2186, 2185,
            1]),
 'attention_mask': tensor([True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True]),
 'labels': tensor([2186, 2188, 2188, 2181, 2155, 2128, 2115, 2110, 2103, 2081, 2068, 2057,
            1])}

In [91]:
training_args = TrainingArguments(
    output_dir=model_dir,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    num_train_epochs=5,
    logging_steps=50,
    save_steps=500,
    save_total_limit=2,
    learning_rate=2e-4,
    bf16=torch.cuda.is_bf16_supported(),
    fp16=torch.cuda.is_bf16_supported(),
    report_to="none"
)

In [94]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=hf_format_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

  trainer = Trainer(


In [95]:
trainer.train()

KeyError: 0

In [34]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
from collections import defaultdict
from tqdm import tqdm

In [35]:
from transformers.optimization import Adafactor, AdafactorSchedule

optimizer = Adafactor(model.parameters(), scale_parameter=True, relative_step=True, warmup_init=True, lr=None)
lr_scheduler = AdafactorSchedule(optimizer)

In [36]:


# # Training Loop
# epochs = 3
# model.train()

# for epoch in range(epochs):
#     total_loss = 0.0
#     for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}"):
#         batch = {k: v.to(device) for k, v in batch.items()}

#         outputs = model(**batch)
#         loss = outputs.loss
#         loss.backward()
#         optimizer.step()
#         optimizer.zero_grad()

#         total_loss += loss.item()

#     print(f"Epoch {epoch+1} Loss: {total_loss/len(train_loader):.4f}")


Epoch 1:   0%|                                                                                                                          | 0/12 [00:00<?, ?it/s]


TypeError: 'MeanScaleUniformBins' object is not callable