In [1]:
from typing import List, Tuple
import torch
from torch.nn.utils.rnn import pad_sequence


def collate_pad(batch: List[Tuple[torch.Tensor, float, int]]):
    """Collate ``(sequence, target, length)`` samples into batch tensors.

    Args:
        batch: Samples to merge; each one is a ``(sequence, target, length)``
            tuple. Sequences may differ in their first (time) dimension.

    Returns:
        A ``(padded, lengths, targets)`` tuple: the sequences zero-padded to
        the longest one in the batch (batch first, float32), the lengths as
        an int64 tensor, and the targets as a float32 tensor.
    """
    sequences, rul_values, seq_lengths = zip(*batch)

    batch_lengths = torch.tensor(seq_lengths, dtype=torch.long)
    # pad_sequence fills the shorter sequences with zeros along the time axis.
    batch_inputs = pad_sequence(sequences, batch_first=True).float()
    batch_targets = torch.tensor(rul_values, dtype=torch.float32)

    return batch_inputs, batch_lengths, batch_targets


In [2]:
from torch.utils.data import DataLoader
from dataset import CmapssRandomCropDataset

def make_loader(sequences_by_unit, rul_by_unit, samples_per_epoch, batch_size, l_min, l_max, num_workers=0):
    """Build a DataLoader over a ``CmapssRandomCropDataset``.

    Args:
        sequences_by_unit: Passed through to the dataset (per-unit sequences).
        rul_by_unit: Passed through to the dataset (per-unit RUL values).
        samples_per_epoch: Passed through to the dataset.
        batch_size: Number of samples per batch.
        l_min: Passed through to the dataset
            (presumably the minimum crop length -- confirm in dataset.py).
        l_max: Passed through to the dataset
            (presumably the maximum crop length -- confirm in dataset.py).
        num_workers: Number of DataLoader worker processes.

    Returns:
        A DataLoader that batches with ``collate_pad``, does not shuffle, and
        drops the last incomplete batch.
    """
    ds = CmapssRandomCropDataset(
        sequences_by_unit=sequences_by_unit,
        rul_by_unit=rul_by_unit,
        samples_per_epoch=samples_per_epoch,
        l_min=l_min,
        l_max=l_max,
    )

    return DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=collate_pad,
        drop_last=True,
        # Pinned host memory only speeds up host-to-GPU copies; asking for it
        # without CUDA just triggers a warning, so enable it conditionally.
        pin_memory=torch.cuda.is_available(),
    )


In [3]:
@torch.no_grad()
def predict_rul(model, seq: torch.Tensor, device: torch.device) -> float:
    """Predict a single RUL value for one unpadded sequence.

    The model is switched to eval mode and called on a batch of size one.

    Args:
        model: Callable as ``model(padded, lengths)``; the first element of
            its output is taken as the prediction.
        seq: A 2-D tensor of shape ``[L, F]``.
        device: Device on which the forward pass runs.

    Returns:
        The prediction as a Python float.

    Raises:
        ValueError: If ``seq`` is not 2-D.
    """
    model.eval()

    if seq.dim() != 2:
        raise ValueError("Expected seq shape [L, F]")

    n_steps = seq.shape[0]
    # Add the batch dimension the model expects: [L, F] -> [1, L, F].
    batch = seq.unsqueeze(0).to(device).float()
    batch_lengths = torch.tensor([n_steps], dtype=torch.long, device=device)

    output = model(batch, batch_lengths)
    return float(output[0].item())


In [4]:
import torch
import torch.nn as nn


def train_one_epoch(model, loader, optimizer, device):
    """Train ``model`` for one pass over ``loader`` with Smooth-L1 loss.

    Args:
        model: Callable as ``model(padded, lengths)``.
        loader: Iterable yielding ``(padded, lengths, targets)`` batches.
        optimizer: Optimizer stepping the model's parameters.
        device: Device each batch is moved to.

    Returns:
        The sample-weighted mean loss over the epoch (0.0 for an empty loader).
    """
    model.train()
    criterion = nn.SmoothL1Loss()

    running_loss = 0.0
    seen = 0

    for batch in loader:
        padded, lengths, targets = batch
        padded, lengths, targets = padded.to(device), lengths.to(device), targets.to(device)

        optimizer.zero_grad()
        loss = criterion(model(padded, lengths), targets)
        loss.backward()

        # Cap the global gradient norm at 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        # Weight the batch mean by batch size so the epoch mean is per sample.
        batch_size = targets.shape[0]
        running_loss += loss.item() * batch_size
        seen += batch_size

    return running_loss / max(seen, 1)


In [5]:
import pandas as pd

# Column layout of the data files (26 columns in total):
#   1      unit number (which engine)
#   2      time in cycles (how long it has been running)
#   3-5    operational settings (altitude, speed, etc.)
#   6-26   sensor readings s_1 .. s_21

index_names = ['unit_nr', 'time_cycles']
setting_names = [f'setting_{i}' for i in range(1, 4)]
sensor_names = [f's_{i}' for i in range(1, 22)]
col_names = [*index_names, *setting_names, *sensor_names]

print(col_names)

def load_fd(fd_tag, data_dir="data"):
    """Load the train, test and RUL-label files of one sub-dataset.

    Args:
        fd_tag: Tag embedded in the file names, e.g. ``"FD001"``.
        data_dir: Directory containing ``train_<tag>.txt``, ``test_<tag>.txt``
            and ``RUL_<tag>.txt``. Defaults to ``"data"``.

    Returns:
        A ``(raw_train_df, raw_test_df, raw_rul_labels_df)`` tuple. The train
        frame has two extra columns: ``max_cycle`` (last recorded cycle of the
        unit) and ``RUL`` (``max_cycle - time_cycles``). The test frame holds
        only the raw columns; its labels are in ``raw_rul_labels_df`` under
        the column ``RUL_truth``.
    """
    train_path = f"{data_dir}/train_{fd_tag}.txt"
    test_path  = f"{data_dir}/test_{fd_tag}.txt"
    rul_path   = f"{data_dir}/RUL_{fd_tag}.txt"

    raw_train_df = pd.read_csv(train_path, sep=r'\s+', header=None, names=col_names)
    raw_test_df  = pd.read_csv(test_path,  sep=r'\s+', header=None, names=col_names)
    raw_rul_labels_df = pd.read_csv(rul_path, header=None, names=['RUL_truth'])

    # train labels: compute RUL from run-to-failure.
    # transform('max') broadcasts each unit's last cycle onto all of its rows,
    # which avoids a separate aggregate + merge round trip.
    raw_train_df['max_cycle'] = raw_train_df.groupby('unit_nr')['time_cycles'].transform('max')
    raw_train_df['RUL'] = raw_train_df['max_cycle'] - raw_train_df['time_cycles']

    # test labels: provided separately
    return raw_train_df, raw_test_df, raw_rul_labels_df

# Fraction of engines (in order of first appearance) used for fitting; the
# remaining engines form the validation split. A fraction instead of a fixed
# count keeps the split sensible when more sub-datasets are enabled.
TRAIN_FRACTION = 0.8

next_unit = 1
train_dfs = []
test_dfs = []
test_rul_labels = []

# Available sub-datasets: "FD001", "FD002", "FD003", "FD004".
# Only FD001 is used here; add tags to this list to combine several.
data_tags = ["FD001"]

for fd_tag in data_tags:
    train_df_chunk, test_df_chunk, rul_labels_chunk = load_fd(fd_tag)

    # Shift test unit ids by this sub-dataset's offset (next_unit has not been
    # advanced yet) and keep the original id for reference.
    test_df_chunk = test_df_chunk.assign(
        fd=fd_tag,
        unit_nr_orig=test_df_chunk['unit_nr'],
        unit_nr=test_df_chunk['unit_nr'] + next_unit - 1,
    )
    test_dfs.append(test_df_chunk)
    test_rul_labels.append(rul_labels_chunk)

    # make a mapping for this FD's units so ids stay unique across sub-datasets
    uniq_units = sorted(train_df_chunk['unit_nr'].unique())
    mapping = {u: next_unit + i for i, u in enumerate(uniq_units)}
    next_unit += len(uniq_units)

    train_df_chunk = train_df_chunk.assign(
        fd=fd_tag,
        unit_nr_orig=train_df_chunk['unit_nr'],
        unit_nr=train_df_chunk['unit_nr'].map(mapping),
    )
    train_dfs.append(train_df_chunk)


data_df = pd.concat(train_dfs, ignore_index=True)
train_df = pd.concat(train_dfs, ignore_index=True)
test_df = pd.concat(test_dfs, ignore_index=True)
rul_labels_df = pd.concat(test_rul_labels, ignore_index=True)

print(f"Train data shape: {train_df.shape}")
print(f"Test data shape: {test_df.shape}")
print(f"RUL labels shape: {rul_labels_df.shape}")

engine_ids = train_df['unit_nr'].unique()

# Split by engine, never by row, so no engine contributes to both splits.
n_train_units = int(round(len(engine_ids) * TRAIN_FRACTION))
train_ids = engine_ids[:n_train_units]
val_ids = engine_ids[n_train_units:]
print("len train ids:", len(train_ids))
print("len val ids:", len(val_ids))
print("len engine ids:", len(engine_ids))

train_split_df = train_df[train_df['unit_nr'].isin(train_ids)]
val_split_df = train_df[train_df['unit_nr'].isin(val_ids)]
print("Train df shape:", train_split_df.shape)
print("Val df shape:", val_split_df.shape)




['unit_nr', 'time_cycles', 'setting_1', 'setting_2', 'setting_3', 's_1', 's_2', 's_3', 's_4', 's_5', 's_6', 's_7', 's_8', 's_9', 's_10', 's_11', 's_12', 's_13', 's_14', 's_15', 's_16', 's_17', 's_18', 's_19', 's_20', 's_21']
Train data shape: (20631, 30)
Test data shape: (13096, 28)
RUL labels shape: (100, 1)
len train ids: 80
len val ids: 20
len engine ids: 100
Train df shape: (16138, 30)
Val df shape: (4493, 30)


In [6]:
from sklearn.preprocessing import MinMaxScaler


# Identifier and label columns are passed through unscaled.
not_scaled_cols = ['unit_nr', 'RUL', 'max_cycle', 'time_cycles']

col_set = set(col_names)
columns_to_scale = [name for name in col_names if name not in not_scaled_cols]

print("Columns to scale:", columns_to_scale)


def _with_id_and_label_columns(scaled_values, source_df):
    """Wrap scaled feature values in a frame and re-attach the unscaled columns of ``source_df``."""
    frame = pd.DataFrame(scaled_values, columns=columns_to_scale, index=source_df.index)
    frame.insert(0, 'unit_nr', source_df['unit_nr'])
    frame.insert(1, 'time_cycles', source_df['time_cycles'])
    # Plain assignment appends at the end, matching insert(len(columns), ...).
    frame['RUL'] = source_df['RUL']
    frame['max_cycle'] = source_df['max_cycle']
    return frame


# Fit on the training split only; the validation split reuses those statistics.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_train_split = scaler.fit_transform(train_split_df[columns_to_scale])
scaled_val_split = scaler.transform(val_split_df[columns_to_scale])

scaled_train_split_df = _with_id_and_label_columns(scaled_train_split, train_split_df)
scaled_val_split_df = _with_id_and_label_columns(scaled_val_split, val_split_df)

scaled_train_split_df.head()





Columns to scale: ['setting_1', 'setting_2', 'setting_3', 's_1', 's_2', 's_3', 's_4', 's_5', 's_6', 's_7', 's_8', 's_9', 's_10', 's_11', 's_12', 's_13', 's_14', 's_15', 's_16', 's_17', 's_18', 's_19', 's_20', 's_21']


Unnamed: 0,unit_nr,time_cycles,setting_1,setting_2,setting_3,s_1,s_2,s_3,s_4,s_5,...,s_14,s_15,s_16,s_17,s_18,s_19,s_20,s_21,RUL,max_cycle
0,1,1,0.456647,0.166667,0.0,0.0,0.183735,0.425154,0.309757,0.0,...,0.199608,0.363986,0.0,0.363636,0.0,0.0,0.708661,0.725482,191,192
1,1,2,0.606936,0.25,0.0,0.0,0.283133,0.473456,0.352633,0.0,...,0.162813,0.411312,0.0,0.363636,0.0,0.0,0.661417,0.732001,190,192
2,1,3,0.248555,0.75,0.0,0.0,0.343373,0.386193,0.370527,0.0,...,0.171793,0.357445,0.0,0.181818,0.0,0.0,0.622047,0.619473,189,192
3,1,4,0.537572,0.5,0.0,0.0,0.343373,0.267715,0.331195,0.0,...,0.174889,0.166603,0.0,0.363636,0.0,0.0,0.566929,0.661565,188,192
4,1,5,0.387283,0.333333,0.0,0.0,0.349398,0.269082,0.404625,0.0,...,0.174734,0.402078,0.0,0.454545,0.0,0.0,0.582677,0.70479,187,192


In [7]:
# Inspect dtypes and non-null counts of the scaled training split.
scaled_train_split_df.info()


<class 'pandas.core.frame.DataFrame'>
Index: 16138 entries, 0 to 16137
Data columns (total 28 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   unit_nr      16138 non-null  int64  
 1   time_cycles  16138 non-null  int64  
 2   setting_1    16138 non-null  float64
 3   setting_2    16138 non-null  float64
 4   setting_3    16138 non-null  float64
 5   s_1          16138 non-null  float64
 6   s_2          16138 non-null  float64
 7   s_3          16138 non-null  float64
 8   s_4          16138 non-null  float64
 9   s_5          16138 non-null  float64
 10  s_6          16138 non-null  float64
 11  s_7          16138 non-null  float64
 12  s_8          16138 non-null  float64
 13  s_9          16138 non-null  float64
 14  s_10         16138 non-null  float64
 15  s_11         16138 non-null  float64
 16  s_12         16138 non-null  float64
 17  s_13         16138 non-null  float64
 18  s_14         16138 non-null  float64
 19  s_15     

In [8]:
# Same inspection for the scaled validation split, followed by a preview of its first rows.
scaled_val_split_df.info()
scaled_val_split_df.head()

<class 'pandas.core.frame.DataFrame'>
Index: 4493 entries, 16138 to 20630
Data columns (total 28 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   unit_nr      4493 non-null   int64  
 1   time_cycles  4493 non-null   int64  
 2   setting_1    4493 non-null   float64
 3   setting_2    4493 non-null   float64
 4   setting_3    4493 non-null   float64
 5   s_1          4493 non-null   float64
 6   s_2          4493 non-null   float64
 7   s_3          4493 non-null   float64
 8   s_4          4493 non-null   float64
 9   s_5          4493 non-null   float64
 10  s_6          4493 non-null   float64
 11  s_7          4493 non-null   float64
 12  s_8          4493 non-null   float64
 13  s_9          4493 non-null   float64
 14  s_10         4493 non-null   float64
 15  s_11         4493 non-null   float64
 16  s_12         4493 non-null   float64
 17  s_13         4493 non-null   float64
 18  s_14         4493 non-null   float64
 19  s_15  

Unnamed: 0,unit_nr,time_cycles,setting_1,setting_2,setting_3,s_1,s_2,s_3,s_4,s_5,...,s_14,s_15,s_16,s_17,s_18,s_19,s_20,s_21,RUL,max_cycle
16138,81,1,0.208092,0.75,0.0,0.0,0.25,0.429938,0.411546,0.0,...,0.179792,0.464025,0.0,0.272727,0.0,0.0,0.559055,0.60856,239,240
16139,81,2,0.630058,0.666667,0.0,0.0,0.433735,0.346548,0.432647,0.0,...,0.208639,0.509427,0.0,0.363636,0.0,0.0,0.590551,0.62089,238,240
16140,81,3,0.468208,0.916667,0.0,0.0,0.403614,0.350422,0.238859,0.0,...,0.205439,0.489804,0.0,0.545455,0.0,0.0,0.692913,0.643282,237,240
16141,81,4,0.491329,0.5,0.0,0.0,0.361446,0.543404,0.381668,0.0,...,0.227991,0.444017,0.0,0.363636,0.0,0.0,0.480315,0.727749,236,240
16142,81,5,0.635838,0.666667,0.0,0.0,0.662651,0.443153,0.461344,0.0,...,0.202652,0.277799,0.0,0.363636,0.0,0.0,0.692913,0.600907,235,240


In [None]:
# Module-level placeholders; build_unit_dicts in this cell creates and returns
# its own local dicts of the same names, so these two stay empty here.
sequences_by_unit = {}
rul_by_unit = {}

# Model input features: the three operational settings plus the 21 sensors.
feature_cols = col_names[2:]  # Exclude 'unit_nr' and 'time_cycles'

import torch

def build_unit_dicts(df, feature_cols):
    """Split a long-format frame into per-unit feature and RUL tensors.

    Args:
        df: Frame with ``unit_nr``, ``time_cycles``, ``RUL`` and the columns
            named in ``feature_cols``.
        feature_cols: Names of the columns used as model inputs.

    Returns:
        A ``(sequences_by_unit, rul_by_unit)`` tuple of dicts keyed by the
        integer unit id. Values are float32 tensors ordered by
        ``time_cycles``: the feature matrix and the RUL vector of that unit.
    """
    ordered = df.sort_values(["unit_nr", "time_cycles"])

    # sort=False keeps the groups in the order produced by the sort above.
    rows_by_unit = {
        int(unit_id): unit_rows
        for unit_id, unit_rows in ordered.groupby("unit_nr", sort=False)
    }

    sequences_by_unit = {
        unit_id: torch.tensor(unit_rows[feature_cols].to_numpy(), dtype=torch.float32)
        for unit_id, unit_rows in rows_by_unit.items()
    }
    rul_by_unit = {
        unit_id: torch.tensor(unit_rows["RUL"].to_numpy(), dtype=torch.float32)
        for unit_id, unit_rows in rows_by_unit.items()
    }

    return sequences_by_unit, rul_by_unit

# Per-engine tensors for each split.
train_sequences_by_unit, train_rul_by_unit = build_unit_dicts(scaled_train_split_df, feature_cols)
val_sequences_by_unit, val_rul_by_unit = build_unit_dicts(scaled_val_split_df, feature_cols)

from dataset import CmapssRandomCropDataset

# Smoke test: draw one sample straight from the dataset.
dataset = CmapssRandomCropDataset(
    sequences_by_unit=train_sequences_by_unit,
    rul_by_unit=train_rul_by_unit,
    samples_per_epoch=1000,
    l_min=30,
    l_max=100,
)

# Any index behaves the same here (e.g. dataset[42]).
seq, target, length = dataset[0]
print(seq.shape, target, length)

# Settings shared by the training and validation loaders.
loader_kwargs = dict(batch_size=32, l_min=30, l_max=200)

train_loader = make_loader(train_sequences_by_unit, train_rul_by_unit, samples_per_epoch=10000, **loader_kwargs)
padded, lengths, targets = next(iter(train_loader))
print(padded.shape, lengths.shape, targets.shape)

val_loader = make_loader(val_sequences_by_unit, val_rul_by_unit, samples_per_epoch=8000, **loader_kwargs)
padded, lengths, targets = next(iter(val_loader))
print(padded.shape, lengths.shape, targets.shape)



torch.Size([90, 24]) 12.0 90
torch.Size([32, 140, 24]) torch.Size([32]) torch.Size([32])
torch.Size([32, 158, 24]) torch.Size([32]) torch.Size([32])


In [50]:
import torch
import torch.nn as nn
import copy

EPOCHS = 20

def train_full(model, loader, optimizer, device, epochs=None):
    """Train ``model`` for several epochs with Smooth-L1 loss (no validation).

    Note: a later cell of this notebook defines another ``train_full`` that
    also takes a validation loader; whichever cell ran last wins.

    Args:
        model: Callable as ``model(padded, lengths)``.
        loader: Iterable yielding ``(padded, lengths, targets)`` batches.
        optimizer: Optimizer stepping the model's parameters.
        device: Device each batch is moved to.
        epochs: Number of epochs to run; ``None`` (the default) uses the
            module-level ``EPOCHS``.

    Returns:
        The sample-weighted mean training loss of the last epoch
        (0.0 if no epoch ran).
    """
    n_epochs = EPOCHS if epochs is None else epochs

    model.train()
    loss_fn = nn.SmoothL1Loss()

    last_epoch_loss = 0.0

    for epoch in range(n_epochs):
        model.train()
        total_loss = 0.0
        total_samples = 0
        for padded, lengths, targets in loader:
            padded = padded.to(device)
            lengths = lengths.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()

            preds = model(padded, lengths)
            loss = loss_fn(preds, targets)

            loss.backward()

            # Cap the global gradient norm at 1.0 before the update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            # Weight the batch mean by batch size so the epoch mean is per sample.
            bs = targets.shape[0]
            total_loss += loss.item() * bs
            total_samples += bs

        epoch_mean_loss = total_loss / max(total_samples, 1)
        last_epoch_loss = epoch_mean_loss
        print(f"Epoch {epoch+1}/{n_epochs} total loss: {total_loss:.4f}, mean loss: {epoch_mean_loss:.4f}")

        # Leave the model in eval mode between epochs (and once training ends).
        model.eval()

    return last_epoch_loss




In [54]:
from model import RulLstm


# Pick the device up front so everything below is created for it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

model = RulLstm(
    n_features=len(feature_cols),
    hidden_size=64,
    num_layers=2,
    dropout=0.2,
)
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE: this call matches the four-argument train_full(model, loader, optimizer, device).
# The TypeError recorded under this cell is what the later five-argument
# train_full (with val_loader) raises, so the cell was presumably executed
# after that redefinition.
loss = train_full(model, train_loader, optimizer, device)

Using device: cuda


TypeError: train_full() missing 1 required positional argument: 'device'

In [57]:
import torch
import torch.nn as nn

def run_epoch(model, loader, loss_fn, device, train: bool, optimizer=None):
    """Run one pass over ``loader`` and return the sample-weighted mean loss.

    Args:
        model: Callable as ``model(padded, lengths)``.
        loader: Iterable yielding ``(padded, lengths, targets)`` batches.
        loss_fn: Callable as ``loss_fn(preds, targets)`` returning a scalar
            tensor.
        device: Device each batch is moved to.
        train: If true, put the model in training mode and update its
            parameters; otherwise use eval mode with gradients disabled.
        optimizer: Optimizer to step; required when ``train`` is true and
            unused otherwise.

    Returns:
        The mean loss per sample over the pass (0.0 for an empty loader).

    Raises:
        ValueError: If ``train`` is true but ``optimizer`` is ``None``.
    """
    train = bool(train)
    # Fail before any forward pass instead of with an AttributeError on
    # ``optimizer.zero_grad()`` in the middle of the first batch.
    if train and optimizer is None:
        raise ValueError("optimizer is required when train=True")

    # model.train(False) is the same as model.eval().
    model.train(train)

    total_loss = 0.0
    total_samples = 0

    # Gradients are tracked only while training.
    with torch.set_grad_enabled(train):
        for padded, lengths, targets in loader:
            padded = padded.to(device)
            lengths = lengths.to(device)
            targets = targets.to(device)

            preds = model(padded, lengths)
            loss = loss_fn(preds, targets)

            # Weight the batch loss by batch size so the result is per sample.
            bs = targets.size(0)
            total_loss += loss.item() * bs
            total_samples += bs

            if train:
                optimizer.zero_grad()
                loss.backward()
                # Cap the global gradient norm at 1.0 before the update.
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

    return total_loss / max(total_samples, 1)


In [58]:
import copy

EPOCHS = 20

def train_full(model, train_loader, val_loader, optimizer, device, epochs=None):
    """Train with per-epoch validation and restore the best weights.

    Each epoch runs one training pass and one validation pass through
    ``run_epoch`` using Smooth-L1 loss. The state dict with the lowest
    validation loss is kept and loaded back into ``model`` at the end.

    Args:
        model: Model to train; updated in place.
        train_loader: Loader passed to ``run_epoch`` for the training pass.
        val_loader: Loader passed to ``run_epoch`` for the validation pass.
        optimizer: Optimizer used for the training pass.
        device: Device the batches are moved to.
        epochs: Number of epochs to run; ``None`` (the default) uses the
            module-level ``EPOCHS``.

    Returns:
        The lowest validation loss seen (``inf`` if no epoch improved on it).
    """
    n_epochs = EPOCHS if epochs is None else epochs
    loss_fn = nn.SmoothL1Loss(reduction="mean")

    best_val = float("inf")
    best_state = None

    for epoch in range(n_epochs):
        train_loss = run_epoch(
            model=model,
            loader=train_loader,
            loss_fn=loss_fn,
            device=device,
            train=True,
            optimizer=optimizer,
        )

        val_loss = run_epoch(
            model=model,
            loader=val_loader,
            loss_fn=loss_fn,
            device=device,
            train=False,
            optimizer=None,
        )

        print(
            f"Epoch {epoch+1}/{n_epochs} "
            f"train_loss: {train_loss:.4f} | val_loss: {val_loss:.4f}"
        )

        if val_loss < best_val:
            best_val = val_loss
            # Deep copy: state_dict() returns references to the live tensors,
            # which later optimizer steps would otherwise overwrite.
            best_state = copy.deepcopy(model.state_dict())

    if best_state is not None:
        model.load_state_dict(best_state)

    return best_val


In [59]:
from model import RulLstm


# Pick the device up front so everything below is created for it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Fresh model and optimizer for the run with validation.
model = RulLstm(
    n_features=len(feature_cols),
    hidden_size=64,
    num_layers=2,
    dropout=0.2,
)
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train_full returns the best validation loss and leaves the best weights loaded.
loss = train_full(model, train_loader, val_loader, optimizer, device)

Using device: cuda
Epoch 1/20 train_loss: 32.8874 | val_loss: 30.1581
Epoch 2/20 train_loss: 26.5449 | val_loss: 30.2501
Epoch 3/20 train_loss: 19.0747 | val_loss: 18.5627
Epoch 4/20 train_loss: 12.2974 | val_loss: 14.6034
Epoch 5/20 train_loss: 10.1090 | val_loss: 12.8112
Epoch 6/20 train_loss: 9.5594 | val_loss: 12.4224
Epoch 7/20 train_loss: 9.2045 | val_loss: 11.4218
Epoch 8/20 train_loss: 8.9363 | val_loss: 11.5449
Epoch 9/20 train_loss: 8.7233 | val_loss: 11.8341
Epoch 10/20 train_loss: 8.7024 | val_loss: 12.4358
Epoch 11/20 train_loss: 8.5117 | val_loss: 11.9647
Epoch 12/20 train_loss: 8.5177 | val_loss: 11.4252
Epoch 13/20 train_loss: 8.2313 | val_loss: 11.0311
Epoch 14/20 train_loss: 8.0163 | val_loss: 11.1253
Epoch 15/20 train_loss: 8.0923 | val_loss: 10.9845
Epoch 16/20 train_loss: 8.1057 | val_loss: 10.9642
Epoch 17/20 train_loss: 7.8929 | val_loss: 10.9441
Epoch 18/20 train_loss: 7.9647 | val_loss: 11.4296
Epoch 19/20 train_loss: 7.8910 | val_loss: 10.1145
Epoch 20/20 trai