In [82]:
import time
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import *

In [83]:
# Pick the compute device: use an accelerator when one is available, else CPU.
# `torch.accelerator` does not exist on older PyTorch builds, where the
# original one-liner raised AttributeError; those builds are probed through
# the per-backend availability checks instead.
if hasattr(torch, "accelerator"):
    device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
elif torch.cuda.is_available():
    device = "cuda"
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using mps device


In [84]:
import kagglehub

# Download the latest version of the dataset; `path` is the local directory
# that kagglehub reports for the downloaded files.
DATASET_HANDLE = "behrad3d/nasa-cmaps"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Path to dataset files: /Users/yoda/.cache/kagglehub/datasets/behrad3d/nasa-cmaps/versions/1


In [85]:
# Locations of the FD001 files inside the downloaded dataset directory.
TRAIN_DATA_FILENAME = "/".join((path, "CMaps", "train_FD001.txt"))
TEST_DATA_FILENAME = "/".join((path, "CMaps", "test_FD001.txt"))
TEST_RUL_FILENAME = "/".join((path, "CMaps", "RUL_FD001.txt"))

In [224]:
class FD001Dataset(Dataset):
    """Row-wise dataset over a whitespace-separated run-to-failure file.

    Every row of the data file is one sample. Column 0 is read as the engine
    (unit) id, column 1 as that engine's cycle counter, and all remaining
    columns are returned as input features.

    The regression target of a row is ``last_cycle - cycle + 1`` where
    ``last_cycle`` is, per engine,

    * the largest cycle present in the file when no labels file is given
      (training data), or
    * the largest cycle present plus the engine's entry in the labels file
      (test data). Entry ``i`` (0-based) of the labels file belongs to unit
      id ``i + 1``.

    NOTE(review): because of the ``+ 1`` the final row of an engine is
    labelled 1 (train) or ``rul + 1`` (test) rather than 0 / ``rul``. The
    offset is applied consistently to both splits and is kept unchanged here;
    confirm that this convention is intended.

    Args:
        data_filename: anything ``np.loadtxt`` accepts (path, file object,
            list of lines).
        labels_filename: optional source of one value per engine; ``None``
            means labels are inferred from the data alone.
        max_rul: upper bound applied to the label returned by
            ``__getitem__``. ``None`` disables the cap.

    Raises:
        ValueError: if the labels file has fewer entries than the largest
            unit id in the data.
    """

    def __init__(self, data_filename, labels_filename=None, max_rul=100):
        # ndmin=2 keeps a single-row file two-dimensional so the column
        # slicing below still works.
        self.data = np.loadtxt(data_filename, ndmin=2)
        self.max_rul = max_rul
        # Normalisation statistics; they stay None until normalize() runs.
        self.m = None
        self.s = None

        unit_ids = self.data[:, 0].astype(int)
        cycles = self.data[:, 1].astype(int)
        n_units = int(unit_ids.max())

        if labels_filename is None:
            # Train data: the last recorded cycle is the end of the run.
            extra_cycles = np.zeros(n_units)
        else:
            # Test data: the run continues past the last recorded cycle.
            # ndmin=1 keeps a one-engine labels file indexable.
            extra_cycles = np.loadtxt(labels_filename, ndmin=1)
            if len(extra_cycles) < n_units:
                raise ValueError(
                    f"labels file has {len(extra_cycles)} entries but the data "
                    f"contains unit ids up to {n_units}"
                )

        self.labels = np.zeros(len(self.data))
        for unit in range(1, n_units + 1):
            mask = unit_ids == unit
            if not mask.any():
                # No rows for this unit id, so there is nothing to label.
                continue
            last_cycle = int(self.data[mask, 1].max()) + extra_cycles[unit - 1]
            self.labels[mask] = last_cycle - cycles[mask] + 1

    def normalize(self, ms=None):
        """Set the per-feature statistics used to standardise samples.

        The stored data is not modified; ``__getitem__`` applies
        ``(x - m) / s`` on the fly.

        Args:
            ms: optional ``(mean, std)`` pair to reuse, e.g. the statistics
                of the training split. When ``None`` they are computed from
                this dataset's feature columns.

        Returns:
            The ``(mean, std)`` pair now in use.
        """
        if ms is None:
            features = self.data[:, 2:]
            self.m = np.average(features, axis=0)
            self.s = np.std(features, axis=0)
            # Constant columns have zero spread; divide them by 1 instead of
            # 0 so they standardise to 0 rather than NaN.
            self.s[self.s == 0] = 1
        else:
            (self.m, self.s) = ms
        return (self.m, self.s)

    def __len__(self):
        """Number of rows (samples) in the data file."""
        return len(self.data)

    def __getitem__(self, k):
        """Return ``(features, label)`` for row ``k`` as float32 tensors.

        ``features`` are the standardised feature columns; ``label`` is a
        one-element tensor holding the target capped at ``max_rul``.

        Raises:
            RuntimeError: if ``normalize()`` has not been called yet.
        """
        if self.m is None or self.s is None:
            raise RuntimeError(
                "FD001Dataset.normalize() must be called before samples are read"
            )
        x = (self.data[k][2:] - self.m) / self.s
        label = self.labels[k]
        if self.max_rul is not None:
            label = min(self.max_rul, label)
        return (
            torch.tensor(x, dtype=torch.float32),
            torch.tensor([label], dtype=torch.float32),
        )

In [225]:
# Build the training split and compute its standardisation statistics.
train_dataset = FD001Dataset(TRAIN_DATA_FILENAME)
norm = train_dataset.normalize()
print(f"{len(train_dataset)} train instances")

# The test split reuses the training statistics instead of computing its own.
test_dataset = FD001Dataset(TEST_DATA_FILENAME, labels_filename=TEST_RUL_FILENAME)
test_dataset.normalize(ms=norm)
print(f"{len(test_dataset)} test instances")

20631 train instances
13096 test instances


In [226]:
# Number of samples per mini-batch
batch_size = 256

# Data loaders: the training loader reshuffles on each pass, the test loader
# keeps file order. No validation split is created in this notebook.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# Show the shapes of the first batch drawn from the test loader
first_batch = next(iter(test_dataloader), None)
if first_batch is not None:
    X, y = first_batch
    print(f"Shape of X: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")

Shape of X: torch.Size([256, 24])
Shape of y: torch.Size([256, 1]) torch.float32


In [227]:
class ModelFC(nn.Module):
    """Fully connected regressor: 24 inputs -> 240 -> 60 -> 20 -> 1 output.

    Each hidden layer is followed by a ReLU; the final layer is linear. The
    whole stack is stored as a single ``nn.Sequential`` under ``self.fc``.
    """

    def __init__(self):
        super().__init__()
        # Input width followed by the hidden-layer widths, in order.
        widths = (24, 240, 60, 20)
        layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.ReLU())
        # Output head: one value per sample, no activation.
        layers.append(nn.Linear(widths[-1], 1))
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of feature vectors to a batch of single-value predictions."""
        return self.fc(x)

In [228]:
## Instantiate the model you want to try out (swap the constructor below to
## experiment with a different architecture)

# Seed PyTorch's global RNG before the first random operation so the initial
# weights, and the shuffle order of DataLoaders that rely on the default
# generator, are repeatable across "Restart & Run All". Bit-exact results on
# accelerator backends are still not guaranteed.
SEED = 0
torch.manual_seed(SEED)

model = ModelFC().to(device)

print(model)

# Mean squared error on the predicted value, optimised with Adam.
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

ModelFC(
  (fc): Sequential(
    (0): Linear(in_features=24, out_features=240, bias=True)
    (1): ReLU()
    (2): Linear(in_features=240, out_features=60, bias=True)
    (3): ReLU()
    (4): Linear(in_features=60, out_features=20, bias=True)
    (5): ReLU()
    (6): Linear(in_features=20, out_features=1, bias=True)
  )
)


In [229]:
def train(dataloader, model, loss_fn, optimizer, log_interval=1.0):
    """Run one optimisation pass over ``dataloader``.

    For every batch: forward pass, loss, backward pass, optimizer step, then
    gradients are cleared. Progress is printed for the first batch and
    afterwards whenever at least ``log_interval`` seconds have elapsed.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing ``.dataset``.
        model: module to train; switched to training mode.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer bound to ``model``'s parameters.
        log_interval: minimum number of seconds between progress lines.
    """
    size = len(dataloader.dataset)
    model.train()

    # Send batches to wherever the model's weights live instead of relying on
    # a notebook-level `device` variable. A parameter-free model leaves the
    # batches where they are.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    seen = 0            # samples processed so far in this pass
    last_report = None  # time of the previous progress line
    for X, y in dataloader:
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Count real samples: the last batch may be smaller than the others,
        # so multiplying the batch index by the batch length would misreport.
        seen += len(X)

        now = time.monotonic()
        if last_report is None or now - last_report >= log_interval:
            last_report = now
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

In [230]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, abs_error = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            #print(X.shape, pred.shape, y.shape)
            test_loss += loss_fn(pred, y).item()
            abs_error += (pred - y).abs().type(torch.float).sum().item()
            #print(f"pred: {pred[:10]}\ny: {y[:10]}")
    test_loss /= num_batches
    abs_error /= size
    print(f"Average absolute error: {abs_error}, Avg loss: {test_loss}")

In [231]:
# Number of passes over the training data. Raise it if the loss has not
# levelled off by the final epoch.
epochs = 20

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    # Report the metrics on both splits after every epoch.
    for split_name, loader in (("Train", train_dataloader), ("Test", test_dataloader)):
        print(f"{split_name}: ", end="")
        test(loader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 6583.066895  [  256/20631]
Train: Average absolute error: 21.811844698209004, Avg loss: 691.6232586142457
Test: Average absolute error: 20.884084688721448, Avg loss: 624.1694813508254
Epoch 2
-------------------------------
loss: 633.775818  [  256/20631]
Train: Average absolute error: 13.483491698721735, Avg loss: 274.40252666708864
Test: Average absolute error: 12.29789081985949, Avg loss: 238.0065087905297
Epoch 3
-------------------------------
loss: 269.542236  [  256/20631]
Train: Average absolute error: 11.244658673868738, Avg loss: 203.75435610170717
Test: Average absolute error: 10.133353921357786, Avg loss: 174.26257082132193
Epoch 4
-------------------------------
loss: 165.140533  [  256/20631]
Train: Average absolute error: 9.849109325156546, Avg loss: 172.3897880271629
Test: Average absolute error: 8.470694474935094, Avg loss: 140.78558008487408
Epoch 5
-------------------------------
loss: 179.901855  [  256/20631]
Train: Ave