### Dataloader Class

In [21]:
import numpy as np
import os
import torch
import torch.nn as nn

from catalyst import dl, utils
from catalyst.core.logger import ILogger
from catalyst.loggers.console import ConsoleLogger
from IPython.display import clear_output
from torchinfo import summary
from torch.utils.data import DataLoader, Dataset, Subset
from typing import Dict
from utils.dataloaderClass import CMAPSS

In [13]:
# use GPU if available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Load preprocessed data

In [14]:
cmapss = CMAPSS('../CMAPSSData/')

### Split train and validation data


In [15]:
def subset_ind(dataset, ratio: float):
    return np.random.choice(len(dataset), size=int(ratio*len(dataset)), replace=False)

In [16]:
shrink_inds = subset_ind(cmapss, 0.2)

In [17]:
cmapss_subset = Subset(cmapss, shrink_inds)
print(f'Dataset size: {len(cmapss_subset)}')

Dataset size: 4126


In [18]:
val_size = 0.2
val_inds = subset_ind(cmapss, val_size)

train_dataset = Subset(cmapss, [i for i in range(len(cmapss)) if i not in val_inds])
val_dataset = Subset(cmapss, val_inds)

In [19]:
print(f'Training size: {len(train_dataset)}\nValidation size: {len(val_dataset)}')
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

Training size: 16505
Validation size: 4126


In [58]:
# Network
dim_in, dim_out = 16, 16
model = nn.Sequential(
    nn.Linear(dim_in, dim_out),
    nn.Sigmoid(),
)
model.to(device, torch.float32)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

## Catalyst

In [61]:
def _format_metrics(dct: Dict):
    return " | ".join([f"{k}: {float(dct[k]):.03}" for k in sorted(dct.keys())])


class CustomLogger(ConsoleLogger):
    """Custom console logger for parameters and metrics.
    Output the metric into the console during experiment.

    Note:
        We inherit ConsoleLogger to overwrite default Catalyst logging behaviour
    """

    def log_metrics(
        self,
        metrics: Dict[str, float],
        scope: str = None,
        # experiment info
        run_key: str = None,
        global_epoch_step: int = 0,
        global_batch_step: int = 0,
        global_sample_step: int = 0,
        # stage info
        stage_key: str = None,
        stage_epoch_len: int = 0,
        stage_epoch_step: int = 0,
        stage_batch_step: int = 0,
        stage_sample_step: int = 0,
        # loader info
        loader_key: str = None,
        loader_batch_len: int = 0,
        loader_sample_len: int = 0,
        loader_batch_step: int = 0,
        loader_sample_step: int = 0,
    ) -> None:
        """Logs loader and epoch metrics to stdout."""
        if scope == "loader":
            prefix = f"{loader_key} ({stage_epoch_step}/{stage_epoch_len}) "
            print(prefix + _format_metrics(metrics))

        elif scope == "epoch":
            prefix = f"* Epoch ({stage_epoch_step}/{stage_epoch_len}) "
            print(prefix + _format_metrics(metrics["_epoch_"]))

In [62]:
loaders = {
    # TODO put train=false for val data insicde cmapss class
    "train": DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
    "valid": DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
}

In [63]:
runner = dl.SupervisedRunner(
    input_key="features", output_key="logits", target_key="targets", loss_key="loss"
)

In [67]:
print(runner.__dict__)

{'engine': <catalyst.engines.torch.CPUEngine object at 0x2ac69b100>, 'loggers': {'console': <__main__.CustomLogger object at 0x2ac69b9d0>, '_csv': <catalyst.loggers.csv.CSVLogger object at 0x2ac698670>, '_tensorboard': <catalyst.loggers.tensorboard.TensorboardLogger object at 0x2ac2a1090>}, 'loaders': {'train': <accelerate.data_loader.DataLoaderShard object at 0x2ac69b400>, 'valid': <accelerate.data_loader.DataLoaderShard object at 0x2ac68dab0>}, 'model': Sequential(
  (0): Linear(in_features=16, out_features=16, bias=True)
  (1): Sigmoid()
), 'criterion': CrossEntropyLoss(), 'optimizer': AcceleratedOptimizer (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.001
    maximize: False
    weight_decay: 0
), 'scheduler': None, 'callbacks': OrderedDict([(0, <catalyst.callbacks.metrics.accuracy.AccuracyCallback object at 0x2ac698040>), (1, <catalyst.callbacks.metrics.class

In [64]:
# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    loggers={"console": CustomLogger()},
    num_epochs=6,
    callbacks=[
        dl.AccuracyCallback(input_key="logits", target_key="targets", topk=(1, 3, 5)),
        dl.PrecisionRecallF1SupportCallback(input_key="labels", target_key="RUL"),
    ],
    logdir="./logs",
    valid_loader="valid",
    valid_metric="loss",
    minimize_valid_metric=True,
    verbose=True,
    load_best_on_end=True,
)

1/6 * Epoch (train):   0%|          | 0/516 [00:00<?, ?it/s]

KeyError: 16486

In [None]:
# model evaluation
metrics = runner.evaluate_loader(
    loader=loaders["valid"],
    callbacks=[dl.AccuracyCallback(input_key="labels", target_key="RUL", topk=(1, 3, 5))],
)

# model inference
for prediction in runner.predict_loader(loader=loaders["valid"]):
    assert prediction["labels"].detach().cpu().numpy().shape[-1] == 10

# model post-processing
model = runner.model.cpu()
batch = next(iter(loaders["valid"]))[0]
utils.trace_model(model=model, batch=batch)
utils.quantize_model(model=model)
utils.prune_model(model=model, pruning_fn="l1_unstructured", amount=0.8)
utils.onnx_export(model=model, batch=batch, file="./logs/cmapss.onnx", verbose=True)

We also need some validation code to reload the best checkpoint, evaluate it using our validation data and compute metrics.

In [None]:
from contextlib import contextmanager


@contextmanager
def infer(model):
    """Fully turns model state to inference (and restores it in the end)"""
    status = model.training
    model.train(False)
    with torch.no_grad():
        try:
            yield None
        finally:
            model.train(status)

In [None]:
from sklearn.metrics import accuracy_score


def load_ckpt(path, model, device=torch.device("cpu")):
    """
    Load saved checkpoint weights to model
    :param path: full path to checkpoint
    :param model: initialized model class nested from nn.Module()
    :param device: base torch device for validation
    :return: model with loaded 'state_dict'
    """
    assert os.path.isfile(path), FileNotFoundError(f"no file: {path}")

    ckpt = torch.load(path, map_location=device)
    ckpt_dict = ckpt["model_state_dict"]
    model_dict = model.state_dict()
    ckpt_dict = {k: v for k, v in ckpt_dict.items() if k in model_dict}
    model_dict.update(ckpt_dict)
    model.load_state_dict(model_dict)
    return model


@torch.no_grad()
def validate_model(model, loader, device):
    """
    Evaluate implemented model
    :param model: initialized model class nested from nn.Module() with loaded state dict
    :param loader batch data loader for evaluation set
    :param device: base torch device for validation
    :return: dict performance metrics
    """
    label_list = []
    pred_list = []
    model.train(False)
    model = model.to(device)

    for data_tensor, lbl_tensor in loader:
        lbl_values = lbl_tensor.cpu().view(-1).tolist()
        label_list.extend(lbl_values)
        logits = model(data_tensor.to(device))
        scores = F.softmax(logits.detach().cpu(), 1).numpy()
        pred_labels = np.argmax(scores, 1)
        pred_list.extend(pred_labels.ravel().tolist())

    labels = np.array(label_list)
    predicted = np.array(pred_list)
    acc = accuracy_score(labels, predicted)
    print(f"model accuracy: {acc:.4f}")
    metric_dict = {"accuracy": acc}
    return metric_dict

Let's test it: first we re-initialize the model and load the checkpoint state dict

In [None]:
ckpt_fp = os.path.join("logs", "checkpoints", "best.pth")  # "last.pth", "epoch01.pth" ...
mod = BaseModel(num_classes=num_classes)
mod = load_ckpt(ckpt_fp, mod).eval()

Then we can run validation code using the corresponding data loader

In [None]:
new_runner = validate_model(mod, loaders["valid"], device)