In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

  warn(f"Failed to load image Python extension: {e}")


In [3]:
class DenseAE(pl.LightningModule):
    def __init__(self, x_dim, z_dim, h_dim):
        super().__init__()
            
        self.encoder = nn.Sequential(
            nn.Linear(x_dim, h_dim),
            nn.ReLU(),
            nn.BatchNorm1d(h_dim),

            nn.Linear(h_dim, h_dim),
            nn.ReLU(),
            nn.BatchNorm1d(h_dim),
            
            nn.Linear(h_dim, z_dim),
        ) 
        self.decoder = nn.Sequential(
            nn.Linear(z_dim, h_dim),
            nn.ReLU(),
            nn.BatchNorm1d(h_dim),
            
            nn.Linear(h_dim, h_dim),
            nn.ReLU(),
            nn.BatchNorm1d(h_dim),
            
            nn.Linear(h_dim, x_dim),
            nn.Sigmoid(),
        )
        self.loss_fn = nn.MSELoss(reduction='sum')
        
    def forward(self, x):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat, z
    
    
    def training_step(self, batch, batch_idx):
        x = batch["input"]
        x_recon, _ = self(x)
        
        loss = self.loss_fn(x, x_recon)
        self.log("train/loss", loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x = batch["input"]
        x_recon, _ = self(x)
        
        loss = self.loss_fn(x, x_recon)
        self.log("val/loss", loss)

    def configure_optimizers(self):
        opt = torch.optim.Adam(self.parameters(), lr=1e-3)
        return opt

In [4]:
from src.data.data_module import SliderDataModule

dm = SliderDataModule(
    data_dir="../data/raw",
    batch_size=256,
    num_workers=8,
    normalize=True,
    maxlen=312,
    use_cnn=False,
    iter_over_cols=True
)

models = {}
for machine_id in dm.machine_ids["dev"]:
    dm.setup_subset("dev", machine_id)
    model = DenseAE(x_dim=128, h_dim=256, z_dim=64)
    early_stopping = pl.callbacks.EarlyStopping('val/loss', patience=20, min_delta=1)
    callbacks = [early_stopping]
    logger = pl.loggers.TensorBoardLogger("lightning_logs/", name="DenseAE", 
                                          sub_dir=machine_id,
                                          version=0)
    trainer = pl.Trainer(accelerator="gpu", devices=1,
                        callbacks=callbacks, max_epochs=50,
                        log_every_n_steps=1,
                        logger=logger)
    
    print(f"Fitting a model for machine {machine_id}")
    trainer.fit(model, datamodule=dm)
    models[f"dev_{machine_id}"] = model

Producing melspectrograms for all audios:   0%|          | 0/968 [00:00<?, ?it/s]

Producing melspectrograms for all audios:   0%|          | 0/456 [00:00<?, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Fitting a model for machine 00


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 116 K 
1 | decoder | Sequential | 116 K 
2 | loss_fn | MSELoss    | 0     
---------------------------------------
232 K     Trainable params
0         Non-trainable params
232 K     Total params
0.931     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]



Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Producing melspectrograms for all audios:   0%|          | 0/968 [00:00<?, ?it/s]

Producing melspectrograms for all audios:   0%|          | 0/367 [00:00<?, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 116 K 
1 | decoder | Sequential | 116 K 
2 | loss_fn | MSELoss    | 0     
---------------------------------------
232 K     Trainable params
0         Non-trainable params
232 K     Total params
0.931     Total estimated model params size (MB)


Fitting a model for machine 02


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Producing melspectrograms for all audios:   0%|          | 0/434 [00:00<?, ?it/s]

Producing melspectrograms for all audios:   0%|          | 0/278 [00:00<?, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 116 K 
1 | decoder | Sequential | 116 K 
2 | loss_fn | MSELoss    | 0     
---------------------------------------
232 K     Trainable params
0         Non-trainable params
232 K     Total params
0.931     Total estimated model params size (MB)


Fitting a model for machine 04


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]



Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [5]:
print("hi")

hi


In [9]:
dm.active_subsets["test"].X.shape, len(dm.active_subsets["test"].y)

(torch.Size([142728, 128]), 142728)

In [17]:
from tqdm.notebook import tqdm
import numpy as np
from sklearn.metrics import roc_auc_score
mse = nn.MSELoss(reduction="none")
for machine_id in dm.machine_ids["dev"]:
    y_pred = []
    y_test = []
    print(f"Reconstruction on test set for machine {machine_id}")
    model = models[f"dev_{machine_id}"]
    model = model.cuda().eval()
    dm.setup_subset("dev", machine_id)
    
    for batch in tqdm(dm.test_dataloader()):
        x_test = batch['input'].cuda()
        labels = np.array(batch['label'])
        y_test.append(np.where(labels == "anomaly", 1, 0))
        # y_test.append(batch['label'])
        with torch.no_grad():
            x_recon, _ = model(x_test)
            y_pred.append(mse(x_recon, x_test).mean(dim=1))
            
    y_pred = torch.cat(y_pred).cpu().numpy()
    y_test = np.concatenate(y_test)
    print(roc_auc_score(y_test, y_pred), roc_auc_score(y_test, y_pred, max_fpr=0.1))


Reconstruction on test set for machine 00


  0%|          | 0/558 [00:00<?, ?it/s]

0.7952391931247225 0.6845881500224015
Reconstruction on test set for machine 02


  0%|          | 0/449 [00:00<?, ?it/s]

0.684831694639476 0.5428557411233064
Reconstruction on test set for machine 04


  0%|          | 0/340 [00:00<?, ?it/s]

0.7025728526714653 0.6418435796490953
