In [1]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from scipy.stats import genpareto

from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import precision_recall_curve
from tqdm.notebook import trange, tqdm
#from anomaly_scoring import get_anomaly_scores

import torch
import torch.optim as optim
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
%matplotlib inline 

import utils
from utils import read_machine_data_cvae
from utils import softclip
from utils import plot_train_test_reconstructions_cvae
import evaluation_utils
from models.cnn_sigmacVAE_flow import CNN_sigmacVAE_flow

from maf import MAF

# NOTE(review): throwaway two-point line plot; presumably a check that inline
# plotting works in this kernel — confirm, or remove from the final notebook.
plt.plot([0,1],[2,3])
plt.show()




In [2]:
# Windowing / batching settings. They are forwarded to the data reader below;
# window_size and cond_window_size are also reused when the model is built.
window_size = 24
cond_window_size = 8
batch_size = 256

# Read one machine of the ServerMachineDataset and unpack everything the
# reader returns: raw train/test data, tensors, labels and the two loaders.
(
    X_train_data,
    X_test_data,
    X_train_tensor,
    cond_train_tensor,
    X_test_tensor,
    cond_test_tensor,
    df_Y_test,
    trainloader,
    testloader,
) = read_machine_data_cvae(
    '../../datasets/ServerMachineDataset/machine-1-1',
    window_size,
    cond_window_size,
    batch_size,
)

In [3]:
def train_flow_model(model, num_epochs, learning_rate, dataloader):
    """Train ``model`` with Adam and plot one loss value per completed epoch.

    Parameters
    ----------
    model
        Object exposing ``parameters()``, ``train()``, a forward call
        ``model(inputs, cond)`` returning ``(outputs, rec_mu, rec_sigma, kl)``
        and ``loss_function(outputs, inputs, rec_mu, rec_sigma, kl)`` returning
        ``(rec_comps, rec, rec_mu_sigma_loss, kl)``.
    num_epochs : int
        Number of passes over ``dataloader``.
    learning_rate : float
        Learning rate handed to ``optim.Adam``.
    dataloader
        Iterable yielding ``(inputs, cond)`` batches.

    Returns
    -------
    The same ``model`` object that was passed in, after training.

    Raises
    ------
    ValueError
        If ``dataloader`` yields no batches (previously this surfaced as an
        ``UnboundLocalError`` on ``loss``).

    Notes
    -----
    Training stops early, without raising, as soon as a batch produces a NaN
    loss; the losses recorded so far are still plotted. The value recorded for
    an epoch is the loss of its last batch, not an epoch average.
    """
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    tq = tqdm(range(num_epochs))

    # Plain Python floats: storing the loss tensors themselves would keep every
    # epoch's autograd graph alive and hand grad-requiring (possibly CUDA)
    # tensors to matplotlib.
    losses = []

    for epoch in tq:
        diverged = False
        loss_value = None

        # Training mode only needs to be (re)selected once per epoch.
        model.train()

        for j, data in enumerate(dataloader, 0):
            optimizer.zero_grad()

            #batches
            inputs, cond = data
            # Tensor.to() returns the moved tensor rather than moving in place,
            # so the result has to be rebound.
            inputs = inputs.to(device)
            cond = cond.to(device)

            outputs, rec_mu, rec_sigma, kl = model(inputs, cond)

            rec_comps, rec, rec_mu_sigma_loss, kl = model.loss_function(outputs, inputs, rec_mu, rec_sigma, kl)

            loss = rec + kl + rec_mu_sigma_loss
            loss_value = loss.item()

            if np.isnan(loss_value):
                print("Noped out at", epoch, j, kl, rec_comps)
                diverged = True
                break

            loss.backward()
            optimizer.step()

        if diverged:
            break
        if loss_value is None:
            raise ValueError("dataloader yielded no batches; nothing to train on")

        tq.set_postfix(loss=loss_value)
        losses.append(loss_value)

    plt.plot(losses)
    plt.show()

    return model

In [12]:
# Build the CNN_sigmacVAE_flow model with a RealNVP flow, reusing the window
# sizes that were used to prepare the data.
model = CNN_sigmacVAE_flow(
    latent_dim=8,
    window_size=window_size,
    cond_window_size=cond_window_size,
    flow_type='RealNVP',
)
# nn.Module.to() moves the parameters in place, so a single call is enough;
# the extra .cuda()/.cpu() call that followed it was redundant.
model.to(device)
print(model)

# Train for 5 epochs with a learning rate of 0.005.
model = train_flow_model(model, 5, .005, trainloader)

CNN_sigmacVAE_flow(
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1))
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(5, 5), stride=(1, 1))
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 4, kernel_size=(5, 5), stride=(1, 1))
  (bn3): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc41): Linear(in_features=2080, out_features=8, bias=True)
  (fc42): Linear(in_features=2080, out_features=8, bias=True)
  (defc1): Linear(in_features=388, out_features=1872, bias=True)
  (deconv1): ConvTranspose2d(4, 16, kernel_size=(7, 10), stride=(1, 1))
  (debn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (deconv2): ConvTranspose2d(16, 8, kernel_size=(7, 10), stride=(1, 1))
  (debn2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (deconv3): ConvTrans

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))






In [13]:

# Plot reconstructions for the training and test sets with the trained model.
plot_train_test_reconstructions_cvae(
    model,
    X_train_tensor,
    X_train_data,
    X_test_tensor,
    X_test_data,
    cond_train_tensor,
    cond_test_tensor,
    window_size,
    cond_window_size,
)

  plt.figure()
  plt.show()


MSE : 0.03518


  plt.figure()
  plt.show()


MSE : 0.03589


In [14]:
import datetime

# Print a wall-clock timestamp before and after the evaluation so its
# duration can be read off the cell output.
print(datetime.datetime.now())
evaluation_utils.cVAE_anomaly_detection(
    model,
    X_test_tensor,
    X_test_data,
    cond_test_tensor,
    X_train_data,
    df_Y_test,
    .05,  # NOTE(review): meaning of this value is defined in evaluation_utils — confirm
)

print(datetime.datetime.now())

2021-01-25 12:47:51.796789
Computing AUPR for 28456 thresholds ... 


  _warn_prf(average, modifier, msg_start, len(result))
  plt.ylabel('Precision')
  plt.figure(figsize=(50,15))



--- AUPR ---
0.8853524198557919
Best F1 score : 0.9655048508803449 at threshold : -0.30606568861711314 (1-percentile : 0.03449514911965512)
Corresponding best precision : 0.9355849582172702, best recall : 0.9974016332590943


  plt.show()
  plt.figure(figsize=(50,15))
  plt.show()



--- Metrics ---
precision : 0.7681532304173814 recall : 0.9974016332590943 f1 : 0.8678940568475453


2021-01-25 12:57:23.366434
