In [6]:
import os
import pdb

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import wntr
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder

from ML_for_WDN.data_utils import clean_dataframes, load_data
from ML_for_WDN.models import UnsupervisedLeakDetector

# set working directory to root directory
%cd ..

torch.set_default_dtype(torch.float32)

LATENT_DIM = 8
SUPERVISED = False

ENCODER_ARGS = {
    'hidden_dims': [16, 12, 8],
    'latent_dim': LATENT_DIM,
}

DECODER_ARGS = {
    'latent_dim': LATENT_DIM,
    'hidden_dims': [8, 12, 16],
    'num_pars': 4 if SUPERVISED else None,
}


DATA_FILES_TRAIN = [
    'data/data_no_leak.xlsx',
]

DATA_FILES_TEST = [
    'data/data_leak_1.xlsx',
    'data/data_leak_2.xlsx',
    'data/data_leak_3.xlsx',
]

columns_to_use = [
    'FM01_flow', 'FM02_head', 'FM03_flow', 'FM05_flow', 'FM06_flow', 'FM08_flow', 'FM09_flow', 'FM11_flow', 'FM13_flow',
    'FM01_head', 'FM02_flow', 'FM03_head', 'FM05_head', 'FM06_head', 'FM08_head', 'FM09_head', 'FM11_head', 'FM13_head',
]

dataframes = []
for data_file in DATA_FILES_TRAIN:
    df = load_data(data_file)
    dataframes.append(df)

dataframes = clean_dataframes(
    dataframes,
    columns_to_use=columns_to_use,
)
train_data = dataframes[0]

test_data = train_data.iloc[-5000:, :]
train_data = train_data.iloc[:-5000, :]


/export/scratch1/ntm/nikolaj_workstation_data/PhD


In [7]:

preprocessor = StandardScaler().fit(train_data.values)
train_data = preprocessor.transform(train_data.values)

NN_args = {
    'encoder_args': ENCODER_ARGS,
    'decoder_args': DECODER_ARGS,
}
NN_train_args = {
    'epochs': 1000,
    'batch_size': 512,
    'lr': 5e-3,
    'weight_decay': 1e-4,
    'loss_fn': nn.MSELoss(),
    'supervised_pars': None,
}
anomaly_detection_args = {
}
model = UnsupervisedLeakDetector(
    **NN_args,
)

model.fit(
    data=train_data,
    NN_train_args=NN_train_args,
    anomaly_detection_args=anomaly_detection_args,
    device='cpu',
)

dataframes = []
for data_file in DATA_FILES_TEST:
    df = load_data(data_file)
    dataframes.append(df)

dataframes = clean_dataframes(
    dataframes,
    columns_to_use=columns_to_use,
)
dataframes = pd.concat(dataframes, ignore_index=True)

test_data = pd.concat([test_data, dataframes], ignore_index=True)
test_data = preprocessor.transform(test_data.values)

targets = np.zeros((test_data.shape[0]))
targets[0:5000] = 1
targets[5000:] = -1

preds = model.predict(
    X=test_data,
)
cm = confusion_matrix(targets, preds)
print(f'Accuracy: {accuracy_score(targets, preds):0.3f}')
print(f'Recall: {cm[1,1]/(cm[1,1]+cm[1,0])}')
print(f'Precision: {cm[1,1]/(cm[1,1]+cm[0,1])}')
    
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['leak', 'No Leak'],
)
disp.plot()
plt.show()




########## Training stage 1 ##########


Training autoencoder without leak data


Autoencoder architecture:
- Latent dimension: 8
- Encoder hidden dimensions: [16, 12, 8]
- Decoder hidden dimensions: [8, 12, 16]




Loss: 0.7872 | Latent: 2.1884:  12%|█▏        | 116/1000 [00:17<02:12,  6.67it/s]


KeyboardInterrupt: 