# Leak Detection in a Smart Water-Distribution System Using ANNs

## Library Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys

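# Be sure to use tensorflow.keras (not the standalone keras package)
# NOTE(review): later cells use `sns` (presumably seaborn) and `metrics` (presumably sklearn.metrics),
# but neither name is imported in the visible source; import them before running those cells.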
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, Input, Dropout, Activation, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import regularizers

## File Imports (For Google Colab)

These file uploads are written for Google Colab.

The original data (leakdata.csv) was filtered and shuffled, and then divided into training and testing data. The resulting files are used as given in the rest of this notebook.

In [None]:
# Upload the CSV files through google.colab; `uploaded` is indexed by file name when the CSVs are read.
from google.colab import files
uploaded = files.upload()

In [None]:
# Parse the three uploaded CSV payloads (raw bytes keyed by file name) into DataFrames:
# the training split, the testing split and the complete dataset, in that order.
import io

trainingdata, testingdata, alldata = (
    pd.read_csv(io.BytesIO(uploaded[csv_name]))
    for csv_name in ('training_data.csv', 'testing_data.csv', 'complete_dataset.csv')
)

## Data Analysis

In [None]:
# Preview the first rows of the complete dataset
alldata.head()

In [None]:
# Pairwise correlation matrix between the columns of the complete dataset
# NOTE(review): recent pandas versions raise on non-numeric columns unless numeric_only=True is passed — confirm every column is numeric
cor=alldata.corr()

In [None]:
# Draw the correlation matrix between the different parameters as an annotated heatmap
# (the heatmap is drawn on the 20x20 inch figure created on the next line)
fig, ax = plt.subplots(figsize=(20,20))
sns.heatmap(cor, annot=True, cmap=plt.cm.Reds)
plt.show()

In [None]:
# Absolute correlation of every parameter with the LEAK column
cor_target = cor["LEAK"].abs()
# Keep only the parameters whose absolute correlation with LEAK exceeds 0.05
relevant_features = cor_target[cor_target.gt(0.05)]
# Display the selected features as the cell output
relevant_features

In [None]:
# Average value of FLOW with no leak (blue) and with a leak (orange)
# A large difference between the bars backs up the heavy dependence of flow on leak
sns.barplot(x='LEAK',y='FLOW',data=alldata)

In [None]:
# Average value of PRESSURE with no leak (blue) and with a leak (orange)
# A large difference between the bars backs up the heavy dependence of pressure on leak
sns.barplot(x='LEAK',y='PRESSURE',data=alldata)

In [None]:
# Average value of temperature (column TT03) with no leak (blue) and with a leak (orange)
sns.barplot(x='LEAK',y='TT03',data=alldata)

In [None]:
# Average value of VIBRATION with no leak (blue) and with a leak (orange)
# A large difference between the bars backs up the heavy dependence of pipe vibration on leak
sns.barplot(x='LEAK',y='VIBRATION',data=alldata)

In [None]:
# Histogram (no KDE curve) of the LEAK column, showing how many samples fall in each class
# NOTE(review): `sns.distplot` is deprecated in recent seaborn releases; `histplot`/`countplot` are the suggested replacements — confirm against the installed version
sns.distplot(alldata['LEAK'],kde=False)

In [None]:
# Pressure vs. flow with a regression fit per LEAK value (marker 'o' for the first hue level, 'v' for the second)
sns.lmplot(x='FLOW',y='PRESSURE',data=alldata,hue='LEAK', markers=['o','v'])

In [None]:
# Vibration vs. flow with a regression fit per LEAK value (marker 'o' for the first hue level, 'v' for the second)
sns.lmplot(x='FLOW',y='VIBRATION',data=alldata,hue='LEAK', markers=['o','v'])

In [None]:
# Pressure vs. vibration with a regression fit per LEAK value (marker 'o' for the first hue level, 'v' for the second)
sns.lmplot(x='VIBRATION',y='PRESSURE',data=alldata,hue='LEAK', markers=['o','v'])

## Data Loading and Shuffling

In [None]:
# Convert both splits from DataFrames to plain NumPy arrays
tr = trainingdata.to_numpy()
te = testingdata.to_numpy()

# Columns 1-20 are the model inputs and columns 21-22 the ground truth; column 0 is not used
xtrain, ytrain = tr[:, 1:21], tr[:, 21:23]  # training inputs / training ground truth
xtest, ytest = te[:, 1:21], te[:, 21:23]  # testing inputs / testing ground truth

In [None]:
def shuffle_in_unison(a, b): # Inplace shuffling
    """Shuffle ``a`` and ``b`` in place along their first axis, keeping rows paired.

    The global NumPy RNG state is captured before the first shuffle and
    restored before the second one, so both shuffles start from the same
    state and element ``i`` of ``a`` stays aligned with element ``i`` of ``b``.

    Args:
        a: First sequence/array, shuffled in place.
        b: Second sequence/array, shuffled in place; must be as long as ``a``.

    Returns:
        The same two objects ``(a, b)`` after shuffling.

    Raises:
        ValueError: If ``a`` and ``b`` have different lengths. Replaying the
            RNG state on inputs of different length would not keep the
            pairs aligned, so this is rejected before anything is mutated.
    """
    if len(a) != len(b):
        raise ValueError(
            "shuffle_in_unison requires inputs of equal length, got "
            f"{len(a)} and {len(b)}"
        )
    rng_state = np.random.get_state()
    np.random.shuffle(a)
    # Rewind the generator so the second shuffle starts from the same state as the first.
    np.random.set_state(rng_state)
    np.random.shuffle(b)
    return a, b

In [None]:
# Shuffle inputs and labels of each split together; the arrays are shuffled in place and rebound to the same names
xtrain,ytrain = shuffle_in_unison(xtrain,ytrain)
xtest,ytest = shuffle_in_unison(xtest,ytest)

## Model Training

In [None]:
# Feed-forward classifier: 20 inputs -> 4 ReLU units (L1+L2 kernel penalty of 0.01 each) -> 2 sigmoid outputs
model=Sequential()
# use_bias is the boolean True; the previous string 'True' only worked because any non-empty string is truthy
model.add(Dense(4,input_dim=20,activation='relu',use_bias=True,kernel_regularizer=regularizers.l1_l2(l1=0.01,l2=0.01)))
model.add(Dense(2,activation='sigmoid'))
# Binary cross-entropy loss with the Adadelta optimizer; accuracy is tracked as an extra metric
model.compile(loss='binary_crossentropy',optimizer='adadelta', metrics=['accuracy'])
model.summary()
# Train for 100 epochs (no batch_size is passed) and keep the History object for plotting
history_model=model.fit(xtrain,ytrain,epochs=100)

In [None]:
# Plot the training loss recorded by model.fit, one point per epoch
history = history_model

# history.history['loss'] holds the per-epoch loss values
plt.plot(history.history['loss'])
plt.title('Loss Function vs. Epochs')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.show()

## Model Testing

In [None]:
#Evaluate the model on test data and print the confusion matrix plus the precision / recall / F1-score / accuracy report
def getresults(model,data_test,y_test,batch):
    """Print a confusion matrix and a classification report for ``model``.

    Both the ground truth and the predictions are reduced to class indices
    with ``argmax`` over axis 1 before being compared.

    Args:
        model: Object exposing ``predict(data, batch_size=...)``.
        data_test: Inputs passed to ``model.predict``.
        y_test: Ground-truth array with one column per class.
        batch: Batch size forwarded to ``model.predict``.
    """
    # NOTE(review): `metrics` is not imported in the visible source — presumably sklearn.metrics; confirm.
    scores = model.predict(data_test, batch_size=batch)
    expected_labels = y_test.argmax(axis=1)
    predicted_labels = scores.argmax(axis=1)
    print(metrics.confusion_matrix(expected_labels, predicted_labels))
    print(metrics.classification_report(expected_labels, predicted_labels, digits=3))

In [None]:

# Evaluate the trained model on the test split, predicting with a batch size of 1
getresults(model,xtest,ytest,1)
# Legend mapping the class indices printed above to their meaning
print('Legends - \n 0 : leak \n 1 : no leak')

### Observations (For Test Data) -



*   Leak classified as no leak = 2/61
*   No leak classified as leak = 6/39


*   Precision - Out of all samples predicted as positive (TP + FP), how many were actually positive (TP)
*   Recall - Out of all samples that are actually positive (TP + FN), how many were predicted as positive (TP)

For Leak,
Precision=92.5%
and Recall=90.7%



   

