In [1]:
import numpy as np
import pandas as pd
from models import Hankel,Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,roc_auc_score

In [2]:
df1 = pd.read_csv('~/data/ctown/dataset03.csv')
df2 = pd.read_csv('~/data/ctown/dataset04.csv')

train_normal = pd.concat((df1,df2[df2['ATT_FLAG']==0]),axis=0,ignore_index=True)
train_attack = df2[df2['ATT_FLAG']==1]

In [3]:
train_normal

Unnamed: 0,DATETIME,L_T1,L_T2,L_T3,L_T4,L_T5,L_T6,L_T7,F_PU1,S_PU1,...,P_J256,P_J289,P_J415,P_J302,P_J306,P_J307,P_J317,P_J14,P_J422,ATT_FLAG
0,06/01/14 00,0.509730,2.049003,3.191145,2.792634,2.656091,5.316831,1.562321,98.998444,1,...,87.605774,26.495605,84.206619,18.901676,81.983734,18.791777,67.125603,29.387470,28.487471,0
1,06/01/14 01,0.412580,2.009072,3.642565,2.831673,3.126387,5.494855,1.852043,99.095901,1,...,89.448341,26.487326,85.900085,18.849329,82.150589,18.739643,67.178696,29.354256,28.454256,0
2,06/01/14 02,0.320112,1.986093,4.140192,3.256733,3.574601,5.500000,2.246126,98.420959,1,...,91.056114,26.487364,86.582474,19.597170,83.988579,19.496712,72.425293,29.354538,28.454538,0
3,06/01/14 03,0.332879,2.009203,4.673478,3.744497,3.952379,5.500000,3.203573,97.575172,1,...,92.594353,26.575815,88.020546,26.028486,64.670486,25.922703,76.275040,29.449951,28.549952,0
4,06/01/14 04,0.483496,2.089049,5.237937,4.409456,3.504676,5.500000,4.439714,97.351059,1,...,94.473099,26.723457,90.422462,26.209970,64.746620,26.104692,76.703529,29.574265,28.674263,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12441,24/12/16 20,2.650000,2.370000,3.850000,3.040000,3.820000,4.940000,2.190000,120.080000,1,...,70.030000,27.380000,84.140000,18.450000,81.670000,18.340000,66.040000,29.880000,28.980000,0
12442,24/12/16 21,2.240000,2.560000,3.420000,2.920000,3.690000,5.020000,1.970000,119.120000,1,...,68.600000,27.660000,83.460000,25.400000,60.850000,25.280000,66.890000,30.190000,29.290000,0
12443,24/12/16 22,1.910000,2.760000,2.950000,2.490000,2.700000,5.140000,1.870000,120.710000,1,...,85.630000,26.840000,82.820000,24.460000,59.560000,24.340000,66.080000,29.680000,28.780000,0
12444,24/12/16 23,1.520000,2.520000,3.330000,2.030000,1.690000,5.100000,1.390000,120.020000,1,...,86.150000,25.780000,103.630000,24.770000,59.010000,24.650000,66.420000,28.980000,28.080000,0


In [4]:
sensors = [col for col in train_normal.columns if col not in ['DATETIME','ATT_FLAG']]

In [5]:
scaler = StandardScaler()
X_normal = pd.DataFrame(index=train_normal.index, columns=sensors, data=scaler.fit_transform(train_normal[sensors]))
X_attack = train_attack[sensors].reset_index().drop(columns=['index'])

In [6]:
hankel = Hankel()
lag = 60
stride = 0.5

In [7]:
df_test = pd.read_csv('~/data/ctown/test_dataset.csv')

# Epasad with 1 cluster and no threshold tuning (training attack included in test data)

In [8]:
test_combined = pd.concat((df_test,train_attack),axis=0)
X_test = pd.DataFrame(index=test_combined.index, columns=sensors, data=scaler.fit_transform(test_combined[sensors]))
Y_test = test_combined.loc[:,'ATT_FLAG']

In [9]:
labels = hankel.fit(np.array(Y_test),lag,stride)
y_actual = np.any(labels>0,axis=0).astype(int)

In [10]:
sensor_models = []
sensor_predicted = []
accuracy = []
precision = []
recall = []
fscore = []
for sens in sensors:
    train_normal = X_normal.loc[:,sens].values
    train_attack = X_attack.loc[:,sens].values
    model = Pipeline()
    model.fit(train_normal,train_attack,lag,stride,optimal_k=1,tune=False,use_gekko=True)
    test = X_test.loc[:,sens].values
    y_predicted = model.predict(test)
    sensor_predicted.append(y_predicted)
    accuracy.append(accuracy_score(y_actual,y_predicted))
    precision.append(precision_score(y_actual,y_predicted))
    recall.append(recall_score(y_actual,y_predicted))
    fscore.append(f1_score(y_actual,y_predicted))
    sensor_models.append(model)    
    print(sens)

--
optimalK 1
cluster shape (413, 60)
eigen values
[ 8.92372222e+03+0.00000000e+00j  6.00336817e+03+0.00000000e+00j
  2.65389398e+03+0.00000000e+00j  2.32992980e+03+0.00000000e+00j
  1.78215249e+03+0.00000000e+00j  1.28645275e+03+0.00000000e+00j
  7.99273628e+02+0.00000000e+00j  3.84533538e+02+0.00000000e+00j
  1.59937920e+02+0.00000000e+00j  1.28710542e+02+0.00000000e+00j
  6.38662979e+01+0.00000000e+00j  6.15559174e+01+0.00000000e+00j
  5.41050136e+01+0.00000000e+00j  3.25356543e+01+0.00000000e+00j
  1.71998813e+01+0.00000000e+00j  1.32118883e+01+0.00000000e+00j
  1.04313494e+01+0.00000000e+00j  8.93126015e+00+0.00000000e+00j
  7.38014193e+00+0.00000000e+00j  6.31546228e+00+0.00000000e+00j
  6.05065513e+00+0.00000000e+00j  5.59228109e+00+0.00000000e+00j
  5.16624260e+00+0.00000000e+00j  4.62431000e+00+0.00000000e+00j
  3.87180970e+00+0.00000000e+00j  2.98658395e+00+0.00000000e+00j
  2.65703571e+00+0.00000000e+00j  2.20598180e+00+0.00000000e+00j
  1.96922277e+00+0.00000000e+00j  1.647

In [11]:
sensor_predicted = np.asarray(sensor_predicted)
y_predicted = np.any(sensor_predicted,axis=0).astype(int)
y_predicted

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [12]:
print("Accuracy ",accuracy_score(y_actual,y_predicted))
print("Precision ",precision_score(y_actual,y_predicted))
print("Recall ",recall_score(y_actual,y_predicted))
print("F1-score ",f1_score(y_actual,y_predicted))

Accuracy  0.5411764705882353
Precision  0.5411764705882353
Recall  1.0
F1-score  0.7022900763358779


In [13]:
print("Accuracy -  Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(accuracy).mean(), np.median(np.asarray(accuracy)),np.asarray(accuracy).min(), np.asarray(accuracy).max()))
print("precision - Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(precision).mean(), np.median(np.asarray(precision)),np.asarray(precision).min(), np.asarray(precision).max()))
print("recall -    Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(recall).mean(), np.median(np.asarray(recall)),np.asarray(recall).min(), np.asarray(recall).max()))
print("f1 -        Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(fscore).mean(), np.median(np.asarray(fscore)),np.asarray(fscore).min(), np.asarray(fscore).max()))

Accuracy -  Mean: 0.49876880984952127 Median : 0.49411764705882355 Min : 0.43529411764705883 Max : 0.6352941176470588
precision - Mean: 0.6412087637364835 Median : 0.75 Min : 0.0 Max : 1.0
recall -    Mean: 0.17138523761375124 Median : 0.08695652173913043 Min : 0.0 Max : 1.0
f1 -        Mean: 0.20343052018612245 Median : 0.16 Min : 0.0 Max : 0.7022900763358779


# Multiple clusters + No threshold tuning (training attack mixed in test data)

In [14]:
sensor_models = []
sensor_predicted = []
accuracy = []
precision = []
recall = []
fscore = []
for sens in sensors:
    train_normal = X_normal.loc[:,sens].values
    train_attack = X_attack.loc[:,sens].values
    model = Pipeline()
    model.fit(train_normal,train_attack,lag,stride,tune=False,kscore_init='inertia',use_gekko=True)
    test = X_test.loc[:,sens].values
    y_predicted = model.predict(test)
    sensor_predicted.append(y_predicted)
    accuracy.append(accuracy_score(y_actual,y_predicted))
    precision.append(precision_score(y_actual,y_predicted))
    recall.append(recall_score(y_actual,y_predicted))
    fscore.append(f1_score(y_actual,y_predicted))
    sensor_models.append(model)    

--
optimalK 8
cluster shape (48, 60)
eigen values
[2.01948918e+03 4.28326584e+02 1.40683247e+02 9.09623028e+01
 6.14753708e+01 2.62884023e+01 1.87941874e+01 1.69228235e+01
 7.12144871e+00 6.37562143e+00 4.37205635e+00 3.01451615e+00
 2.14997693e+00 1.63570588e+00 1.05040388e+00 9.70205454e-01
 7.13097884e-01 5.78544042e-01 4.22144633e-01 3.91413833e-01
 3.64543583e-01 2.98004478e-01 2.31710103e-01 2.13197134e-01
 1.71070232e-01 1.45611540e-01 1.32128074e-01 1.19203791e-01
 8.54355320e-02 6.66784612e-02 5.13651271e-02 4.85501906e-02
 4.24819718e-02 3.51662389e-02 3.55815204e-02 3.10020038e-02
 2.35494169e-02 2.11809391e-02 1.28225008e-03 2.04431908e-03
 1.71604081e-02 1.57089421e-02 4.06789944e-03 5.81813672e-03
 6.23838235e-03 1.33529261e-02 1.07404418e-02 9.31375273e-03]
R: 10
48 10
cluster shape (52, 60)
eigen values
[2.30119963e+03 3.36414937e+02 1.70887302e+02 1.13869981e+02
 7.87025651e+01 3.71158558e+01 2.10705756e+01 1.33553292e+01
 7.42020686e+00 6.82027315e+00 4.77225608e+00 2

In [15]:
sensor_predicted = np.asarray(sensor_predicted)
y_predicted = np.any(sensor_predicted,axis=0).astype(int)
y_predicted

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [16]:
print("Accuracy ",accuracy_score(y_actual,y_predicted))
print("Precision ",precision_score(y_actual,y_predicted))
print("Recall ",recall_score(y_actual,y_predicted))
print("F1-score ",f1_score(y_actual,y_predicted))

Accuracy  0.5411764705882353
Precision  0.5411764705882353
Recall  1.0
F1-score  0.7022900763358779


In [17]:
print("Accuracy -  Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(accuracy).mean(), np.median(np.asarray(accuracy)),np.asarray(accuracy).min(), np.asarray(accuracy).max()))
print("precision - Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(precision).mean(), np.median(np.asarray(precision)),np.asarray(precision).min(), np.asarray(precision).max()))
print("recall -    Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(recall).mean(), np.median(np.asarray(recall)),np.asarray(recall).min(), np.asarray(recall).max()))
print("f1 -        Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(fscore).mean(), np.median(np.asarray(fscore)),np.asarray(fscore).min(), np.asarray(fscore).max()))

Accuracy -  Mean: 0.5214774281805745 Median : 0.5058823529411764 Min : 0.43529411764705883 Max : 0.6823529411764706
precision - Mean: 0.5978408423097303 Median : 0.6818181818181818 Min : 0.0 Max : 1.0
recall -    Mean: 0.2153690596562184 Median : 0.1956521739130435 Min : 0.0 Max : 1.0
f1 -        Mean: 0.27537190596676286 Median : 0.3225806451612903 Min : 0.0 Max : 0.7022900763358779


# Multiple clusters + No threshold tuning (No concat of training and test data)

In [18]:
X_test = pd.DataFrame(index=df_test.index, columns=sensors, data=scaler.fit_transform(df_test[sensors]))
Y_test = df_test.loc[:,'ATT_FLAG']

In [19]:
labels = hankel.fit(np.array(Y_test),lag,stride)
y_actual = np.any(labels>0,axis=0).astype(int)

In [20]:
sensor_models = []
sensor_predicted = []
accuracy = []
precision = []
recall = []
fscore = []
for sens in sensors:
    train_normal = X_normal.loc[:,sens].values
    train_attack = X_attack.loc[:,sens].values
    model = Pipeline()
    model.fit(train_normal,train_attack,lag,stride,tune=False,kscore_init='inertia',use_gekko=True)
    test = X_test.loc[:,sens].values
    y_predicted = model.predict(test)
    sensor_predicted.append(y_predicted)
    accuracy.append(accuracy_score(y_actual,y_predicted))
    precision.append(precision_score(y_actual,y_predicted))
    recall.append(recall_score(y_actual,y_predicted))
    fscore.append(f1_score(y_actual,y_predicted))
    sensor_models.append(model)    

--
optimalK 8
cluster shape (29, 60)
eigen values
[1.32729811e+03 2.39948443e+02 1.04626947e+02 7.53292821e+01
 3.29662847e+01 1.47207004e+01 1.09684978e+01 5.59697901e+00
 4.48921029e+00 3.42157388e+00 2.82477204e+00 1.57115295e+00
 9.55663553e-01 7.59131023e-01 6.69806188e-01 4.95141430e-01
 3.69486845e-01 3.08091940e-01 2.01097881e-01 1.67436193e-01
 1.13231484e-01 9.15213034e-02 8.29162777e-02 1.33916802e-02
 1.57374823e-02 2.26147838e-02 5.99353099e-02 4.52075474e-02
 4.77911278e-02]
R: 10
29 10
cluster shape (41, 60)
eigen values
[2.08942793e+03 1.44444178e+02 1.14166366e+02 4.59088559e+01
 3.26266873e+01 1.23970342e+01 1.15791014e+01 6.19041666e+00
 5.08329917e+00 3.79232175e+00 2.42743136e+00 2.26665781e+00
 1.29084778e+00 1.20843174e+00 8.46806082e-01 6.15087216e-01
 4.53645175e-01 3.38169919e-01 3.28731530e-01 3.11550151e-01
 2.68103212e-01 2.15813595e-01 1.60508098e-01 1.34199845e-01
 1.15932048e-01 9.67803387e-02 8.30203412e-02 7.34877007e-02
 5.25603962e-02 4.28109190e-02 

In [21]:
sensor_predicted = np.asarray(sensor_predicted)
y_predicted = np.any(sensor_predicted,axis=0).astype(int)
y_predicted

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1])

In [22]:
print("Accuracy ",accuracy_score(y_actual,y_predicted))
print("Precision ",precision_score(y_actual,y_predicted))
print("Recall ",recall_score(y_actual,y_predicted))
print("F1-score ",f1_score(y_actual,y_predicted))

Accuracy  0.45588235294117646
Precision  0.4393939393939394
Recall  1.0
F1-score  0.6105263157894737


In [23]:
print("Accuracy -  Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(accuracy).mean(), np.median(np.asarray(accuracy)),np.asarray(accuracy).min(), np.asarray(accuracy).max()))
print("precision - Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(precision).mean(), np.median(np.asarray(precision)),np.asarray(precision).min(), np.asarray(precision).max()))
print("recall -    Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(recall).mean(), np.median(np.asarray(recall)),np.asarray(recall).min(), np.asarray(recall).max()))
print("f1 -        Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(fscore).mean(), np.median(np.asarray(fscore)),np.asarray(fscore).min(), np.asarray(fscore).max()))

Accuracy -  Mean: 0.5899452804377565 Median : 0.5735294117647058 Min : 0.45588235294117646 Max : 0.7205882352941176
precision - Mean: 0.36329267897325357 Median : 0.45454545454545453 Min : 0.0 Max : 1.0
recall -    Mean: 0.1475541299117883 Median : 0.06896551724137931 Min : 0.0 Max : 0.4827586206896552
f1 -        Mean: 0.1955022649453765 Median : 0.1212121212121212 Min : 0.0 Max : 0.5957446808510638


# Multiple clusters + Threshold tuning (No concat of training and test data)

In [24]:
sensor_models = []
sensor_predicted = []
accuracy = []
precision = []
recall = []
fscore = []
for sens in sensors:
    train_normal = X_normal.loc[:,sens].values
    train_attack = X_attack.loc[:,sens].values
    model = Pipeline()
    model.fit(train_normal,train_attack,lag,stride,kscore_init='inertia',use_gekko=True)
    test = X_test.loc[:,sens].values
    y_predicted = model.predict(test)
    sensor_predicted.append(y_predicted)
    accuracy.append(accuracy_score(y_actual,y_predicted))
    precision.append(precision_score(y_actual,y_predicted))
    recall.append(recall_score(y_actual,y_predicted))
    fscore.append(f1_score(y_actual,y_predicted))
    sensor_models.append(model)    

--
optimalK 8
cluster shape (57, 60)
eigen values
[2.58252720e+03 3.82409761e+02 2.42788720e+02 8.50351151e+01
 6.82749550e+01 3.09744806e+01 1.66147668e+01 1.13746242e+01
 7.03206153e+00 6.92003447e+00 5.78494737e+00 3.70591103e+00
 2.82918066e+00 2.43629989e+00 2.10527389e+00 1.28025478e+00
 1.01793518e+00 7.17022891e-01 6.27513370e-01 5.16785133e-01
 4.40138744e-01 3.67484044e-01 3.04088155e-01 2.78087682e-01
 2.12300248e-01 1.96230605e-01 1.60412828e-01 1.42560382e-01
 1.30581809e-01 1.20154190e-01 1.01915195e-01 8.43985431e-02
 7.72725405e-02 6.91015209e-02 5.87314796e-02 5.43602446e-02
 4.01487899e-02 3.76500375e-02 3.27518861e-02 2.78663376e-02
 2.33024492e-02 2.19602154e-02 1.71034728e-02 1.59648730e-02
 1.36400490e-02 9.97593666e-03 9.26296545e-03 8.35687126e-03
 6.05368665e-03 5.26221464e-03 8.52828699e-05 2.04491944e-04
 1.05895561e-03 3.52550551e-03 1.67366213e-03 2.69228079e-03
 2.59182156e-03]
R: 10
57 10
cluster shape (59, 60)
eigen values
[2.65738890e+03 3.43071109e+02 

In [25]:
sensor_predicted = np.asarray(sensor_predicted)
y_predicted = np.any(sensor_predicted,axis=0).astype(int)
y_predicted

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1])

In [26]:
print("Accuracy ",accuracy_score(y_actual,y_predicted))
print("Precision ",precision_score(y_actual,y_predicted))
print("Recall ",recall_score(y_actual,y_predicted))
print("F1-score ",f1_score(y_actual,y_predicted))

Accuracy  0.4264705882352941
Precision  0.4264705882352941
Recall  1.0
F1-score  0.5979381443298969


In [27]:
print("Accuracy -  Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(accuracy).mean(), np.median(np.asarray(accuracy)),np.asarray(accuracy).min(), np.asarray(accuracy).max()))
print("precision - Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(precision).mean(), np.median(np.asarray(precision)),np.asarray(precision).min(), np.asarray(precision).max()))
print("recall -    Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(recall).mean(), np.median(np.asarray(recall)),np.asarray(recall).min(), np.asarray(recall).max()))
print("f1 -        Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(fscore).mean(), np.median(np.asarray(fscore)),np.asarray(fscore).min(), np.asarray(fscore).max()))

Accuracy -  Mean: 0.5557455540355677 Median : 0.5735294117647058 Min : 0.4264705882352941 Max : 0.6470588235294118
precision - Mean: 0.3667597855826514 Median : 0.4264705882352941 Min : 0.0 Max : 1.0
recall -    Mean: 0.28869286287089013 Median : 0.20689655172413793 Min : 0.0 Max : 1.0
f1 -        Mean: 0.26694839169857587 Median : 0.3076923076923077 Min : 0.0 Max : 0.6105263157894737


In [28]:
sensor_models[0].threshold_clusters

array([1., 1., 1., 1., 1., 1., 1., 1.])

Accuracy  0.5411764705882353 \\
Precision  0.5411764705882353 \\
Recall  1.0 \\
F1-score  0.7022900763358779

Accuracy  0.4264705882352941 \\
Precision  0.4264705882352941 \\
Recall  1.0 \\
F1-score  0.5979381443298969