In [62]:
import sys
sys.path.append("C:/Users/user/meepc")
import numpy as np
import pandas as pd
from models import Hankel,Corrhankel,Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,roc_auc_score

In [63]:
df1 = pd.read_csv('~/data/ctown/dataset03.csv')
df2 = pd.read_csv('~/data/ctown/dataset04.csv')

train_normal = pd.concat((df1,df2[df2['ATT_FLAG']==0]),axis=0,ignore_index=True)
train_attack = df2[df2['ATT_FLAG']==1]

In [64]:
sensors = [col for col in train_normal.columns if col not in ['DATETIME','ATT_FLAG']]

In [65]:
scaler = StandardScaler()
X_normal = pd.DataFrame(index=train_normal.index, columns=sensors, data=scaler.fit_transform(train_normal[sensors]))
X_attack = train_attack[sensors].reset_index().drop(columns=['index'])

In [66]:
hankel = Hankel()
corrhankel = Corrhankel()
lag = 60
stride = 0.5

In [67]:
corr_normal,nolag_normal = corrhankel.fit(X_normal.to_numpy(),lag,stride)

In [68]:
corr_attack,nolag_attack = corrhankel.fit(X_attack.to_numpy(),lag,stride)

In [69]:
df_test = pd.read_csv('~/data/ctown/test_dataset.csv')

# Epasad with 1 cluster and no threshold tuning (training attack included in test data)

In [70]:
test_combined = pd.concat((df_test,train_attack),axis=0)
X_test = pd.DataFrame(index=test_combined.index, columns=sensors, data=scaler.fit_transform(test_combined[sensors]))
corr_test,nolag_test = corrhankel.fit(X_test.to_numpy(),lag,stride)
Y_test = test_combined.loc[:,'ATT_FLAG']

In [71]:
labels = hankel.fit(np.array(Y_test),lag,stride)
y_actual = np.any(labels>0,axis=0).astype(int)

In [34]:
sensor_models = []
sensor_predicted = []
accuracy = []
precision = []
recall = []
fscore = []
for i,sens in enumerate(sensors):
    train_normal = X_normal.loc[:,sens].values
    train_attack = X_attack.loc[:,sens].values
    model = Pipeline()
    model.fit(train_normal,train_attack,lag,stride,optimal_k=1,tune=False,corr_normal=corr_normal[:,i].reshape(nolag_normal,len(X_normal.columns)).T)
    test = X_test.loc[:,sens].values
    y_predicted = model.predict(test,corr_test=corr_test[:,i].reshape(nolag_test,len(X_normal.columns)).T)
    sensor_predicted.append(y_predicted)
    accuracy.append(accuracy_score(y_actual,y_predicted))
    precision.append(precision_score(y_actual,y_predicted))
    recall.append(recall_score(y_actual,y_predicted))
    fscore.append(f1_score(y_actual,y_predicted))
    sensor_models.append(model)    

In [35]:
sensor_predicted = np.asarray(sensor_predicted)
y_predicted = np.any(sensor_predicted,axis=0).astype(int)
y_predicted

array([0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1,
       1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1])

In [36]:
print("Accuracy ",accuracy_score(y_actual,y_predicted))
print("Precision ",precision_score(y_actual,y_predicted))
print("Recall ",recall_score(y_actual,y_predicted))
print("F1-score ",f1_score(y_actual,y_predicted))

Accuracy  0.7647058823529411
Precision  0.7407407407407407
Recall  0.8695652173913043
F1-score  0.7999999999999999


In [37]:
# Accuracy  0.7764705882352941
# Precision  0.7647058823529411
# Recall  0.8478260869565217
# F1-score  0.8041237113402062

In [38]:
print("Accuracy -  Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(accuracy).mean(), np.median(np.asarray(accuracy)),np.asarray(accuracy).min(), np.asarray(accuracy).max()))
print("precision - Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(precision).mean(), np.median(np.asarray(precision)),np.asarray(precision).min(), np.asarray(precision).max()))
print("recall -    Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(recall).mean(), np.median(np.asarray(recall)),np.asarray(recall).min(), np.asarray(recall).max()))
print("f1 -        Mean: {} Median : {} Min : {} Max : {}".format(np.asarray(fscore).mean(), np.median(np.asarray(fscore)),np.asarray(fscore).min(), np.asarray(fscore).max()))

Accuracy -  Mean: 0.49958960328317376 Median : 0.5058823529411764 Min : 0.4588235294117647 Max : 0.5882352941176471
precision - Mean: 0.6365706360063405 Median : 0.8333333333333334 Min : 0.0 Max : 1.0
recall -    Mean: 0.08543983822042468 Median : 0.08695652173913043 Min : 0.0 Max : 0.30434782608695654
f1 -        Mean: 0.14575586469868504 Median : 0.16 Min : 0.0 Max : 0.4444444444444444


# Multiple clusters + No threshold tuning (training attack mixed in test data)

In [39]:
fscores_k_1 = []
for k in range(2,8):
    sensor_models = []
    sensor_predicted = []
    accuracy = []
    precision = []
    recall = []
    fscore = []
    for i,sens in enumerate(sensors):
        train_normal = X_normal.loc[:,sens].values
        train_attack = X_attack.loc[:,sens].values
        model = Pipeline()
        model.fit(train_normal,train_attack,lag,stride,optimal_k=k,tune=False,kscore_init='inertia',corr_normal=corr_normal[:,i].reshape(nolag_normal,len(X_normal.columns)).T)
        test = X_test.loc[:,sens].values
        y_predicted = model.predict(test,corr_test=corr_test[:,i].reshape(nolag_test,len(X_normal.columns)).T)
        sensor_predicted.append(y_predicted)
        accuracy.append(accuracy_score(y_actual,y_predicted))
        precision.append(precision_score(y_actual,y_predicted))
        recall.append(recall_score(y_actual,y_predicted))
        fscore.append(f1_score(y_actual,y_predicted))
        sensor_models.append(model)    
    sensor_predicted = np.asarray(sensor_predicted)
    y_predicted = np.any(sensor_predicted,axis=0).astype(int) 
    print("-------number of clusters------", k)
    print("Accuracy ",accuracy_score(y_actual,y_predicted))
    print("Precision ",precision_score(y_actual,y_predicted))
    print("Recall ",recall_score(y_actual,y_predicted))
    print("F1-score ",f1_score(y_actual,y_predicted))
    fscores_k_1.append(f1_score(y_actual,y_predicted))

-------number of clusters------ 2
Accuracy  0.7529411764705882
Precision  0.7659574468085106
Recall  0.782608695652174
F1-score  0.7741935483870968
-------number of clusters------ 3
Accuracy  0.7411764705882353
Precision  0.75
Recall  0.782608695652174
F1-score  0.7659574468085107
-------number of clusters------ 4
Accuracy  0.788235294117647
Precision  0.78
Recall  0.8478260869565217
F1-score  0.8125
-------number of clusters------ 5
Accuracy  0.7176470588235294
Precision  0.6833333333333333
Recall  0.8913043478260869
F1-score  0.7735849056603774
-------number of clusters------ 6
Accuracy  0.7411764705882353
Precision  0.6935483870967742
Recall  0.9347826086956522
F1-score  0.7962962962962964
-------number of clusters------ 7
Accuracy  0.6588235294117647
Precision  0.6307692307692307
Recall  0.8913043478260869
F1-score  0.7387387387387386


In [40]:
fscores_k_1

[0.7741935483870968,
 0.7659574468085107,
 0.8125,
 0.7735849056603774,
 0.7962962962962964,
 0.7387387387387386]

# Multiple clusters + No threshold tuning (No concat of training and test data)

In [72]:
X_test = pd.DataFrame(index=df_test.index, columns=sensors, data=scaler.fit_transform(df_test[sensors]))
corr_test,nolag_test = corrhankel.fit(X_test.to_numpy(),lag,stride)
Y_test = df_test.loc[:,'ATT_FLAG']

In [73]:
labels = hankel.fit(np.array(Y_test),lag,stride)
y_actual = np.any(labels>0,axis=0).astype(int)

In [43]:
fscores_k_2=[]
for k in range(2,8):
    sensor_models = []
    sensor_predicted = []
    accuracy = []
    precision = []
    recall = []
    fscore = []
    for i,sens in enumerate(sensors):
        train_normal = X_normal.loc[:,sens].values
        train_attack = X_attack.loc[:,sens].values
        model = Pipeline()
        model.fit(train_normal,train_attack,lag,stride,optimal_k = k,tune=False,kscore_init='inertia',corr_normal=corr_normal[:,i].reshape(nolag_normal,len(X_normal.columns)).T,
                corr_attack=corr_attack[:,i].reshape(nolag_attack,len(X_attack.columns)).T)
        test = X_test.loc[:,sens].values
        y_predicted = model.predict(test,corr_test=corr_test[:,i].reshape(nolag_test,len(X_normal.columns)).T)
        sensor_predicted.append(y_predicted)
        accuracy.append(accuracy_score(y_actual,y_predicted))
        precision.append(precision_score(y_actual,y_predicted))
        recall.append(recall_score(y_actual,y_predicted))
        fscore.append(f1_score(y_actual,y_predicted))
        sensor_models.append(model)    
    sensor_predicted = np.asarray(sensor_predicted)
    y_predicted = np.any(sensor_predicted,axis=0).astype(int)
    print("-------number of clusters------", k)
    print("Accuracy ",accuracy_score(y_actual,y_predicted))
    print("Precision ",precision_score(y_actual,y_predicted))
    print("Recall ",recall_score(y_actual,y_predicted))
    print("F1-score ",f1_score(y_actual,y_predicted))
    fscores_k_2.append(f1_score(y_actual,y_predicted))

-------number of clusters------ 2
Accuracy  0.7205882352941176
Precision  0.6388888888888888
Recall  0.7931034482758621
F1-score  0.7076923076923076
-------number of clusters------ 3
Accuracy  0.7058823529411765
Precision  0.6451612903225806
Recall  0.6896551724137931
F1-score  0.6666666666666667
-------number of clusters------ 4
Accuracy  0.6617647058823529
Precision  0.5833333333333334
Recall  0.7241379310344828
F1-score  0.6461538461538462
-------number of clusters------ 5
Accuracy  0.5882352941176471
Precision  0.5102040816326531
Recall  0.8620689655172413
F1-score  0.641025641025641
-------number of clusters------ 6
Accuracy  0.6323529411764706
Precision  0.5416666666666666
Recall  0.896551724137931
F1-score  0.6753246753246753
-------number of clusters------ 7
Accuracy  0.5735294117647058
Precision  0.5
Recall  0.9310344827586207
F1-score  0.6506024096385543


In [44]:
fscores_k_2

[0.7076923076923076,
 0.6666666666666667,
 0.6461538461538462,
 0.641025641025641,
 0.6753246753246753,
 0.6506024096385543]

# Multiple clusters + Threshold tuning (No concat of training and test data)

In [74]:
fscores_k_3 = []
for k in range(2,8):
    sensor_models = []
    sensor_predicted = []
    accuracy = []
    precision = []
    recall = []
    fscore = []
    for i,sens in enumerate(sensors):
        train_normal = X_normal.loc[:,sens].values
        train_attack = X_attack.loc[:,sens].values
        model = Pipeline()
        model.fit(train_normal,train_attack,lag,stride,optimal_k=k,kscore_init='inertia',corr_normal=corr_normal[:,i].reshape(nolag_normal,len(X_normal.columns)).T,
                corr_attack=corr_attack[:,i].reshape(nolag_attack,len(X_attack.columns)).T)
        test = X_test.loc[:,sens].values
        y_predicted = model.predict(test,corr_test=corr_test[:,i].reshape(len(X_normal.columns),nolag_test))
        sensor_predicted.append(y_predicted)
        accuracy.append(accuracy_score(y_actual,y_predicted))
        precision.append(precision_score(y_actual,y_predicted))
        recall.append(recall_score(y_actual,y_predicted))
        fscore.append(f1_score(y_actual,y_predicted))
        sensor_models.append(model)    
    sensor_predicted = np.asarray(sensor_predicted)
    y_predicted = np.any(sensor_predicted,axis=0).astype(int)
    print("-------number of clusters------", k)
    print("Accuracy ",accuracy_score(y_actual,y_predicted))
    print("Precision ",precision_score(y_actual,y_predicted))
    print("Recall ",recall_score(y_actual,y_predicted))
    print("F1-score ",f1_score(y_actual,y_predicted))
    fscores_k_3.append(f1_score(y_actual,y_predicted))

-------number of clusters------ 2
Accuracy  0.4264705882352941
Precision  0.4264705882352941
Recall  1.0
F1-score  0.5979381443298969
-------number of clusters------ 3
Accuracy  0.4264705882352941
Precision  0.4264705882352941
Recall  1.0
F1-score  0.5979381443298969
-------number of clusters------ 4
Accuracy  0.4264705882352941
Precision  0.4264705882352941
Recall  1.0
F1-score  0.5979381443298969
-------number of clusters------ 5
Accuracy  0.4264705882352941
Precision  0.4264705882352941
Recall  1.0
F1-score  0.5979381443298969
-------number of clusters------ 6
Accuracy  0.4264705882352941
Precision  0.4264705882352941
Recall  1.0
F1-score  0.5979381443298969
-------number of clusters------ 7
Accuracy  0.45588235294117646
Precision  0.4393939393939394
Recall  1.0
F1-score  0.6105263157894737


In [75]:
fscores_k_3

[0.5979381443298969,
 0.5979381443298969,
 0.5979381443298969,
 0.5979381443298969,
 0.5979381443298969,
 0.6105263157894737]