In [1]:
import os

import joblib  # For saving the models
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from tabulate import tabulate  # For the summary tables at the end of the notebook

# Ensure the directory for saved models exists.
# exist_ok=True makes this safe to re-run and avoids the check-then-create race
# of testing os.path.exists() first.
os.makedirs('models', exist_ok=True)


In [2]:
# Load the training and test CSVs from the working directory.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('data1_train.csv', 'data1_test.csv')
)

In [3]:
# Show up to 80 columns, and truncate long frames to 20 rows, when displaying.
pd.options.display.max_columns = 80
pd.options.display.max_rows = 20

In [4]:
# Parse the Timestamp column of both frames into datetime values.
train_data = train_data.assign(Timestamp=pd.to_datetime(train_data["Timestamp"]))
test_data = test_data.assign(Timestamp=pd.to_datetime(test_data["Timestamp"]))

In [5]:
# Use Timestamp as the row index and order the training rows chronologically.
train_data = train_data.set_index('Timestamp').sort_index(axis=0, ascending=True)

In [6]:
# Use Timestamp as the row index and order the test rows chronologically.
test_data = test_data.set_index('Timestamp').sort_index(axis=0, ascending=True)

In [7]:
# Preview the training frame after indexing by Timestamp and sorting ascending.
train_data

Unnamed: 0_level_0,FIT101,LIT101,MV101,P101,P102,AIT201,AIT202,AIT203,FIT201,MV201,P201,P203,P204,P205,P206,DPIT301,FIT301,LIT301,MV301,MV302,MV303,MV304,P301,P302,AIT401,AIT402,FIT401,LIT401,P402,P403,UV401,AIT501,AIT502,AIT503,AIT504,FIT501,FIT502,FIT503,FIT504,P501,PIT501,PIT502,PIT503,FIT601,P602,NORATK
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
2015-12-22 16:30:04,0.000000,124.5098,1,1,1,251.9226,8.313446,312.7916,0.000000,1,1,1,1,1,1,2.560983,0.000256,138.9067,1,1,1,1,1,1,0.0000,169.2387,0.000000,134.1964,1,1,1,7.446360,175.4166,260.7024,123.31450,0.001538,0.001409,0.001664,0.000000,1,9.100231,0.000000,3.3485,0.000256,1,0
2015-12-22 16:30:21,0.000000,124.3135,1,1,1,251.9226,8.308960,312.7916,0.000000,1,1,1,1,1,1,2.560983,0.000256,137.1043,1,1,1,1,1,1,0.0000,169.2387,0.000000,133.4273,1,1,1,7.446360,175.3909,260.7024,123.31450,0.001538,0.001409,0.001664,0.000000,1,9.100231,0.000000,3.3485,0.000256,1,0
2015-12-22 16:30:28,0.000000,123.5677,1,1,1,251.9226,8.313766,312.7916,0.000000,1,1,1,1,1,1,2.560983,0.000256,137.7051,1,1,1,1,1,1,0.0000,169.2387,0.000000,133.6580,1,1,1,7.444758,175.3909,260.7024,123.50680,0.001538,0.001409,0.001664,0.000000,1,9.100231,0.000000,3.3485,0.000256,1,0
2015-12-22 16:30:30,0.000000,123.3714,1,1,1,251.9226,8.313766,312.9198,0.000000,1,1,1,1,1,1,2.560983,0.000256,137.4247,1,1,1,1,1,1,0.0000,169.2387,0.000000,133.4658,1,1,1,7.444758,175.3909,260.7024,123.42990,0.001538,0.001409,0.001664,0.000000,1,9.100231,0.000000,3.3485,0.000256,1,0
2015-12-22 16:30:41,0.000000,125.1378,1,1,1,251.9226,8.309921,312.9198,0.000000,1,1,1,1,1,1,2.560983,0.000256,137.9053,1,1,1,1,1,1,0.0000,169.2643,0.000000,133.3504,1,1,1,7.444758,175.3909,260.7024,123.31450,0.001538,0.001409,0.001664,0.000000,1,9.068188,0.000000,3.3485,0.000256,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-02 13:41:07,2.534670,524.1028,2,2,1,168.8990,8.633236,304.5373,2.461026,2,1,2,1,2,1,20.551890,2.203119,364.3863,1,2,1,1,1,2,148.8032,144.4501,1.715439,987.0546,2,1,2,7.765189,138.2466,262.9774,15.88054,1.724942,1.255284,0.734629,0.306569,2,249.519300,0.945119,187.5801,0.000000,1,1
2016-01-02 13:41:08,2.517055,524.2206,2,2,1,168.8990,8.633236,304.5373,2.461026,2,1,2,1,2,1,20.551890,2.203119,364.3863,1,2,1,1,1,2,148.8032,144.6296,1.715439,987.3622,2,1,2,7.765189,138.5542,262.9774,15.88054,1.724942,1.246189,0.734629,0.308362,2,249.519300,0.945119,187.5801,0.000000,1,1
2016-01-02 13:41:09,2.502322,524.2206,2,2,1,168.8990,8.633236,304.5373,2.461026,2,1,2,1,2,1,20.551890,2.203119,364.3863,1,2,1,1,1,2,148.8032,144.6296,1.715439,987.6313,2,1,2,7.765189,138.5542,262.9774,15.88054,1.724942,1.244396,0.734629,0.308042,2,249.519300,0.945119,187.5801,0.000000,1,1
2016-01-02 13:41:10,2.482465,524.2991,2,2,1,168.8990,8.634518,304.5373,2.461026,2,1,2,1,2,1,20.487870,2.203119,364.3863,1,2,1,1,1,2,148.8032,144.6296,1.715439,987.7467,2,1,2,7.765189,138.5542,262.9774,15.88054,1.724045,1.265659,0.734629,0.306633,2,249.519300,0.945119,187.5801,0.000000,1,1


In [8]:
# Preview the test frame after indexing by Timestamp and sorting ascending.
test_data

Unnamed: 0_level_0,FIT101,LIT101,MV101,P101,P102,AIT201,AIT202,AIT203,FIT201,MV201,P201,P203,P204,P205,P206,DPIT301,FIT301,LIT301,MV301,MV302,MV303,MV304,P301,P302,AIT401,AIT402,FIT401,LIT401,P402,P403,UV401,AIT501,AIT502,AIT503,AIT504,FIT501,FIT502,FIT503,FIT504,P501,PIT501,PIT502,PIT503,FIT601,P602,NORATK
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
2015-12-22 16:30:06,0.000000,123.5284,1,1,1,251.9226,8.313446,312.7916,0.000000,1,1,1,1,1,1,2.560983,0.000256,138.7064,1,1,1,1,1,1,0.0000,169.2387,0.000000,134.3886,1,1,1,7.446360,175.3909,260.7024,123.31450,0.001538,0.001409,0.001664,0.000000,1,9.100231,0.000000,3.3485,0.000256,1,0
2015-12-22 16:30:11,0.000000,124.2742,1,1,1,251.9226,8.308960,312.9198,0.000000,1,1,1,1,1,1,2.560983,0.000256,138.3459,1,1,1,1,1,1,0.0000,169.2387,0.000000,133.0813,1,1,1,7.446360,175.3909,260.7024,123.31450,0.001538,0.001409,0.001664,0.000000,1,9.100231,0.000000,3.3485,0.000256,1,0
2015-12-22 16:30:38,0.000000,124.1565,1,1,1,251.9226,8.309921,312.9198,0.000000,1,1,1,1,1,1,2.560983,0.000256,137.6250,1,1,1,1,1,1,0.0000,169.2643,0.000000,133.3889,1,1,1,7.444758,175.3909,260.7024,123.31450,0.001538,0.001409,0.001664,0.000000,1,9.068188,0.000000,3.3485,0.000256,1,0
2015-12-22 16:31:00,0.000000,123.5284,1,1,1,251.9226,8.310242,313.1761,0.000000,1,1,1,1,1,1,2.560983,0.000256,137.9855,1,1,1,1,1,1,0.0000,169.4181,0.000000,132.9659,1,1,1,7.443476,175.5704,260.7024,123.31450,0.001538,0.001409,0.001664,0.000000,1,9.068188,0.000000,3.3485,0.000256,1,0
2015-12-22 16:31:18,0.000000,123.2144,1,1,1,251.9226,8.310242,313.1761,0.000000,1,1,1,1,1,1,2.560983,0.000256,137.5849,1,1,1,1,1,1,0.0000,169.5463,0.000000,133.2735,1,1,1,7.445399,175.7754,260.7024,123.42990,0.001538,0.001409,0.001664,0.000000,1,9.068188,0.000000,3.3485,0.000256,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-02 13:40:18,2.459405,518.6074,2,2,1,168.9951,8.625546,304.6655,2.462564,2,1,2,1,2,1,20.468660,2.202863,370.8863,1,2,1,1,1,2,148.8032,144.3989,1.715824,986.1318,2,1,2,7.761023,138.5798,263.9387,15.91899,1.724430,1.303318,0.732708,0.306825,2,249.583400,0.945119,187.6442,0.000000,1,1
2016-01-02 13:40:35,2.617942,522.6505,2,2,1,168.8990,8.623623,304.7167,2.463589,2,1,2,1,2,1,20.558290,2.200301,370.8863,1,2,1,1,1,2,148.8032,144.5527,1.713517,985.8242,2,1,2,7.762625,138.3235,263.6183,15.91899,1.723148,1.292558,0.734629,0.306825,2,249.391200,0.945119,187.5320,0.000000,1,1
2016-01-02 13:40:41,2.510329,522.9252,2,2,1,168.8990,8.620739,304.7167,2.464486,2,1,2,1,2,1,20.555090,2.201582,370.3863,1,2,1,1,1,2,148.8032,144.5527,1.712363,985.4396,2,1,2,7.764227,138.3235,263.3620,15.68828,1.722763,1.270142,0.733988,0.308619,2,249.391200,0.945119,187.5320,0.000000,1,1
2016-01-02 13:40:50,2.417769,523.4355,2,2,1,168.8990,8.623302,304.7167,2.462692,2,1,2,1,2,1,20.439850,2.201582,366.3863,1,2,1,1,1,2,148.8032,144.7578,1.716721,985.7857,2,1,2,7.763266,138.3235,263.3620,15.88054,1.723532,1.312540,0.734629,0.306249,2,249.535400,0.945119,187.5961,0.000000,1,1


In [9]:
# Split each frame into feature columns and the NORATK label column.
# The label is selected by name rather than by position, so the split does not
# silently pick the wrong column if the CSV column order ever changes.
TARGET_COLUMN = 'NORATK'

X_train = train_data.drop(columns=TARGET_COLUMN)
y_train = train_data[TARGET_COLUMN]

X_test = test_data.drop(columns=TARGET_COLUMN)
y_test = test_data[TARGET_COLUMN]

# The models are fitted on X_train and scored on X_test, so both must expose
# the same feature columns in the same order.
assert list(X_train.columns) == list(X_test.columns), "train/test feature columns differ"


In [10]:
# Candidate classifiers, keyed by the display name that also becomes the stem
# of the saved file under models/.
# NOTE(review): KNN and SGD are fitted on the feature columns exactly as loaded
# (no scaling step appears in this cell) — confirm that is intended.
models = {
    'Naive Bayes': GaussianNB(),
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SGD': SGDClassifier(loss='hinge', max_iter=1000, tol=1e-3, random_state=42),
    'GBM': GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
}

# Test-set metrics and saved-model path for each classifier, keyed by name.
results = {}

for model_name, model in models.items():
    # Fit on the training split, then predict the test split.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Test-set metrics for this classifier.
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred)

    # Persist the fitted estimator so later cells can reload it by name.
    model_path = f'models/{model_name}.joblib'
    joblib.dump(model, model_path)

    results[model_name] = {
        'Accuracy': accuracy,
        'Confusion Matrix': cm,
        'Model Path': model_path,
        'Classification Report': cr,
    }

    # One line per metric, emitted in the same order as the stored results.
    print(
        f"{model_name} - Accuracy: {accuracy:.4f}",
        f"{model_name} - Confusion Matrix:\n{cm}\n",
        f"{model_name} - classification_report:\n{cr}\n",
        sep="\n",
    )
    
    


Naive Bayes - Accuracy: 0.9549
Naive Bayes - Confusion Matrix:
[[20000     0]
 [ 1354  8646]]

Naive Bayes - classification_report:
              precision    recall  f1-score   support

           0       0.94      1.00      0.97     20000
           1       1.00      0.86      0.93     10000

    accuracy                           0.95     30000
   macro avg       0.97      0.93      0.95     30000
weighted avg       0.96      0.95      0.95     30000


KNN - Accuracy: 1.0000
KNN - Confusion Matrix:
[[20000     0]
 [    0 10000]]

KNN - classification_report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     20000
           1       1.00      1.00      1.00     10000

    accuracy                           1.00     30000
   macro avg       1.00      1.00      1.00     30000
weighted avg       1.00      1.00      1.00     30000


Random Forest - Accuracy: 1.0000
Random Forest - Confusion Matrix:
[[20000     0]
 [    0 10000]]

Rando

## Testing the accuracy of all 5 classifiers

In [12]:
# Load the combined adversarial samples from the working directory.
adv_df=pd.read_csv('Combined Adversarial data.csv')

In [13]:
# Inspect the adversarial frame as loaded.
adv_df

Unnamed: 0.1,Unnamed: 0,FIT101,LIT101,MV101,P101,P102,AIT201,AIT202,AIT203,FIT201,MV201,P201,P203,P204,P205,P206,DPIT301,FIT301,LIT301,MV301,MV302,MV303,MV304,P301,P302,AIT401,AIT402,FIT401,LIT401,P402,P403,UV401,AIT501,AIT502,AIT503,AIT504,FIT501,FIT502,FIT503,FIT504,P501,PIT501,PIT502,PIT503,FIT601,P602,source_file
0,0,2.346866,798.013444,1.0,1.0,1.0,263.332196,8.449136,318.786179,-0.004715,1.0,1.0,1.0,1.0,1.0,1.0,2.167212,0.006726,1012.488516,1.0,1.0,1.0,1.0,1.0,1.0,148.790364,155.079071,1.729572,803.780944,2.0,1.0,2.0,7.886393,143.626477,265.252661,12.428313,1.723680,1.310269,0.726398,0.298903,2.0,248.799418,0.928726,187.843808,-0.007467,1.0,adversarial data MV101.csv
1,1,2.360944,797.997745,1.0,1.0,1.0,263.944089,8.420539,319.618798,0.002538,1.0,1.0,1.0,1.0,1.0,1.0,2.178258,0.018014,1011.428858,1.0,1.0,1.0,1.0,1.0,1.0,148.796151,160.539614,1.719046,808.649372,2.0,1.0,2.0,7.890930,146.379752,265.245490,12.152042,1.717540,1.296735,0.725912,0.331461,2.0,250.218713,1.044898,189.083307,-0.006090,1.0,adversarial data MV101.csv
2,2,2.355540,798.010734,1.0,1.0,1.0,265.127935,8.459017,318.241215,0.001396,1.0,1.0,1.0,1.0,1.0,1.0,2.035324,0.001713,1011.825466,1.0,1.0,1.0,1.0,1.0,1.0,148.800387,160.024572,1.725891,804.420456,2.0,1.0,2.0,7.891634,146.030651,265.734079,12.140451,1.721561,1.299919,0.735078,0.297554,2.0,250.183264,1.054687,189.206408,-0.003841,1.0,adversarial data MV101.csv
3,3,2.354496,797.989788,1.0,1.0,1.0,253.385286,8.426049,321.637510,0.006508,1.0,1.0,1.0,1.0,1.0,1.0,2.377850,0.002324,1010.720019,1.0,1.0,1.0,1.0,1.0,1.0,148.803866,188.737769,1.701453,806.528536,2.0,1.0,2.0,7.816749,174.258879,255.845639,10.743749,1.702426,1.267959,0.731900,0.309655,2.0,254.563758,1.577010,193.489209,-0.017849,1.0,adversarial data MV101.csv
4,4,2.363369,798.008224,1.0,1.0,1.0,264.404741,8.429809,319.624981,0.000079,1.0,1.0,1.0,1.0,1.0,1.0,2.180473,0.008452,1009.276720,1.0,1.0,1.0,1.0,1.0,1.0,148.812361,156.631448,1.730998,805.528197,2.0,1.0,2.0,7.878850,144.754782,267.784250,13.074569,1.733083,1.300801,0.723768,0.301200,2.0,247.282752,1.087625,186.479210,0.013039,1.0,adversarial data MV101.csv
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23995,23995,2.637157,501.151634,2.0,2.0,1.0,269.199622,8.346118,329.542748,2.448467,2.0,1.0,2.0,1.0,2.0,1.0,2.821165,0.391589,883.151183,1.0,1.0,2.0,2.0,1.0,1.0,148.812867,164.154134,1.715751,905.031163,2.0,1.0,2.0,7.906616,150.454327,268.101243,11.720777,1.710312,1.285438,0.729734,0.303537,2.0,252.517597,1.138675,191.361859,-0.019504,1.0,adversarial data P602.csv
23996,23996,2.574567,500.921131,2.0,2.0,1.0,261.857022,8.371141,331.862380,2.443651,2.0,1.0,2.0,1.0,2.0,1.0,2.616028,0.008583,859.049642,1.0,1.0,1.0,1.0,1.0,1.0,148.776819,156.905658,1.725428,917.158083,2.0,1.0,2.0,7.882831,145.242773,261.222695,12.093071,1.730310,1.270769,0.715863,0.320599,2.0,249.790415,1.215037,188.622294,0.011681,1.0,adversarial data P602.csv
23997,23997,2.651911,501.878047,2.0,2.0,1.0,266.978751,8.370682,333.000656,2.466457,2.0,1.0,2.0,1.0,2.0,1.0,2.718891,0.369336,884.135597,1.0,1.0,2.0,2.0,1.0,1.0,148.791737,170.004080,1.702204,909.764147,2.0,1.0,2.0,7.865807,159.977072,266.337494,10.921891,1.703884,1.245241,0.740901,0.305249,2.0,256.234660,1.096783,194.654079,-0.009960,1.0,adversarial data P602.csv
23998,23998,2.625606,505.099162,2.0,2.0,1.0,270.751370,8.364218,331.058702,2.431943,2.0,1.0,2.0,1.0,2.0,1.0,2.673310,0.377092,879.149307,1.0,1.0,2.0,2.0,1.0,1.0,148.809192,164.388889,1.696747,906.885683,2.0,1.0,2.0,7.890366,150.147549,269.610266,11.530661,1.699644,1.271743,0.734058,0.328960,2.0,252.863729,1.241317,191.737341,0.005233,1.0,adversarial data P602.csv


In [14]:
# Discard the leftover 'Unnamed: 0' column that came in with the CSV.
adv_df = adv_df.drop(columns='Unnamed: 0')

In [15]:
# Give every adversarial row the NORATK label 1.
adv_df = adv_df.assign(NORATK=1)

In [16]:
# Re-inspect adv_df after dropping 'Unnamed: 0' and adding the NORATK column.
adv_df

Unnamed: 0,FIT101,LIT101,MV101,P101,P102,AIT201,AIT202,AIT203,FIT201,MV201,P201,P203,P204,P205,P206,DPIT301,FIT301,LIT301,MV301,MV302,MV303,MV304,P301,P302,AIT401,AIT402,FIT401,LIT401,P402,P403,UV401,AIT501,AIT502,AIT503,AIT504,FIT501,FIT502,FIT503,FIT504,P501,PIT501,PIT502,PIT503,FIT601,P602,source_file,NORATK
0,2.346866,798.013444,1.0,1.0,1.0,263.332196,8.449136,318.786179,-0.004715,1.0,1.0,1.0,1.0,1.0,1.0,2.167212,0.006726,1012.488516,1.0,1.0,1.0,1.0,1.0,1.0,148.790364,155.079071,1.729572,803.780944,2.0,1.0,2.0,7.886393,143.626477,265.252661,12.428313,1.723680,1.310269,0.726398,0.298903,2.0,248.799418,0.928726,187.843808,-0.007467,1.0,adversarial data MV101.csv,1
1,2.360944,797.997745,1.0,1.0,1.0,263.944089,8.420539,319.618798,0.002538,1.0,1.0,1.0,1.0,1.0,1.0,2.178258,0.018014,1011.428858,1.0,1.0,1.0,1.0,1.0,1.0,148.796151,160.539614,1.719046,808.649372,2.0,1.0,2.0,7.890930,146.379752,265.245490,12.152042,1.717540,1.296735,0.725912,0.331461,2.0,250.218713,1.044898,189.083307,-0.006090,1.0,adversarial data MV101.csv,1
2,2.355540,798.010734,1.0,1.0,1.0,265.127935,8.459017,318.241215,0.001396,1.0,1.0,1.0,1.0,1.0,1.0,2.035324,0.001713,1011.825466,1.0,1.0,1.0,1.0,1.0,1.0,148.800387,160.024572,1.725891,804.420456,2.0,1.0,2.0,7.891634,146.030651,265.734079,12.140451,1.721561,1.299919,0.735078,0.297554,2.0,250.183264,1.054687,189.206408,-0.003841,1.0,adversarial data MV101.csv,1
3,2.354496,797.989788,1.0,1.0,1.0,253.385286,8.426049,321.637510,0.006508,1.0,1.0,1.0,1.0,1.0,1.0,2.377850,0.002324,1010.720019,1.0,1.0,1.0,1.0,1.0,1.0,148.803866,188.737769,1.701453,806.528536,2.0,1.0,2.0,7.816749,174.258879,255.845639,10.743749,1.702426,1.267959,0.731900,0.309655,2.0,254.563758,1.577010,193.489209,-0.017849,1.0,adversarial data MV101.csv,1
4,2.363369,798.008224,1.0,1.0,1.0,264.404741,8.429809,319.624981,0.000079,1.0,1.0,1.0,1.0,1.0,1.0,2.180473,0.008452,1009.276720,1.0,1.0,1.0,1.0,1.0,1.0,148.812361,156.631448,1.730998,805.528197,2.0,1.0,2.0,7.878850,144.754782,267.784250,13.074569,1.733083,1.300801,0.723768,0.301200,2.0,247.282752,1.087625,186.479210,0.013039,1.0,adversarial data MV101.csv,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23995,2.637157,501.151634,2.0,2.0,1.0,269.199622,8.346118,329.542748,2.448467,2.0,1.0,2.0,1.0,2.0,1.0,2.821165,0.391589,883.151183,1.0,1.0,2.0,2.0,1.0,1.0,148.812867,164.154134,1.715751,905.031163,2.0,1.0,2.0,7.906616,150.454327,268.101243,11.720777,1.710312,1.285438,0.729734,0.303537,2.0,252.517597,1.138675,191.361859,-0.019504,1.0,adversarial data P602.csv,1
23996,2.574567,500.921131,2.0,2.0,1.0,261.857022,8.371141,331.862380,2.443651,2.0,1.0,2.0,1.0,2.0,1.0,2.616028,0.008583,859.049642,1.0,1.0,1.0,1.0,1.0,1.0,148.776819,156.905658,1.725428,917.158083,2.0,1.0,2.0,7.882831,145.242773,261.222695,12.093071,1.730310,1.270769,0.715863,0.320599,2.0,249.790415,1.215037,188.622294,0.011681,1.0,adversarial data P602.csv,1
23997,2.651911,501.878047,2.0,2.0,1.0,266.978751,8.370682,333.000656,2.466457,2.0,1.0,2.0,1.0,2.0,1.0,2.718891,0.369336,884.135597,1.0,1.0,2.0,2.0,1.0,1.0,148.791737,170.004080,1.702204,909.764147,2.0,1.0,2.0,7.865807,159.977072,266.337494,10.921891,1.703884,1.245241,0.740901,0.305249,2.0,256.234660,1.096783,194.654079,-0.009960,1.0,adversarial data P602.csv,1
23998,2.625606,505.099162,2.0,2.0,1.0,270.751370,8.364218,331.058702,2.431943,2.0,1.0,2.0,1.0,2.0,1.0,2.673310,0.377092,879.149307,1.0,1.0,2.0,2.0,1.0,1.0,148.809192,164.388889,1.696747,906.885683,2.0,1.0,2.0,7.890366,150.147549,269.610266,11.530661,1.699644,1.271743,0.734058,0.328960,2.0,252.863729,1.241317,191.737341,0.005233,1.0,adversarial data P602.csv,1


In [17]:
# Work on a copy so that later column drops on sample_data leave adv_df intact.
sample_data=adv_df.copy()

In [18]:
# Display the working copy of the adversarial frame.
sample_data

Unnamed: 0,FIT101,LIT101,MV101,P101,P102,AIT201,AIT202,AIT203,FIT201,MV201,P201,P203,P204,P205,P206,DPIT301,FIT301,LIT301,MV301,MV302,MV303,MV304,P301,P302,AIT401,AIT402,FIT401,LIT401,P402,P403,UV401,AIT501,AIT502,AIT503,AIT504,FIT501,FIT502,FIT503,FIT504,P501,PIT501,PIT502,PIT503,FIT601,P602,source_file,NORATK
0,2.346866,798.013444,1.0,1.0,1.0,263.332196,8.449136,318.786179,-0.004715,1.0,1.0,1.0,1.0,1.0,1.0,2.167212,0.006726,1012.488516,1.0,1.0,1.0,1.0,1.0,1.0,148.790364,155.079071,1.729572,803.780944,2.0,1.0,2.0,7.886393,143.626477,265.252661,12.428313,1.723680,1.310269,0.726398,0.298903,2.0,248.799418,0.928726,187.843808,-0.007467,1.0,adversarial data MV101.csv,1
1,2.360944,797.997745,1.0,1.0,1.0,263.944089,8.420539,319.618798,0.002538,1.0,1.0,1.0,1.0,1.0,1.0,2.178258,0.018014,1011.428858,1.0,1.0,1.0,1.0,1.0,1.0,148.796151,160.539614,1.719046,808.649372,2.0,1.0,2.0,7.890930,146.379752,265.245490,12.152042,1.717540,1.296735,0.725912,0.331461,2.0,250.218713,1.044898,189.083307,-0.006090,1.0,adversarial data MV101.csv,1
2,2.355540,798.010734,1.0,1.0,1.0,265.127935,8.459017,318.241215,0.001396,1.0,1.0,1.0,1.0,1.0,1.0,2.035324,0.001713,1011.825466,1.0,1.0,1.0,1.0,1.0,1.0,148.800387,160.024572,1.725891,804.420456,2.0,1.0,2.0,7.891634,146.030651,265.734079,12.140451,1.721561,1.299919,0.735078,0.297554,2.0,250.183264,1.054687,189.206408,-0.003841,1.0,adversarial data MV101.csv,1
3,2.354496,797.989788,1.0,1.0,1.0,253.385286,8.426049,321.637510,0.006508,1.0,1.0,1.0,1.0,1.0,1.0,2.377850,0.002324,1010.720019,1.0,1.0,1.0,1.0,1.0,1.0,148.803866,188.737769,1.701453,806.528536,2.0,1.0,2.0,7.816749,174.258879,255.845639,10.743749,1.702426,1.267959,0.731900,0.309655,2.0,254.563758,1.577010,193.489209,-0.017849,1.0,adversarial data MV101.csv,1
4,2.363369,798.008224,1.0,1.0,1.0,264.404741,8.429809,319.624981,0.000079,1.0,1.0,1.0,1.0,1.0,1.0,2.180473,0.008452,1009.276720,1.0,1.0,1.0,1.0,1.0,1.0,148.812361,156.631448,1.730998,805.528197,2.0,1.0,2.0,7.878850,144.754782,267.784250,13.074569,1.733083,1.300801,0.723768,0.301200,2.0,247.282752,1.087625,186.479210,0.013039,1.0,adversarial data MV101.csv,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23995,2.637157,501.151634,2.0,2.0,1.0,269.199622,8.346118,329.542748,2.448467,2.0,1.0,2.0,1.0,2.0,1.0,2.821165,0.391589,883.151183,1.0,1.0,2.0,2.0,1.0,1.0,148.812867,164.154134,1.715751,905.031163,2.0,1.0,2.0,7.906616,150.454327,268.101243,11.720777,1.710312,1.285438,0.729734,0.303537,2.0,252.517597,1.138675,191.361859,-0.019504,1.0,adversarial data P602.csv,1
23996,2.574567,500.921131,2.0,2.0,1.0,261.857022,8.371141,331.862380,2.443651,2.0,1.0,2.0,1.0,2.0,1.0,2.616028,0.008583,859.049642,1.0,1.0,1.0,1.0,1.0,1.0,148.776819,156.905658,1.725428,917.158083,2.0,1.0,2.0,7.882831,145.242773,261.222695,12.093071,1.730310,1.270769,0.715863,0.320599,2.0,249.790415,1.215037,188.622294,0.011681,1.0,adversarial data P602.csv,1
23997,2.651911,501.878047,2.0,2.0,1.0,266.978751,8.370682,333.000656,2.466457,2.0,1.0,2.0,1.0,2.0,1.0,2.718891,0.369336,884.135597,1.0,1.0,2.0,2.0,1.0,1.0,148.791737,170.004080,1.702204,909.764147,2.0,1.0,2.0,7.865807,159.977072,266.337494,10.921891,1.703884,1.245241,0.740901,0.305249,2.0,256.234660,1.096783,194.654079,-0.009960,1.0,adversarial data P602.csv,1
23998,2.625606,505.099162,2.0,2.0,1.0,270.751370,8.364218,331.058702,2.431943,2.0,1.0,2.0,1.0,2.0,1.0,2.673310,0.377092,879.149307,1.0,1.0,2.0,2.0,1.0,1.0,148.809192,164.388889,1.696747,906.885683,2.0,1.0,2.0,7.890366,150.147549,269.610266,11.530661,1.699644,1.271743,0.734058,0.328960,2.0,252.863729,1.241317,191.737341,0.005233,1.0,adversarial data P602.csv,1


In [21]:
# source_file holds the originating CSV name (text), so it is removed before
# the remaining columns are split into features and label.
sample_data = sample_data.drop(columns='source_file')

In [22]:
# Split the adversarial samples into feature columns and the NORATK label.
# The label is selected by name rather than by position, so the split does not
# depend on NORATK happening to be the last column.
X_sample = sample_data.drop(columns='NORATK')
y_sample = sample_data['NORATK']

In [25]:
# Names of the classifiers to evaluate; each is expected at models/<name>.joblib.
model_names = ['Naive Bayes', 'KNN', 'Random Forest', 'SGD', 'GBM']

# Per-model metrics on the adversarial samples, keyed by model name.
sample_results = {}

for model_name in model_names:
    model_path = f'models/{model_name}.joblib'

    # A missing file is reported and skipped so the other models still run.
    if not os.path.exists(model_path):
        print(f"Model file for {model_name} not found.")
        continue

    model = joblib.load(model_path)

    # Predict on the sample data
    y_pred_sample = model.predict(X_sample)

    # y_sample contains a single class (every row is labelled 1). Without an
    # explicit label set, the confusion matrix and report are sized from
    # whatever labels appear in y_sample/y_pred_sample, so a model that
    # predicted only 1 would yield a 1x1 matrix. Pinning the labels to the
    # classes the model was trained on keeps the output shape the same for
    # every model.
    class_labels = model.classes_

    # Accuracy here is the share of adversarial rows predicted as label 1.
    accuracy_sample = accuracy_score(y_sample, y_pred_sample)
    cm_sample = confusion_matrix(y_sample, y_pred_sample, labels=class_labels)
    # zero_division=0 reports undefined precision/recall as 0 without emitting
    # a warning for each class that has no true or predicted rows.
    cr = classification_report(
        y_sample, y_pred_sample, labels=class_labels, zero_division=0
    )

    # Store results
    sample_results[model_name] = {
        'Accuracy': accuracy_sample,
        'Confusion Matrix': cm_sample,
        'Classification Report':cr
    }

    print(f"{model_name} - Accuracy on sample data: {accuracy_sample:.4f}")
    print(f"{model_name} - Confusion Matrix on sample data:\n{cm_sample}\n")
    print(f"{model_name} - classification_report:\n{cr}\n")


Naive Bayes - Accuracy on sample data: 0.0001
Naive Bayes - Confusion Matrix on sample data:
[[    0     0]
 [23997     3]]

Naive Bayes - classification_report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.00      0.00     24000

    accuracy                           0.00     24000
   macro avg       0.50      0.00      0.00     24000
weighted avg       1.00      0.00      0.00     24000


KNN - Accuracy on sample data: 0.0000
KNN - Confusion Matrix on sample data:
[[    0     0]
 [24000     0]]

KNN - classification_report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00   24000.0

    accuracy                           0.00   24000.0
   macro avg       0.00      0.00      0.00   24000.0
weighted avg       0.00      0.00      0.00   24000.0


Random Forest - Accuracy on sample data: 0.0

In [26]:
# Show the per-model metrics collected on the adversarial samples.
sample_results

{'Naive Bayes': {'Accuracy': 0.000125,
  'Confusion Matrix': array([[    0,     0],
         [23997,     3]], dtype=int64),
  'Classification Report': '              precision    recall  f1-score   support\n\n           0       0.00      0.00      0.00         0\n           1       1.00      0.00      0.00     24000\n\n    accuracy                           0.00     24000\n   macro avg       0.50      0.00      0.00     24000\nweighted avg       1.00      0.00      0.00     24000\n'},
 'KNN': {'Accuracy': 0.0,
  'Confusion Matrix': array([[    0,     0],
         [24000,     0]], dtype=int64),
  'Classification Report': '              precision    recall  f1-score   support\n\n           0       0.00      0.00      0.00       0.0\n           1       0.00      0.00      0.00   24000.0\n\n    accuracy                           0.00   24000.0\n   macro avg       0.00      0.00      0.00   24000.0\nweighted avg       0.00      0.00      0.00   24000.0\n'},
 'Random Forest': {'Accuracy': 0.

## Accuracy on Stealth Sample

In [27]:
# Summarise the adversarial-sample metrics, one row per classifier.
# `tabulate` comes from the notebook's top import cell.
# Accuracy is passed as a raw float and formatted through floatfmt: tabulate
# re-parses numeric-looking strings, so a pre-formatted "0.0000" was being
# printed as "0".
table = [
    [model_name, result['Accuracy'], result['Confusion Matrix'], result['Classification Report']]
    for model_name, result in sample_results.items()
]

# Print the table
print(tabulate(
    table,
    headers=["Model", "Accuracy", "Confusion Matrix","Classification Report"],
    floatfmt=".4f",
))



Model            Accuracy  Confusion Matrix    Classification Report
-------------  ----------  ------------------  -----------------------------------------------------
Naive Bayes        0.0001  [[    0     0]      precision    recall  f1-score   support
                            [23997     3]]
                                                          0       0.00      0.00      0.00         0
                                                          1       1.00      0.00      0.00     24000

                                                   accuracy                           0.00     24000
                                                  macro avg       0.50      0.00      0.00     24000
                                               weighted avg       1.00      0.00      0.00     24000
KNN                0       [[    0     0]      precision    recall  f1-score   support
                            [24000     0]]
                                                          0     

## Accuracy On Train - Test Dataset

In [28]:
# Summarise the train/test metrics, one row per classifier.
# `tabulate` comes from the notebook's top import cell.
# Accuracy is passed as a raw float and formatted through floatfmt: tabulate
# re-parses numeric-looking strings, so a pre-formatted "1.0000" was being
# printed as "1".
table = [
    [model_name, result['Accuracy'], result['Confusion Matrix'], result['Classification Report']]
    for model_name, result in results.items()
]

# Print the table
print(tabulate(
    table,
    headers=["Model", "Accuracy", "Confusion Matrix","Classification Report"],
    floatfmt=".4f",
))



Model            Accuracy  Confusion Matrix    Classification Report
-------------  ----------  ------------------  -----------------------------------------------------
Naive Bayes        0.9549  [[20000     0]      precision    recall  f1-score   support
                            [ 1354  8646]]
                                                          0       0.94      1.00      0.97     20000
                                                          1       1.00      0.86      0.93     10000

                                                   accuracy                           0.95     30000
                                                  macro avg       0.97      0.93      0.95     30000
                                               weighted avg       0.96      0.95      0.95     30000
KNN                1       [[20000     0]      precision    recall  f1-score   support
                            [    0 10000]]
                                                          0     