In [1]:
import pandas as pd
from glob import glob
from joblib import dump, load
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score, balanced_accuracy_score, roc_auc_score, confusion_matrix

In [2]:
def get_csv(uri):
    csvs = []
    if "*" in uri:
        all_csv = glob(uri)
        [ csvs.append(pd.read_csv(uri)) for uri in all_csv ]
        return pd.concat(csvs)
    return pd.read_csv(uri)

In [3]:
# cols = ['Label', 'SYN Flag Count', 'Fwd Seg Size Min', 'FWD Init Win Bytes',
#        'FIN Flag Count', 'Average Packet Size', 'Packet Length Mean',
#        'Packet Length Max', 'Protocol', 'Idle Max', 'Idle Mean', 'Idle Min',
#        'Flow Duration', 'Fwd IAT Total', 'Fwd Packet Length Max',
#        'Fwd Segment Size Avg', 'Fwd Packet Length Mean',
#        'Bwd Packet Length Mean', 'Bwd Segment Size Avg', 'Packet Length Std',
#        'Bwd IAT Total', 'Bwd Packet Length Max']

cols = ['Label', 'Protocol', 'Flow Duration', 'Fwd Packet Length Max',
       'Fwd Packet Length Mean', 'Bwd Packet Length Mean', 'Fwd IAT Total',
       'Bwd IAT Total', 'Packet Length Max', 'Packet Length Mean',
       'Packet Length Std', 'FIN Flag Count', 'SYN Flag Count',
       'Down/Up Ratio', 'Average Packet Size', 'Fwd Segment Size Avg',
       'Bwd Segment Size Avg', 'FWD Init Win Bytes', 'Fwd Seg Size Min',
       'Idle Mean', 'Idle Max', 'Idle Min']

models = [
#     {"model": "AdaBoost"},
#     {"model": "DecisionTree"},
#     {"model": "SVM"},
#     {"model": "ANN"},
#     {"model": "LogisticRegression"},
#     {"model": "RandomForest"},
#     {"model": "XGBoost"},
#     {"model": "Bagging"},
#     {"model": "ModelWithoutScaler/AdaBoost"},
#     {"model": "ModelWithoutScaler/LogisticRegression"},
#     {"model": "ModelWithoutScaler/RandomForest"},
#     {"model": "ModelWithoutScaler/DecisionTree"},
#     {"model": "ModelWithoutScaler/SVM"},
#     {"model": "ModelWithoutScaler/ANN"},
#     {"model": "ModelWithoutScaler/XGBoost"},
#     {"model": "ModelWithoutScaler/Bagging"},
#     {"model": "ModelStandardScaler/AdaBoost"},
#     {"model": "ModelStandardScaler/LogisticRegression"},
#     {"model": "ModelStandardScaler/RandomForest"},
#     {"model": "ModelStandardScaler/DecisionTree"},
#     {"model": "ModelStandardScaler/SVM"},
#     {"model": "ModelStandardScaler/ANN"},
#     {"model": "ModelStandardScaler/XGBoost"},
#     {"model": "ModelStandardScaler/Bagging"},
    {"model": "ModelMinMax/AdaBoost"},
    {"model": "ModelMinMax/LogisticRegression"},
    {"model": "ModelMinMax/RandomForest"},
    {"model": "ModelMinMax/DecisionTree"},
    {"model": "ModelMinMax/SVM"},
    {"model": "ModelMinMax/ANN"},
    {"model": "ModelMinMax/XGBoost"},
    {"model": "ModelMinMax/Bagging"},
]

collection = [
    {"dataset": "Unseen Dataset", "path": "/media/kmdr7/Seagate/TA/DATASETS/newUnseenDataset.csv", "type": -1},
    {"dataset": "Malware 48-1 *", "path": "/media/kmdr7/Seagate/DATASETS/IOT-23/CTU-IoT-Malware-Capture-48-1/out2/*", "type": 1},
    {"dataset": "Malware 48-1 1", "path": "/media/kmdr7/Seagate/DATASETS/IOT-23/CTU-IoT-Malware-Capture-48-1/out2/malware-48-1_00000_20190301011533.pcap_Flow.csv", "type": 1},
    {"dataset": "Malware 48-1 2", "path": "/media/kmdr7/Seagate/DATASETS/IOT-23/CTU-IoT-Malware-Capture-48-1/out2/malware-48-1_00001_20190301041534.pcap_Flow.csv", "type": 1},
    {"dataset": "Malware 48-1 3", "path": "/media/kmdr7/Seagate/DATASETS/IOT-23/CTU-IoT-Malware-Capture-48-1/out2/malware-48-1_00002_20190301071534.pcap_Flow.csv", "type": 1},
    {"dataset": "Malware 48-1 4", "path": "/media/kmdr7/Seagate/DATASETS/IOT-23/CTU-IoT-Malware-Capture-48-1/out2/malware-48-1_00003_20190301101534.pcap_Flow.csv", "type": 1},
    {"dataset": "Benign IoTTT *", "path": "/media/kmdr7/Seagate/DATASETS/IoT-Traffic-Traces/out/*", "type": 0},
    {"dataset": "Benign IoTTT samp 1", "path": "/media/kmdr7/Seagate/DATASETS/IoT-Traffic-Traces/out/16-09-23.pcap_Flow.csv", "type": 0},
    {"dataset": "Benign IoTTT samp 2", "path": "/media/kmdr7/Seagate/DATASETS/IoT-Traffic-Traces/out/16-10-07.pcap_Flow.csv", "type": 0},
    {"dataset": "Benign IoTTT samp 3", "path": "/media/kmdr7/Seagate/DATASETS/IoT-Traffic-Traces/out/16-09-26.pcap_Flow.csv", "type": 0},
    {"dataset": "Benign IoTTT samp 4", "path": "/media/kmdr7/Seagate/DATASETS/IoT-Traffic-Traces/out/16-10-12.pcap_Flow.csv", "type": 0},
]

In [4]:
for model in models:
    joblib = "/media/kmdr7/Seagate/TA/MODELS/" + model["model"] + ".joblib"
    for col in collection:
        datates = get_csv(col["path"])[cols]

        if col["type"] == 1:
            datates["Label"] = 1
        elif col["type"] == 0:
            datates["Label"] = 0

        X = datates.drop(columns=["Label"])
        y = datates["Label"]

        scaler = StandardScaler()
        X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

        clf = load(joblib)
        pred = clf.predict(X)

        tn, fp, fn, tp = confusion_matrix(y, pred, labels=[0,1]).ravel()
        acc = accuracy_score(y, pred)
        bacc = balanced_accuracy_score(y, pred)
        recall = recall_score(y, pred)
        f1 = f1_score(y, pred)
        precision = precision_score(y, pred)

        try: roc_auc = roc_auc_score(y, pred)
        except: roc_auc = 0

        import requests
        requests.post(
            "http://localhost:8000/api/v1/prediction",
            json={
                "code": "005",
                "algorithm": model["model"],
                "dataset": col["dataset"],
                "matrix": {
                    "tn": float(tn),
                    "fp": float(fp),
                    "fn": float(fn),
                    "tp": float(tp)
                },
                "accuracy": float(acc),
                "balanced_accuracy": float(bacc),
                "recall": float(recall),
                "f1": float(f1),
                "precision": float(precision),
                "roc_auc": float(roc_auc),
            }
        )

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr