In [125]:
# # Load the Drive helper and mount
# from google.colab import drive

# # This will prompt for authorization.
# drive.mount('/content/drive')

# %cd '/content/drive/MyDrive/Colab Notebooks/C73_A2_1/data'
# !ls

# data_dir = '/content/drive/MyDrive/Colab Notebooks/C73_A2_1/data'
# result_dir = '/content/drive/MyDrive/Colab Notebooks/C73_A2_1/result'

In [126]:
# Input data and saved results live in directories relative to the notebook.
data_dir, result_dir = 'data', 'result'

# Import

In [127]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from IPython.display import display

import warnings
warnings.filterwarnings("ignore")

# Read data

In [128]:
""" Load original data (aggregated and unaggregated) """
# Fragments used to build the file names of the data set and saved artifacts.
af = 'u'

pf1 = '7200-3600'
pf = f'{pf1}.__2__'
ra = '__97.39__'

# The first CSV column is read as the index and then discarded, leaving rows
# numbered 0..n-1. reset_index(drop=True) does this in one step and also works
# when that column has a header name (reset_index().drop(columns=['index'])
# raises KeyError in that case).
df = pd.read_csv(f'{data_dir}/t.{af}4.{pf1}.csv', index_col=[0]).reset_index(drop=True)
# dfo = pd.read_csv(f'{data_dir}/t.{af}1.csv', index_col=[0])
# dfo.head()

In [129]:
""" Load model """
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model files produced by a trusted run of this project.
# The `with` block closes the file handle as soon as the model is loaded.
with open(f'{result_dir}/{af}4-5.{pf}.model_lr.{ra}.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

""" Load preprocessed data used for feeding the model """
X_test = np.load(f'{result_dir}/{af}4-5.{pf}.data_lr.{ra}.t.X.npy')
y_test = np.load(f'{result_dir}/{af}4-5.{pf}.data_lr.{ra}.t.y.npy')

""" Load predicted output for test set """
y_pred = np.load(f'{result_dir}/{af}4-5.{pf}.output_lr.{ra}.t.y_pred.npy')

""" Load used features """
# One feature name per line; surrounding whitespace/newlines are stripped.
with open(f'{data_dir}/{af}4-3.fts_cols.{pf}.txt') as fts_file:
    fts_names = [line.strip() for line in fts_file]

In [130]:
# Number of records in the loaded data set.
print(df.shape[0])

18226


# Evaluate the model

In [131]:
from sklearn.metrics import classification_report, confusion_matrix, PrecisionRecallDisplay, roc_auc_score

def evaluate(y_true, y_pred, y_score=None):
    """Print the confusion matrix, classification report and ROC AUC.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted (hard) labels, used for the confusion matrix and the
        classification report.
    y_score : array-like, optional
        Continuous scores for the positive class (for example
        ``model.predict_proba(X)[:, 1]``). When given, ROC AUC is computed
        from these scores. When omitted, ROC AUC is computed from ``y_pred``,
        which only reflects the single operating point of the hard labels.

    Returns
    -------
    None
        Everything is printed; nothing is returned, so calling this as the
        last expression of a cell produces no extra output.
    """
    cm = confusion_matrix(y_true, y_pred)
    print(cm)
    print(classification_report(y_true, y_pred, digits=4))

    # Fall back to the hard labels so existing two-argument calls print the
    # same value as before.
    auc_input = y_pred if y_score is None else y_score
    roc_auc = roc_auc_score(y_true, auc_input)
    print(roc_auc)

    # display = PrecisionRecallDisplay.from_predictions(y_true, y_pred, name="LinearSVC")
    # _ = display.ax_.set_title("2-class Precision-Recall curve")

In [132]:
# Score the stored test-set predictions against the test labels.
evaluate(y_true=y_test, y_pred=y_pred)

[[17219   947]
 [    0    60]]
              precision    recall  f1-score   support

           0     1.0000    0.9479    0.9732     18166
           1     0.0596    1.0000    0.1125        60

    accuracy                         0.9480     18226
   macro avg     0.5298    0.9739    0.5429     18226
weighted avg     0.9969    0.9480    0.9704     18226

0.9739348232962677


# Set threshold

In [133]:
""" Update result based on score. Only those have bot score (score for class 1) > 0.75 are marked as bot """
# Records are marked as bot only when their score for class 1 is strictly
# greater than this threshold.
BOT_SCORE_THRESHOLD = 0.75

y_bot_score = model.predict_proba(X_test)[:,1]

# pd.concat(axis=1) aligns on the index and silently pads any mismatch with
# NaN, so fail loudly if the three sources do not cover the same records.
if not (len(df) == len(y_pred) == len(y_bot_score)):
    raise ValueError(
        f'Row count mismatch: df={len(df)}, y_pred={len(y_pred)}, '
        f'y_bot_score={len(y_bot_score)}'
    )

df_pred = pd.DataFrame(y_pred, columns=['Label_Pred'])
df_bot_score = pd.DataFrame(y_bot_score, columns=['Bot_Score'])

# reset_index(drop=True) gives df the same 0..n-1 index as the two new frames
# without creating (and then dropping) a temporary 'index' column.
dfn = pd.concat([df.reset_index(drop=True), df_pred, df_bot_score], axis=1)

# 1 where the bot score exceeds the threshold, 0 otherwise.
dfn['Label_Pred_Thresh'] = (dfn['Bot_Score'] > BOT_SCORE_THRESHOLD).astype('int64')

In [135]:
# Score the thresholded labels against the test labels.
evaluate(y_true=y_test, y_pred=dfn['Label_Pred_Thresh'].to_numpy())

[[17475   691]
 [    0    60]]
              precision    recall  f1-score   support

           0     1.0000    0.9620    0.9806     18166
           1     0.0799    1.0000    0.1480        60

    accuracy                         0.9621     18226
   macro avg     0.5399    0.9810    0.5643     18226
weighted avg     0.9970    0.9621    0.9779     18226

0.9809809534294837


In [136]:
# dfn.loc[dfn['Label'] == 5]

# Analysis on detected records

In [137]:
""" 
For each conversation that has at least one record detected as bot,
check how many records of this conversation are classified as malicious,
=> how many flows of this conversation are classified as malicious
"""
pred_field = 'Label_Pred_Thresh'

# Group by the column name itself rather than a one-element list: with a list,
# pandas >= 2.0 yields 1-tuple keys when the groupby is iterated, and such a
# key no longer compares equal to the strings in the 'Conversation' column.
detected_gb_src = dfn.loc[dfn[pred_field] == 1].groupby('Conversation')

# Per conversation: number of detected records and the sum of their n_flows.
detected_stats = detected_gb_src['n_flows'].agg(
    n_agg_detected='size',
    n_flows_detected='sum',
)

# Per conversation: number of all records and the sum of all their n_flows.
# Computed once for the whole frame instead of re-filtering dfn per group.
total_stats = dfn.groupby('Conversation')['n_flows'].agg(
    n_agg_total='size',
    n_flows_total='sum',
)

# Left join on the conversation key keeps only conversations that have at
# least one detected record.
df_result_conv = detected_stats.join(total_stats)

# Share of a conversation's records that were detected.
df_result_conv['p_agg'] = df_result_conv['n_agg_detected'] / df_result_conv['n_agg_total']

df_result_conv = (
    df_result_conv[['n_agg_total', 'n_agg_detected', 'n_flows_total', 'n_flows_detected', 'p_agg']]
    .rename_axis(None)  # keep the index unnamed in the displayed table
    .sort_values(['p_agg', 'n_agg_detected', 'n_agg_total'], ascending=False)
)
df_result_conv

Unnamed: 0,n_agg_total,n_agg_detected,n_flows_total,n_flows_detected,p_agg
41.232.73.23 -> 150.35.87.168,60,60,120,120,1.000000
150.35.87.62 -> 189.216.27.38,5,5,28,28,1.000000
150.35.87.62 -> 101.204.145.180,4,4,10,10,1.000000
150.35.87.62 -> 210.49.127.175,4,4,10,10,1.000000
150.35.87.62 -> 44.135.158.222,4,4,19,19,1.000000
...,...,...,...,...,...
150.35.87.174 -> 72.168.158.73,15,1,200,2,0.066667
150.35.87.62 -> 77.128.235.216,15,1,498,2,0.066667
150.35.87.62 -> 209.3.175.130,44,2,88,4,0.045455
150.35.87.174 -> 150.35.83.12,69,2,65130,4,0.028986


The number of records and flows detected as bot for conversation `41.232.73.23 -> 150.35.87.168` is much higher than for the rest.  
We'll choose this conversation to generate adversarial samples.  