<p style="font-size:30px; color:red; font-weight:bold; font-style:italic">Self-case study 1 title: Network intrusion detection tags</p>

<p style="font-style:italic; font-size:22px; font-weight:bold">Table of contents</p>
<p style="font-style:italic; font-size:17px; font-weight:bold; color:green;">1.&emsp;Final function-1</p>
<p style="font-style:italic; font-size:17px; font-weight:bold; color:green;">2.&emsp;Final function-2</p>
<p style="font-style:italic; font-size:17px; font-weight:bold; color:green;">3.&emsp;Loading sample dataset to apply final function-1 and function-2</p>


In [1]:
### nids_functions.py (provided alongside this notebook) must be importable before running this notebook
import nids_functions as nf
import numpy as np
import pandas as pd

from warnings import filterwarnings
filterwarnings('ignore')

from datetime import datetime as dt

<p style="font-size:22px; color:black; font-weight:bold; font-style:italic">1.&emsp;Final Function 1</p>

In [2]:
def final_fun_1(x):
    '''
    Predict class labels for raw network-flow records.

    The input is wrapped in a DataFrame using the 48 raw feature names and passed through
    the nids_functions (nf) helpers in this order: categorical_to_numerical,
    feature_engineering, scale_features, to_sparse, predict. Whatever nf.predict returns
    is returned unchanged.

    It assumes no NaNs are present in the given data.

    x: dataset having shape x.shape == (n, 48), columns ordered as in `features_name`.
       A single datapoint given as a 1-D numpy array of 48 values is also accepted and
       is treated as a one-row dataset.

    returns: the output of nf.predict for the prepared data
             (presumably one predicted label per input row -- confirm in nids_functions)
    '''
    ### raw feature names, in the column order expected from the caller; used to build the dataframe
    ### NOTE(review): 'ct_src_ ltm' contains a space -- presumably it mirrors the training-data
    ### column name, so it is kept byte-for-byte; confirm before normalizing it
    features_name = ['srcip', 'sport', 'dstip', 'dsport', 'proto', 'state', 'dur', 
                     'sbytes', 'dbytes', 'sttl', 'dttl', 'sloss', 'dloss', 'service', 
                     'Sload', 'Dload', 'Spkts', 'Dpkts', 'swin', 'dwin', 'stcpb', 
                     'dtcpb', 'smeansz', 'dmeansz', 'trans_depth', 'res_bdy_len', 
                     'Sjit', 'Djit', 'Stime', 'Ltime', 'Sintpkt', 'Dintpkt', 'tcprtt', 
                     'synack', 'ackdat', 'is_sm_ips_ports', 'ct_state_ttl', 
                     'ct_flw_http_mthd', 'is_ftp_login', 'ct_ftp_cmd', 'ct_srv_src', 
                     'ct_srv_dst', 'ct_dst_ltm', 'ct_src_ ltm', 'ct_src_dport_ltm', 
                     'ct_dst_sport_ltm', 'ct_dst_src_ltm', 'attack_cat']
    ### the 32 features kept after preprocessing; everything else is dropped before feature engineering
    final_columns =  ['proto', 'state', 'dur', 'sbytes', 'dbytes', 'sttl', 'dttl', 'service',
                       'Sload', 'Dload', 'Spkts', 'stcpb', 'dtcpb', 'smeansz', 'dmeansz',
                       'trans_depth', 'res_bdy_len', 'Sjit', 'Djit', 'Stime', 'Sintpkt',
                       'Dintpkt', 'tcprtt', 'ct_state_ttl', 'ct_flw_http_mthd', 'is_ftp_login',
                       'ct_srv_src', 'ct_srv_dst', 'ct_dst_ltm', 'ct_src_ ltm',
                       'ct_src_dport_ltm', 'ct_dst_src_ltm']

    ### a lone datapoint passed as a flat array becomes a one-row dataset; without this
    ### pandas rejects it because its shape does not match the 48 column names
    if isinstance(x, np.ndarray) and x.ndim == 1:
        x = x.reshape(1, -1)

    ### making dataframe to perform the same operations as performed on train data
    df = pd.DataFrame(data=x, columns=features_name)
    ### converting categorical to numericals
    df = nf.categorical_to_numerical(df)
    ### taking final df after preprocessing to perform feature engineering
    df = df[final_columns]
    ### performing feature engineering operations
    df = nf.feature_engineering(df)
    ### scaling data after feature engineering
    scaled_data = nf.scale_features(df)
    ### combining the dataframe with the scaled features via nf.to_sparse
    ### (per the original note: stacks categorical + scaled features into a sparse matrix)
    data = nf.to_sparse(df, scaled_data)
    ### predicting class label on the dataset and returning it
    return nf.predict(data)

<p style="font-size:22px; color:black; font-weight:bold; font-style:italic">2.&emsp;Final Function 2</p>

In [3]:
def final_fun_2(x, y_true):
    '''
    Predict class labels for a dataset, then print the AUC, the f1_score and the time
    taken by the class label prediction.

    It assumes no NaNs are present in the given data.

    x: dataset whose class labels are to be predicted (passed as-is to final_fun_1)

    y_true: the true class labels of the dataset

    returns: None -- the results are only printed
    '''
    ### start time
    st = dt.now()
    ### predicting class labels
    y_preds = final_fun_1(x)
    ### stop the clock right after prediction so the reported time covers prediction only,
    ### not the metric computation or the printing below
    time_taken = dt.now() - st
    ### calculating auc and f1 score on predicted and actual class labels
    auc, f1score = nf.calculate_metric_score(y_true, y_preds)
    ### printing auc and f1-score on the given data
    print(f"AUC is: {auc}; f1_score is: {f1score}")
    ### printing time taken by the prediction
    print(f"time taken: {time_taken}")
    
    

<p style="font-size:22px; color:black; font-weight:bold; font-style:italic">3.&emsp;Loading sample dataset to apply final function-1 and function-2</p>

In [4]:
### sample dataset used to demonstrate final function-1 and final function-2
### NOTE(review): allow_pickle=True lets np.load unpickle objects stored in the file, which can
### run arbitrary code -- only load a demo_array.npy that comes from a trusted source
d = np.load('demo_array.npy', allow_pickle=True)
### true class labels of the sample datapoints: ten 0s followed by ten 1s
y = np.array([0] * 10 + [1] * 10)

In [9]:
### final function-1 on a single datapoint: the first row of the sample dataset,
### reshaped to (1, -1) so it is still a 2-D, one-row dataset
x = np.reshape(d[0], (1, -1))
st = dt.now()
### [0] takes the first (and only) entry of the returned predictions
y_preds = final_fun_1(x=x)[0]
print(f"Predicted class label is: {y_preds}")
print(f"time taken: {dt.now() - st}")

Predicted class label is: 0
time taken: 0:00:00.021003


In [10]:
### final function-2 on the complete sample dataset, scored against its true class labels
final_fun_2(x=d, y_true=y)

AUC is: 1.0; f1_score is: 1.0
time taken: 0:00:00.021995
