In [1]:
import pandas as pd
import numpy as np
import sys
import sklearn
import io
import random

In [3]:
train_url = 'https://raw.githubusercontent.com/merteroglu/NSL-KDD-Network-Instrusion-Detection/master/NSL_KDD_Train.csv'
test_url = 'https://raw.githubusercontent.com/merteroglu/NSL-KDD-Network-Instrusion-Detection/master/NSL_KDD_Test.csv'



In [5]:
col_names = ["duration","protocol_type","service","flag","src_bytes",
    "dst_bytes","land","wrong_fragment","urgent","hot","num_failed_logins",
    "logged_in","num_compromised","root_shell","su_attempted","num_root",
    "num_file_creations","num_shells","num_access_files","num_outbound_cmds",
    "is_host_login","is_guest_login","count","srv_count","serror_rate",
    "srv_serror_rate","rerror_rate","srv_rerror_rate","same_srv_rate",
    "diff_srv_rate","srv_diff_host_rate","dst_host_count","dst_host_srv_count",
    "dst_host_same_srv_rate","dst_host_diff_srv_rate","dst_host_same_src_port_rate",
    "dst_host_srv_diff_host_rate","dst_host_serror_rate","dst_host_srv_serror_rate",
    "dst_host_rerror_rate","dst_host_srv_rerror_rate","label"]


df = pd.read_csv(train_url,header=None, names = col_names)

df_test = pd.read_csv(test_url, header=None, names = col_names)

print('Dimensions of the Training set:',df.shape)
print('Dimensions of the Test set:',df_test.shape)

Dimensions of the Training set: (125973, 42)
Dimensions of the Test set: (22544, 42)


In [6]:
df.head(5)

Unnamed: 0,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,...,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,label
0,0,tcp,ftp_data,SF,491,0,0,0,0,0,...,25,0.17,0.03,0.17,0.0,0.0,0.0,0.05,0.0,normal
1,0,udp,other,SF,146,0,0,0,0,0,...,1,0.0,0.6,0.88,0.0,0.0,0.0,0.0,0.0,normal
2,0,tcp,private,S0,0,0,0,0,0,0,...,26,0.1,0.05,0.0,0.0,1.0,1.0,0.0,0.0,neptune
3,0,tcp,http,SF,232,8153,0,0,0,0,...,255,1.0,0.0,0.03,0.04,0.03,0.01,0.0,0.01,normal
4,0,tcp,http,SF,199,420,0,0,0,0,...,255,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,normal


In [7]:
print('Label distribution Training set:')
print(df['label'].value_counts())
print()
print('Label distribution Test set:')
print(df_test['label'].value_counts())

Label distribution Training set:
label
normal             67343
neptune            41214
satan               3633
ipsweep             3599
portsweep           2931
smurf               2646
nmap                1493
back                 956
teardrop             892
warezclient          890
pod                  201
guess_passwd          53
buffer_overflow       30
warezmaster           20
land                  18
imap                  11
rootkit               10
loadmodule             9
ftp_write              8
multihop               7
phf                    4
perl                   3
spy                    2
Name: count, dtype: int64

Label distribution Test set:
label
normal             9711
neptune            4657
guess_passwd       1231
mscan               996
warezmaster         944
apache2             737
satan               735
processtable        685
smurf               665
back                359
snmpguess           331
saint               319
mailbomb            293
snmpgetattac

**Step 1: Data preprocessing:**

One-Hot-Encoding, tüm kategorik özellikleri ikili özelliklere dönüştürmek için kullanılır. One-Hot-Endcoding gereksinimi, bu transformatöre giriş, kategorik(ayrık) özelliklerle alınan değerleri ifade eden bir tam sayı matrisi olmalıdır. Çıktı, her bir sütunun olası bir değere karşılık geldiği seyrek bir matris olacaktır. Giriş özelliklerinin [0, n_values] aralığında değerler aldıkları varsayılmaktadır. Bu nedenle her kategoriyi bir sayıya dönüştürmek için özelliklerin öncelikle LabelEncoder ile dönüştürülmesi gerekir.

In [11]:
#

print('Training set:')
for col_name in df.columns:
    if df[col_name].dtypes == 'object' :
        unique_cat = len(df[col_name].unique())
        print("Feature '{col_name}' has {unique_cat} categories".format(col_name=col_name, unique_cat=unique_cat))

print()
print('Distribution of categories in service:')
print(df['service'].value_counts().sort_values(ascending=False).head())

Training set:
Feature 'protocol_type' has 3 categories
Feature 'service' has 70 categories
Feature 'flag' has 11 categories
Feature 'label' has 23 categories

Distribution of categories in service:
service
http        40338
private     21853
domain_u     9043
smtp         7313
ftp_data     6860
Name: count, dtype: int64


In [13]:
# Test set
print('Test set:')
for col_name in df_test.columns:
    if df_test[col_name].dtypes == 'object' :
        unique_cat = len(df_test[col_name].unique())
        print("Feature '{col_name}' has {unique_cat} categories".format(col_name=col_name, unique_cat=unique_cat))


Test set:
Feature 'protocol_type' has 3 categories
Feature 'service' has 64 categories
Feature 'flag' has 11 categories
Feature 'label' has 38 categories


**LabelEncoder**

**Insert categorical features into a 2D numpy array**

In [15]:
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
categorical_columns=['protocol_type', 'service', 'flag']

df_categorical_values = df[categorical_columns]
testdf_categorical_values = df_test[categorical_columns]

df_categorical_values.head()

Unnamed: 0,protocol_type,service,flag
0,tcp,ftp_data,SF
1,udp,other,SF
2,tcp,private,S0
3,tcp,http,SF
4,tcp,http,SF


In [17]:
# protocol type
unique_protocol=sorted(df.protocol_type.unique())
string1 = 'Protocol_type_'
unique_protocol2=[string1 + x for x in unique_protocol]
print(unique_protocol2)

# service
unique_service=sorted(df.service.unique())
string2 = 'service_'
unique_service2=[string2 + x for x in unique_service]
print(unique_service2)


# flag
unique_flag=sorted(df.flag.unique())
string3 = 'flag_'
unique_flag2=[string3 + x for x in unique_flag]
print(unique_flag2)


# put together
dumcols=unique_protocol2 + unique_service2 + unique_flag2


#do it for test set
unique_service_test=sorted(df_test.service.unique())
unique_service2_test=[string2 + x for x in unique_service_test]
testdumcols=unique_protocol2 + unique_service2_test + unique_flag2


['Protocol_type_icmp', 'Protocol_type_tcp', 'Protocol_type_udp']
['service_IRC', 'service_X11', 'service_Z39_50', 'service_aol', 'service_auth', 'service_bgp', 'service_courier', 'service_csnet_ns', 'service_ctf', 'service_daytime', 'service_discard', 'service_domain', 'service_domain_u', 'service_echo', 'service_eco_i', 'service_ecr_i', 'service_efs', 'service_exec', 'service_finger', 'service_ftp', 'service_ftp_data', 'service_gopher', 'service_harvest', 'service_hostnames', 'service_http', 'service_http_2784', 'service_http_443', 'service_http_8001', 'service_imap4', 'service_iso_tsap', 'service_klogin', 'service_kshell', 'service_ldap', 'service_link', 'service_login', 'service_mtp', 'service_name', 'service_netbios_dgm', 'service_netbios_ns', 'service_netbios_ssn', 'service_netstat', 'service_nnsp', 'service_nntp', 'service_ntp_u', 'service_other', 'service_pm_dump', 'service_pop_2', 'service_pop_3', 'service_printer', 'service_private', 'service_red_i', 'service_remote_job', 'ser

**Transform categorical features into numbers using LabelEncoder()**

In [19]:
df_categorical_values_enc=df_categorical_values.apply(LabelEncoder().fit_transform)

print(df_categorical_values.head())
print('--------------------')
print(df_categorical_values_enc.head())

# test set
testdf_categorical_values_enc=testdf_categorical_values.apply(LabelEncoder().fit_transform)

  protocol_type   service flag
0           tcp  ftp_data   SF
1           udp     other   SF
2           tcp   private   S0
3           tcp      http   SF
4           tcp      http   SF
--------------------
   protocol_type  service  flag
0              1       20     9
1              2       44     9
2              1       49     5
3              1       24     9
4              1       24     9


**One-Hot-Encoding**

In [21]:
enc = OneHotEncoder(categories='auto')
df_categorical_values_encenc = enc.fit_transform(df_categorical_values_enc)
df_cat_data = pd.DataFrame(df_categorical_values_encenc.toarray(),columns=dumcols)


# test set
testdf_categorical_values_encenc = enc.fit_transform(testdf_categorical_values_enc)
testdf_cat_data = pd.DataFrame(testdf_categorical_values_encenc.toarray(),columns=testdumcols)

df_cat_data.head()

Unnamed: 0,Protocol_type_icmp,Protocol_type_tcp,Protocol_type_udp,service_IRC,service_X11,service_Z39_50,service_aol,service_auth,service_bgp,service_courier,...,flag_REJ,flag_RSTO,flag_RSTOS0,flag_RSTR,flag_S0,flag_S1,flag_S2,flag_S3,flag_SF,flag_SH
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


**Test setteki eksik sütunlar eklenir**

In [23]:
trainservice=df['service'].tolist()
testservice= df_test['service'].tolist()
difference=list(set(trainservice) - set(testservice))
string = 'service_'
difference=[string + x for x in difference]
difference

['service_http_2784',
 'service_http_8001',
 'service_red_i',
 'service_urh_i',
 'service_aol',
 'service_harvest']

In [25]:
for col in difference:
    testdf_cat_data[col] = 0

print(df_cat_data.shape)    
print(testdf_cat_data.shape)

(125973, 84)
(22544, 84)


In [27]:
newdf=df.join(df_cat_data)
newdf.drop('flag', axis=1, inplace=True)
newdf.drop('protocol_type', axis=1, inplace=True)
newdf.drop('service', axis=1, inplace=True)

# test data
newdf_test=df_test.join(testdf_cat_data)
newdf_test.drop('flag', axis=1, inplace=True)
newdf_test.drop('protocol_type', axis=1, inplace=True)
newdf_test.drop('service', axis=1, inplace=True)

print(newdf.shape)
print(newdf_test.shape)

(125973, 123)
(22544, 123)


 Dataset her atak kategorisi için ayrı datasetlere ayrıldı. Atak etiketleri her biri için yeniden adlandırıldı. 0=Normal, 1=DoS, 2=Probe, 3=R2L, 4=U2R. Yeni datasetlerde etiket sütunu yeni değerler ile değiştirildi.
 
 DoS : 
 
 Probe : 
 
 R2L :
 
 U2R :

In [29]:
labeldf=newdf['label']
labeldf_test=newdf_test['label']


# change the label column
newlabeldf=labeldf.replace({ 'normal' : 0, 'neptune' : 1 ,'back': 1, 'land': 1, 'pod': 1, 'smurf': 1, 'teardrop': 1,'mailbomb': 1, 'apache2': 1, 'processtable': 1, 'udpstorm': 1, 'worm': 1,
                           'ipsweep' : 2,'nmap' : 2,'portsweep' : 2,'satan' : 2,'mscan' : 2,'saint' : 2
                           ,'ftp_write': 3,'guess_passwd': 3,'imap': 3,'multihop': 3,'phf': 3,'spy': 3,'warezclient': 3,'warezmaster': 3,'sendmail': 3,'named': 3,'snmpgetattack': 3,'snmpguess': 3,'xlock': 3,'xsnoop': 3,'httptunnel': 3,
                           'buffer_overflow': 4,'loadmodule': 4,'perl': 4,'rootkit': 4,'ps': 4,'sqlattack': 4,'xterm': 4})
newlabeldf_test=labeldf_test.replace({ 'normal' : 0, 'neptune' : 1 ,'back': 1, 'land': 1, 'pod': 1, 'smurf': 1, 'teardrop': 1,'mailbomb': 1, 'apache2': 1, 'processtable': 1, 'udpstorm': 1, 'worm': 1,
                           'ipsweep' : 2,'nmap' : 2,'portsweep' : 2,'satan' : 2,'mscan' : 2,'saint' : 2
                           ,'ftp_write': 3,'guess_passwd': 3,'imap': 3,'multihop': 3,'phf': 3,'spy': 3,'warezclient': 3,'warezmaster': 3,'sendmail': 3,'named': 3,'snmpgetattack': 3,'snmpguess': 3,'xlock': 3,'xsnoop': 3,'httptunnel': 3,
                           'buffer_overflow': 4,'loadmodule': 4,'perl': 4,'rootkit': 4,'ps': 4,'sqlattack': 4,'xterm': 4})



# put the new label column back
newdf['label'] = newlabeldf
newdf_test['label'] = newlabeldf_test

  newlabeldf=labeldf.replace({ 'normal' : 0, 'neptune' : 1 ,'back': 1, 'land': 1, 'pod': 1, 'smurf': 1, 'teardrop': 1,'mailbomb': 1, 'apache2': 1, 'processtable': 1, 'udpstorm': 1, 'worm': 1,
  newlabeldf_test=labeldf_test.replace({ 'normal' : 0, 'neptune' : 1 ,'back': 1, 'land': 1, 'pod': 1, 'smurf': 1, 'teardrop': 1,'mailbomb': 1, 'apache2': 1, 'processtable': 1, 'udpstorm': 1, 'worm': 1,


In [31]:
to_drop_DoS = [0,1]
to_drop_Probe = [0,2]
to_drop_R2L = [0,3]
to_drop_U2R = [0,4]

# Kendisi dışındaki label değerine sahip tüm satırları filtrele
# isin filter function

DoS_df=newdf[newdf['label'].isin(to_drop_DoS)];
Probe_df=newdf[newdf['label'].isin(to_drop_Probe)];
R2L_df=newdf[newdf['label'].isin(to_drop_R2L)];
U2R_df=newdf[newdf['label'].isin(to_drop_U2R)];



#test
DoS_df_test=newdf_test[newdf_test['label'].isin(to_drop_DoS)];
Probe_df_test=newdf_test[newdf_test['label'].isin(to_drop_Probe)];
R2L_df_test=newdf_test[newdf_test['label'].isin(to_drop_R2L)];
U2R_df_test=newdf_test[newdf_test['label'].isin(to_drop_U2R)];


print('Train:')
print('Dimensions of DoS:' ,DoS_df.shape)
print('Dimensions of Probe:' ,Probe_df.shape)
print('Dimensions of R2L:' ,R2L_df.shape)
print('Dimensions of U2R:' ,U2R_df.shape)
print()
print('Test:')
print('Dimensions of DoS:' ,DoS_df_test.shape)
print('Dimensions of Probe:' ,Probe_df_test.shape)
print('Dimensions of R2L:' ,R2L_df_test.shape)
print('Dimensions of U2R:' ,U2R_df_test.shape)

Train:
Dimensions of DoS: (113270, 123)
Dimensions of Probe: (78999, 123)
Dimensions of R2L: (68338, 123)
Dimensions of U2R: (67395, 123)

Test:
Dimensions of DoS: (17171, 123)
Dimensions of Probe: (12132, 123)
Dimensions of R2L: (12596, 123)
Dimensions of U2R: (9778, 123)


**Step 2: Feature Scaling**

In [33]:
# Split dataframes into X & Y
X_DoS = DoS_df.drop(columns=['label'])
Y_DoS = DoS_df['label']

X_Probe = Probe_df.drop(columns=['label'])
Y_Probe = Probe_df['label']

X_R2L = R2L_df.drop(columns=['label'])
Y_R2L = R2L_df['label']

X_U2R = U2R_df.drop(columns=['label'])
Y_U2R = U2R_df['label']

# Test set
X_DoS_test = DoS_df_test.drop(columns=['label'])
Y_DoS_test = DoS_df_test['label']

X_Probe_test = Probe_df_test.drop(columns=['label'])
Y_Probe_test = Probe_df_test['label']

X_R2L_test = R2L_df_test.drop(columns=['label'])
Y_R2L_test = R2L_df_test['label']

X_U2R_test = U2R_df_test.drop(columns=['label'])
Y_U2R_test = U2R_df_test['label']

In [35]:
colNames=list(X_DoS)
colNames_test=list(X_DoS_test)

In [44]:
colNames=list(X_DoS)
colNames_test=list(X_DoS_test)

Before scaling - data check:
DoS_df shape: (0, 123)
Probe_df shape: (0, 123)
R2L_df shape: (0, 123)
U2R_df shape: (0, 123)


In [37]:
from sklearn import preprocessing

scaler1 = preprocessing.StandardScaler().fit(X_DoS)
X_DoS=scaler1.transform(X_DoS) 

scaler2 = preprocessing.StandardScaler().fit(X_Probe)
X_Probe=scaler2.transform(X_Probe)

scaler3 = preprocessing.StandardScaler().fit(X_R2L)
X_R2L=scaler3.transform(X_R2L)

scaler4 = preprocessing.StandardScaler().fit(X_U2R)
X_U2R=scaler4.transform(X_U2R) 

# test data
scaler5 = preprocessing.StandardScaler().fit(X_DoS_test)
X_DoS_test=scaler5.transform(X_DoS_test) 

scaler6 = preprocessing.StandardScaler().fit(X_Probe_test)
X_Probe_test=scaler6.transform(X_Probe_test) 

scaler7 = preprocessing.StandardScaler().fit(X_R2L_test)
X_R2L_test=scaler7.transform(X_R2L_test) 

scaler8 = preprocessing.StandardScaler().fit(X_U2R_test)
X_U2R_test=scaler8.transform(X_U2R_test)

In [41]:
from sklearn.feature_selection import RFE
import xgboost as xgb
from sklearn.metrics import confusion_matrix
import pandas as pd

# Initialize XGBoost classifier for RFE
xgb_clf = xgb.XGBClassifier(
    n_estimators=10,
    n_jobs=2,
    random_state=42
)

In [45]:
# Make each classification binary (0 for non-attack, 1 for attack)
# For DoS
Y_DoS = (Y_DoS == 1).astype(int)

# For Probe
Y_Probe = (Y_Probe == 2).astype(int)

# For R2L
Y_R2L = (Y_R2L == 3).astype(int)

# For U2R
Y_U2R = (Y_U2R == 4).astype(int)

# Do the same for test sets
Y_DoS_test = (Y_DoS_test == 1).astype(int)
Y_Probe_test = (Y_Probe_test == 2).astype(int)
Y_R2L_test = (Y_R2L_test == 3).astype(int)
Y_U2R_test = (Y_U2R_test == 4).astype(int)

# Now try RFE
from sklearn.feature_selection import RFE
import xgboost as xgb

# Initialize XGBoost
xgb_clf = xgb.XGBClassifier(n_estimators=10, n_jobs=2, random_state=42)

# Feature Selection with RFE
rfe = RFE(estimator=xgb_clf, n_features_to_select=13, step=1)

# DoS
rfe.fit(X_DoS, Y_DoS)
X_rfeDoS = rfe.transform(X_DoS)
true = rfe.support_
rfecolindex_DoS = [i for i, x in enumerate(true) if x]
rfecolname_DoS = list(colNames[i] for i in rfecolindex_DoS)

# Probe
rfe.fit(X_Probe, Y_Probe)
X_rfeProbe = rfe.transform(X_Probe)
true = rfe.support_
rfecolindex_Probe = [i for i, x in enumerate(true) if x]
rfecolname_Probe = list(colNames[i] for i in rfecolindex_Probe)

# R2L
rfe.fit(X_R2L, Y_R2L)
X_rfeR2L = rfe.transform(X_R2L)
true = rfe.support_
rfecolindex_R2L = [i for i, x in enumerate(true) if x]
rfecolname_R2L = list(colNames[i] for i in rfecolindex_R2L)

# U2R
rfe.fit(X_U2R, Y_U2R)
X_rfeU2R = rfe.transform(X_U2R)
true = rfe.support_
rfecolindex_U2R = [i for i, x in enumerate(true) if x]
rfecolname_U2R = list(colNames[i] for i in rfecolindex_U2R)

In [47]:
print(X_rfeDoS.shape)
print(X_rfeProbe.shape)
print(X_rfeR2L.shape)
print(X_rfeU2R.shape)

(113270, 13)
(78999, 13)
(68338, 13)
(67395, 13)


In [49]:
# Train XGBoost models with all features
xgb_DoS = xgb.XGBClassifier(n_estimators=10, n_jobs=2, random_state=42)
xgb_Probe = xgb.XGBClassifier(n_estimators=10, n_jobs=2, random_state=42)
xgb_R2L = xgb.XGBClassifier(n_estimators=10, n_jobs=2, random_state=42)
xgb_U2R = xgb.XGBClassifier(n_estimators=10, n_jobs=2, random_state=42)

xgb_DoS.fit(X_DoS, Y_DoS.astype(int))
xgb_Probe.fit(X_Probe, Y_Probe.astype(int))
xgb_R2L.fit(X_R2L, Y_R2L.astype(int))
xgb_U2R.fit(X_U2R, Y_U2R.astype(int))

In [51]:
# Train XGBoost models with selected features
xgb_rfeDoS = xgb.XGBClassifier(n_estimators=10, n_jobs=2, random_state=42)
xgb_rfeProbe = xgb.XGBClassifier(n_estimators=10, n_jobs=2, random_state=42)
xgb_rfeR2L = xgb.XGBClassifier(n_estimators=10, n_jobs=2, random_state=42)
xgb_rfeU2R = xgb.XGBClassifier(n_estimators=10, n_jobs=2, random_state=42)

xgb_rfeDoS.fit(X_rfeDoS, Y_DoS.astype(int))
xgb_rfeProbe.fit(X_rfeProbe, Y_Probe.astype(int))
xgb_rfeR2L.fit(X_rfeR2L, Y_R2L.astype(int))
xgb_rfeU2R.fit(X_rfeU2R, Y_U2R.astype(int))

In [53]:
# Make predictions
Y_DoS_pred = xgb_DoS.predict(X_DoS_test)
Y_Probe_pred = xgb_Probe.predict(X_Probe_test)
Y_R2L_pred = xgb_R2L.predict(X_R2L_test)
Y_U2R_pred = xgb_U2R.predict(X_U2R_test)

In [55]:
# Create confusion matrices
print("\nDoS Confusion Matrix:")
print(pd.crosstab(Y_DoS_test, Y_DoS_pred, rownames=['Actual attacks'], colnames=['Predicted attacks']))

print("\nProbe Confusion Matrix:")
print(pd.crosstab(Y_Probe_test, Y_Probe_pred, rownames=['Actual attacks'], colnames=['Predicted attacks']))

print("\nR2L Confusion Matrix:")
print(pd.crosstab(Y_R2L_test, Y_R2L_pred, rownames=['Actual attacks'], colnames=['Predicted attacks']))

print("\nU2R Confusion Matrix:")
print(pd.crosstab(Y_U2R_test, Y_U2R_pred, rownames=['Actual attacks'], colnames=['Predicted attacks']))

# View probabilities for DoS (example)
print("\nProbability predictions for first 10 DoS observations:")
print(xgb_DoS.predict_proba(X_DoS_test)[0:10])


DoS Confusion Matrix:
Predicted attacks     0     1
Actual attacks               
0                  9479   232
1                  2830  4630

Probe Confusion Matrix:
Predicted attacks     0     1
Actual attacks               
0                  3494  6217
1                   763  1658

R2L Confusion Matrix:
Predicted attacks     0
Actual attacks         
0                  9711
1                  2885

U2R Confusion Matrix:
Predicted attacks     0
Actual attacks         
0                  9711
1                    67

Probability predictions for first 10 DoS observations:
[[0.02549195 0.97450805]
 [0.02549195 0.97450805]
 [0.9647553  0.03524473]
 [0.98054695 0.01945303]
 [0.7833458  0.21665415]
 [0.98054695 0.01945303]
 [0.9386531  0.06134691]
 [0.02549195 0.97450805]
 [0.96161103 0.03838897]
 [0.92288727 0.07711271]]


In [57]:
from sklearn.model_selection import cross_val_score

# DoS Cross Validation
print("\nDoS Metrics:")
print("-" * 20)
accuracy = cross_val_score(xgb_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(xgb_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(xgb_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(xgb_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))

# Probe Cross Validation
print("\nProbe Metrics:")
print("-" * 20)
accuracy = cross_val_score(xgb_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(xgb_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(xgb_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(xgb_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))

# R2L Cross Validation
print("\nR2L Metrics:")
print("-" * 20)
accuracy = cross_val_score(xgb_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(xgb_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(xgb_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(xgb_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))

# U2R Cross Validation
print("\nU2R Metrics:")
print("-" * 20)
accuracy = cross_val_score(xgb_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(xgb_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(xgb_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(xgb_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))


DoS Metrics:
--------------------
Accuracy: 0.99750 (+/- 0.00239)
Precision: 0.99772 (+/- 0.00294)
Recall: 0.99651 (+/- 0.00436)
F-measure: 0.99712 (+/- 0.00276)

Probe Metrics:
--------------------
Accuracy: 0.99621 (+/- 0.00405)
Precision: 0.99254 (+/- 0.00894)
Recall: 0.98843 (+/- 0.01323)
F-measure: 0.99047 (+/- 0.01021)

R2L Metrics:
--------------------
Accuracy: 0.97888 (+/- 0.00725)
Precision: 0.94668 (+/- 0.02618)
Recall: 0.96221 (+/- 0.02207)
F-measure: 0.95429 (+/- 0.01549)

U2R Metrics:
--------------------
Accuracy: 0.99765 (+/- 0.00259)
Precision: 0.92738 (+/- 0.18518)
Recall: 0.71905 (+/- 0.31930)
F-measure: 0.80021 (+/- 0.23126)


In [59]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pandas as pd

# Initialize Logistic Regression for RFE
lr = LogisticRegression(max_iter=1000, n_jobs=2, random_state=42)

# Feature Selection with RFE
rfe = RFE(estimator=lr, n_features_to_select=13, step=1)

# DoS
rfe.fit(X_DoS, Y_DoS.astype(int))
X_rfeDoS = rfe.transform(X_DoS)
true = rfe.support_
rfecolindex_DoS = [i for i, x in enumerate(true) if x]
rfecolname_DoS = list(colNames[i] for i in rfecolindex_DoS)

# Probe
rfe.fit(X_Probe, Y_Probe.astype(int))
X_rfeProbe = rfe.transform(X_Probe)
true = rfe.support_
rfecolindex_Probe = [i for i, x in enumerate(true) if x]
rfecolname_Probe = list(colNames[i] for i in rfecolindex_Probe)

# R2L
rfe.fit(X_R2L, Y_R2L.astype(int))
X_rfeR2L = rfe.transform(X_R2L)
true = rfe.support_
rfecolindex_R2L = [i for i, x in enumerate(true) if x]
rfecolname_R2L = list(colNames[i] for i in rfecolindex_R2L)

# U2R
rfe.fit(X_U2R, Y_U2R.astype(int))
X_rfeU2R = rfe.transform(X_U2R)
true = rfe.support_
rfecolindex_U2R = [i for i, x in enumerate(true) if x]
rfecolname_U2R = list(colNames[i] for i in rfecolindex_U2R)

print(X_rfeDoS.shape)
print(X_rfeProbe.shape)
print(X_rfeR2L.shape)
print(X_rfeU2R.shape)

# Train Logistic Regression models with all features
lr_DoS = LogisticRegression(max_iter=1000, n_jobs=2, random_state=42)
lr_Probe = LogisticRegression(max_iter=1000, n_jobs=2, random_state=42)
lr_R2L = LogisticRegression(max_iter=1000, n_jobs=2, random_state=42)
lr_U2R = LogisticRegression(max_iter=1000, n_jobs=2, random_state=42)

lr_DoS.fit(X_DoS, Y_DoS.astype(int))
lr_Probe.fit(X_Probe, Y_Probe.astype(int))
lr_R2L.fit(X_R2L, Y_R2L.astype(int))
lr_U2R.fit(X_U2R, Y_U2R.astype(int))

# Train Logistic Regression models with selected features
lr_rfeDoS = LogisticRegression(max_iter=1000, n_jobs=2, random_state=42)
lr_rfeProbe = LogisticRegression(max_iter=1000, n_jobs=2, random_state=42)
lr_rfeR2L = LogisticRegression(max_iter=1000, n_jobs=2, random_state=42)
lr_rfeU2R = LogisticRegression(max_iter=1000, n_jobs=2, random_state=42)

lr_rfeDoS.fit(X_rfeDoS, Y_DoS.astype(int))
lr_rfeProbe.fit(X_rfeProbe, Y_Probe.astype(int))
lr_rfeR2L.fit(X_rfeR2L, Y_R2L.astype(int))
lr_rfeU2R.fit(X_rfeU2R, Y_U2R.astype(int))

# Make predictions
Y_DoS_pred = lr_DoS.predict(X_DoS_test)
Y_Probe_pred = lr_Probe.predict(X_Probe_test)
Y_R2L_pred = lr_R2L.predict(X_R2L_test)
Y_U2R_pred = lr_U2R.predict(X_U2R_test)

# Create confusion matrices
print("\nDoS Confusion Matrix:")
print(pd.crosstab(Y_DoS_test, Y_DoS_pred, rownames=['Actual attacks'], colnames=['Predicted attacks']))

print("\nProbe Confusion Matrix:")
print(pd.crosstab(Y_Probe_test, Y_Probe_pred, rownames=['Actual attacks'], colnames=['Predicted attacks']))

print("\nR2L Confusion Matrix:")
print(pd.crosstab(Y_R2L_test, Y_R2L_pred, rownames=['Actual attacks'], colnames=['Predicted attacks']))

print("\nU2R Confusion Matrix:")
print(pd.crosstab(Y_U2R_test, Y_U2R_pred, rownames=['Actual attacks'], colnames=['Predicted attacks']))

# View probabilities for DoS (example)
print("\nProbability predictions for first 10 DoS observations:")
print(lr_DoS.predict_proba(X_DoS_test)[0:10])

(113270, 13)
(78999, 13)
(68338, 13)
(67395, 13)

DoS Confusion Matrix:
Predicted attacks     0     1
Actual attacks               
0                  9100   611
1                  1265  6195

Probe Confusion Matrix:
Predicted attacks     0     1
Actual attacks               
0                  9548   163
1                  1311  1110

R2L Confusion Matrix:
Predicted attacks     0    1
Actual attacks              
0                  9648   63
1                  2769  116

U2R Confusion Matrix:
Predicted attacks     0  1
Actual attacks            
0                  9710  1
1                    67  0

Probability predictions for first 10 DoS observations:
[[1.37764058e-03 9.98622359e-01]
 [2.43869252e-03 9.97561307e-01]
 [9.99502963e-01 4.97036558e-04]
 [9.94344372e-01 5.65562828e-03]
 [1.50899447e-01 8.49100553e-01]
 [9.85485737e-01 1.45142632e-02]
 [7.39769500e-02 9.26023050e-01]
 [2.96601023e-03 9.97033990e-01]
 [2.62007824e-07 9.99999738e-01]
 [9.99292040e-01 7.07959590e-04]]


In [61]:
# Cross Validation for each attack type
from sklearn.model_selection import cross_val_score

# DoS Cross Validation
print("\nDoS Metrics:")
print("-" * 20)
accuracy = cross_val_score(lr_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(lr_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(lr_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(lr_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))

# Probe Cross Validation
print("\nProbe Metrics:")
print("-" * 20)
accuracy = cross_val_score(lr_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(lr_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(lr_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(lr_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))

# R2L Cross Validation
print("\nR2L Metrics:")
print("-" * 20)
accuracy = cross_val_score(lr_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(lr_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(lr_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(lr_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))

# U2R Cross Validation
print("\nU2R Metrics:")
print("-" * 20)
accuracy = cross_val_score(lr_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(lr_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(lr_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(lr_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))


DoS Metrics:
--------------------
Accuracy: 0.99383 (+/- 0.00365)
Precision: 0.99093 (+/- 0.00697)
Recall: 0.99491 (+/- 0.00375)
F-measure: 0.99291 (+/- 0.00418)

Probe Metrics:
--------------------
Accuracy: 0.98426 (+/- 0.00497)
Precision: 0.94753 (+/- 0.01820)
Recall: 0.97521 (+/- 0.01922)
F-measure: 0.96112 (+/- 0.01232)

R2L Metrics:
--------------------
Accuracy: 0.96570 (+/- 0.00848)
Precision: 0.90401 (+/- 0.02470)
Recall: 0.95148 (+/- 0.02317)
F-measure: 0.92706 (+/- 0.01780)

U2R Metrics:
--------------------
Accuracy: 0.99683 (+/- 0.00323)
Precision: 0.86357 (+/- 0.26897)
Recall: 0.67619 (+/- 0.41905)
F-measure: 0.73131 (+/- 0.30207)


In [63]:
from sklearn.ensemble import StackingClassifier
from datetime import datetime
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

# Print header
print("Stacking Ensemble Analysis (XGBoost + Logistic Regression)")
print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} UTC")
print(f"User: shay-haan")
print("=" * 70)

# Define base models and meta model
estimators = [
    ('xgb', xgb.XGBClassifier(n_estimators=10, n_jobs=2, random_state=42)),
    ('lr', LogisticRegression(max_iter=1000, n_jobs=2, random_state=42))
]

# Create stacking model with LogisticRegression as meta-classifier
stacking = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(max_iter=1000),
    cv=5,
    n_jobs=2
)

# Train models for each attack type
# DoS
stack_DoS = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(max_iter=1000),
    cv=5,
    n_jobs=2
)
stack_DoS.fit(X_DoS, Y_DoS)

# Probe
stack_Probe = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(max_iter=1000),
    cv=5,
    n_jobs=2
)
stack_Probe.fit(X_Probe, Y_Probe)

# R2L
stack_R2L = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(max_iter=1000),
    cv=5,
    n_jobs=2
)
stack_R2L.fit(X_R2L, Y_R2L)

# U2R
stack_U2R = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(max_iter=1000),
    cv=5,
    n_jobs=2
)
stack_U2R.fit(X_U2R, Y_U2R)

Stacking Ensemble Analysis (XGBoost + Logistic Regression)
Date: 2025-04-05 09:10:46 UTC
User: shay-haan


In [69]:
# Make predictions using stacked models
Y_DoS_pred = stack_DoS.predict(X_DoS_test)
Y_Probe_pred = stack_Probe.predict(X_Probe_test)


# Print confusion matrices
print("\nConfusion Matrices:")
print("\nDoS Confusion Matrix:")
print(pd.crosstab(Y_DoS_test, Y_DoS_pred, rownames=['Actual'], colnames=['Predicted']))

print("\nProbe Confusion Matrix:")
print(pd.crosstab(Y_Probe_test, Y_Probe_pred, rownames=['Actual'], colnames=['Predicted']))




Confusion Matrices:

DoS Confusion Matrix:
Predicted     0     1
Actual               
0          9447   264
1          2453  5007

Probe Confusion Matrix:
Predicted     0     1
Actual               
0          6499  3212
1           880  1541


In [71]:
# 1. Add class weights
xgb_balanced = xgb.XGBClassifier(
    n_estimators=10,
    scale_pos_weight=len(Y_R2L[Y_R2L==0])/len(Y_R2L[Y_R2L==1]),  # automatically calculate weight
    n_jobs=2,
    random_state=42
)

lr_balanced = LogisticRegression(
    max_iter=1000,
    class_weight='balanced',
    n_jobs=2,
    random_state=42
)

# 2. Create new balanced stacking model
estimators_balanced = [
    ('xgb', xgb_balanced),
    ('lr', lr_balanced)
]

stack_balanced = StackingClassifier(
    estimators=estimators_balanced,
    final_estimator=LogisticRegression(class_weight='balanced', max_iter=1000),
    cv=5,
    n_jobs=2
)

# 3. Train models with balanced settings
stack_R2L_balanced = stack_balanced.fit(X_R2L, Y_R2L)
stack_U2R_balanced = stack_balanced.fit(X_U2R, Y_U2R)

# 4. Make predictions and show new confusion matrices
Y_R2L_pred_balanced = stack_R2L_balanced.predict(X_R2L_test)
Y_U2R_pred_balanced = stack_U2R_balanced.predict(X_U2R_test)

print("\nR2L Confusion Matrix (Balanced):")
print(pd.crosstab(Y_R2L_test, Y_R2L_pred_balanced, rownames=['Actual'], colnames=['Predicted']))

print("\nU2R Confusion Matrix (Balanced):")
print(pd.crosstab(Y_U2R_test, Y_U2R_pred_balanced, rownames=['Actual'], colnames=['Predicted']))


R2L Confusion Matrix (Balanced):
Predicted     0    1
Actual              
0          9650   61
1          2743  142

U2R Confusion Matrix (Balanced):
Predicted     0   1
Actual             
0          9635  76
1            54  13


In [73]:
# Cross Validation for each attack type
print("\nStacking Model Evaluation:")

# DoS Cross Validation
print("\nDoS Metrics:")
print("-" * 20)
accuracy = cross_val_score(stack_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(stack_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(stack_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(stack_DoS, X_DoS_test, Y_DoS_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))

# Probe Cross Validation
print("\nProbe Metrics:")
print("-" * 20)
accuracy = cross_val_score(stack_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(stack_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(stack_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(stack_Probe, X_Probe_test, Y_Probe_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))

# R2L Cross Validation
print("\nR2L Metrics:")
print("-" * 20)
accuracy = cross_val_score(stack_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(stack_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(stack_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(stack_R2L, X_R2L_test, Y_R2L_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))

# U2R Cross Validation
print("\nU2R Metrics:")
print("-" * 20)
accuracy = cross_val_score(stack_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='accuracy')
print("Accuracy: %0.5f (+/- %0.5f)" % (accuracy.mean(), accuracy.std() * 2))
precision = cross_val_score(stack_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='precision')
print("Precision: %0.5f (+/- %0.5f)" % (precision.mean(), precision.std() * 2))
recall = cross_val_score(stack_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='recall')
print("Recall: %0.5f (+/- %0.5f)" % (recall.mean(), recall.std() * 2))
f1 = cross_val_score(stack_U2R, X_U2R_test, Y_U2R_test, cv=10, scoring='f1')
print("F-measure: %0.5f (+/- %0.5f)" % (f1.mean(), f1.std() * 2))



Stacking Model Evaluation:

DoS Metrics:
--------------------
Accuracy: 0.99755 (+/- 0.00243)
Precision: 0.99799 (+/- 0.00300)
Recall: 0.99638 (+/- 0.00380)
F-measure: 0.99718 (+/- 0.00280)

Probe Metrics:
--------------------
Accuracy: 0.99580 (+/- 0.00400)
Precision: 0.99296 (+/- 0.01225)
Recall: 0.98596 (+/- 0.01441)
F-measure: 0.98943 (+/- 0.01009)

R2L Metrics:
--------------------
Accuracy: 0.97753 (+/- 0.00671)
Precision: 0.94129 (+/- 0.02835)
Recall: 0.96222 (+/- 0.02065)
F-measure: 0.95153 (+/- 0.01391)

U2R Metrics:
--------------------
Accuracy: 0.99703 (+/- 0.00281)
Precision: 0.94167 (+/- 0.23629)
Recall: 0.62857 (+/- 0.36651)
F-measure: 0.73193 (+/- 0.28663)
