# Importing Packages

In [3]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import pickle as pkl

# Loading Dataset

In [5]:
colnames = ["duration","protocol_type","service","flag","src_bytes","dst_bytes","land","wrong_fragment","urgent","hot","num_failed_logins","logged_in","num_compromised","root_shell","su_attempted","num_root","num_file_creations","num_shells","num_access_files","num_outbound_cmds","is_host_login","is_guest_login","count","srv_count","serror_rate","srv_serror_rate","same_srv_rate","diff_srv_rate","srv_diff_host_rate","una1","una2","dst_host_count","dst_host_srv_count","dst_host_same_srv_rate","dst_host_diff_srv_rate","dst_host_same_src_port_rate","dst_host_srv_diff_host_rate","dst_host_serror_rate","dst_host_srv_serror_rate","dst_host_rerror_rate","dst_host_srv_rerror_rate","result"]
df = pd.read_csv("icmp_revised_kddcup_dataset.csv", header=None,names=colnames ,index_col=0)

  interactivity=interactivity, compiler=compiler, result=result)


In [6]:
icmp_df = df[df.loc[:,"protocol_type"] == "icmp"]

In [7]:
icmp_features = ["service","src_bytes","srv_count"]
icmp_target = "result"

icmp_y = icmp_df.loc[:,icmp_target]

In [8]:
classes = np.unique(icmp_y)
print(classes)

['ipsweep.' 'multihop.' 'normal.' 'pod.' 'saint.' 'satan.' 'smurf.'
 'snmpguess.']


In [9]:
for i in range(len(classes)):
    if i == 2:
        icmp_df = icmp_df.replace(classes[i], 0)
    else:
        icmp_df = icmp_df.replace(classes[i], 1)

#turning the service attribute to categorical values
icmp_df=icmp_df.replace("eco_i",-0.1)
icmp_df=icmp_df.replace("ecr_i",0.0)
icmp_df=icmp_df.replace("tim_i",0.1)
icmp_df=icmp_df.replace("urp_i",0.2)

icmp_y = icmp_df.loc[:,icmp_target]
icmp_X = icmp_df.loc[:,icmp_features]

# Training a Random Forest Classifier to list out importance of features

In [11]:
from sklearn.ensemble import RandomForestClassifier
rs = RandomForestClassifier()
rs.fit(icmp_X,icmp_y)
print(pd.Series(rs.feature_importances_,index=icmp_features).sort_values(ascending=False))

src_bytes    0.565416
srv_count    0.305651
service      0.128933
dtype: float64


# Training the Model

In [15]:
icmp_X_train, icmp_X_test, icmp_y_train, icmp_y_test = train_test_split(icmp_X, icmp_y, random_state=42, test_size=0.3)

In [56]:
icmp_model = MLPClassifier(alpha=0.005)
 
icmp_model.fit(icmp_X_train,icmp_y_train)

MLPClassifier(activation='relu', alpha=0.005, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

# Serializing the Model to make it Usable

In [57]:
icmp_filename = "icmp_model.pkl"
pkl.dump(icmp_model, open(icmp_filename, 'wb'))

# Loading and testing the model

In [59]:
loaded_icmp_model = pkl.load(open(icmp_filename, 'rb'))
icmp_y_pred = loaded_icmp_model.predict(icmp_X_test)
icmp_score = accuracy_score(icmp_y_test, icmp_y_pred)*100
print(icmp_score)

99.78581964397567
