-
Notifications
You must be signed in to change notification settings - Fork 0
/
C45.py
84 lines (63 loc) · 2.57 KB
/
C45.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# -*- coding: utf-8 -*-
"""Network Classification with C45
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1agZCfDYi8bZLC4banJUS0KZu9lS-VfjM
"""
# Mount Google Drive so the dataset CSV below is reachable from this
# Colab runtime (prompts for authorization on first run).
from google.colab import drive
drive.mount('/content/drive')
# Commented out IPython magic to ensure Python compatibility.
# %cd /content/drive/MyDrive/MSR/MS 2nd Sem/ACN Project
#import the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#read NIMS dataset using pandas
# NOTE(review): hard-coded Drive path — assumes the NIMS flow CSV lives at
# this exact location in the mounted account; confirm before running elsewhere.
data_set = pd.read_csv("/content/drive/MyDrive/MSR/MS 2nd Sem/ACN Project/NIMS_file.csv")
# Bare expression: displays the loaded DataFrame in the notebook output cell.
data_set
"""#Data Preprocessing"""
data_set.shape
data_set.isnull()
#fill 0 inplace of nan values
data_set.fillna(0)
#different classes
protocoal_class=data_set.loc[:,"class"].values
protocoal_class
#count the number of classes
data_set['class'].value_counts()
#drop columns
data_set=data_set.drop(index=data_set[data_set['class']=='lime'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='DNS'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='HTTP'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='shell'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='sftp'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='x11'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='FTP'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='TELNET'].index)
data_set.head()
# Extract the independent (feature) and dependent (label) variables.
# Columns 0..22 hold the flow features; the last column is the class label.
features = data_set.iloc[:, 0:23].values
labels = data_set.iloc[:, -1].values

# Hold out 25% of the samples as a test set; the fixed seed makes the
# split reproducible across runs.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.25, random_state=0)

# Standardise the features: fit the scaler on the training data only,
# then apply the identical transform to the test data.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# --- Model: C4.5-style decision tree (entropy criterion, depth-limited) ---
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
clf.fit(x_train, y_train)

# --- Predict labels for the held-out test set ---
y_pred = clf.predict(x_test)

# --- Evaluation ---
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix (bare expression: displayed in the notebook output).
cm = confusion_matrix(y_test, y_pred)
cm

# Overall accuracy on the test set.
metrics.accuracy_score(y_test, y_pred)

# Per-class precision / recall / F1 summary.
print(classification_report(y_test, y_pred))