-
Notifications
You must be signed in to change notification settings - Fork 0
/
C45.py
84 lines (63 loc) · 2.57 KB
/
C45.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# -*- coding: utf-8 -*-
"""Network Classification with C45
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1agZCfDYi8bZLC4banJUS0KZu9lS-VfjM
"""
# Mount Google Drive so the dataset CSV below is reachable from this
# Colab runtime (prompts for authorization on first run).
from google.colab import drive
drive.mount('/content/drive')
# Commented out IPython magic to ensure Python compatibility.
# %cd /content/drive/MyDrive/MSR/MS 2nd Sem/ACN Project
#import the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#read NIMS dataset using pandas
# NOTE(review): hard-coded Drive path — assumes the NIMS flow CSV lives at
# this exact location in the mounted account; confirm before running elsewhere.
data_set = pd.read_csv("/content/drive/MyDrive/MSR/MS 2nd Sem/ACN Project/NIMS_file.csv")
# Bare expression: displays the loaded DataFrame in the notebook output cell.
data_set
"""#Data Preprocessing"""
data_set.shape
data_set.isnull()
#fill 0 inplace of nan values
data_set.fillna(0)
#different classes
protocoal_class=data_set.loc[:,"class"].values
protocoal_class
#count the number of classes
data_set['class'].value_counts()
#drop columns
data_set=data_set.drop(index=data_set[data_set['class']=='lime'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='DNS'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='HTTP'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='shell'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='sftp'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='x11'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='FTP'].index)
data_set=data_set.drop(index=data_set[data_set['class']=='TELNET'].index)
data_set.head()
# Extract the independent (feature) and dependent (label) variables.
# Columns 0..22 hold the flow features; the last column is the class label.
features = data_set.iloc[:, 0:23].values
labels = data_set.iloc[:, -1].values

# Hold out 25% of the samples as a test set; the fixed seed makes the
# split reproducible across runs.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.25, random_state=0)

# Standardise the features: fit the scaler on the training data only,
# then apply the identical transform to the test data.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# --- Model: C4.5-style decision tree (entropy criterion, depth-limited) ---
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
clf.fit(x_train, y_train)

# --- Predict labels for the held-out test set ---
y_pred = clf.predict(x_test)

# --- Evaluation ---
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix (bare expression: displayed in the notebook output).
cm = confusion_matrix(y_test, y_pred)
cm

# Overall accuracy on the test set.
metrics.accuracy_score(y_test, y_pred)

# Per-class precision / recall / F1 summary.
print(classification_report(y_test, y_pred))