In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn
import time

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, f1_score, precision_score

In [2]:
def plot_confusion_matrix(data, labels, output_filename):
    """Draw a confusion matrix as an annotated heatmap, save it, and show it.

    Parameters
    ----------
    data : array-like
        2-D confusion matrix passed to ``seaborn.heatmap``.
    labels : sequence of str
        Tick labels applied to both the x axis (predicted) and the y axis
        (true); they must be given in the same order as the rows/columns
        of ``data``.
    output_filename : str or None
        Path the figure is written to. A falsy value (``None`` or ``""``)
        skips saving and only displays the figure.

    Returns
    -------
    None
    """
    # Apply the theme *before* creating the figure so the title and the tick
    # labels are rendered with the same font scale on every run.
    seaborn.set(color_codes=True)
    seaborn.set(font_scale=1.4)

    # A fresh figure per call: reusing figure number 1 would draw on top of
    # whatever was previously plotted there.
    fig, ax = plt.subplots(figsize=(9, 6))
    ax.set_title("Confusion Matrix")

    # Integer counts are annotated as plain integers; the default ".2g"
    # format would show large counts in scientific notation (e.g. 1.2e+05).
    # Non-integer input (e.g. a normalised matrix) keeps seaborn's default.
    fmt = "d" if np.issubdtype(np.asarray(data).dtype, np.integer) else ".2g"

    seaborn.heatmap(
        data,
        annot=True,
        fmt=fmt,
        cmap="YlGnBu",
        cbar_kws={'label': 'Scale'},
        ax=ax,
    )
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    ax.set(ylabel="True Label", xlabel="Predicted Label")

    # Save before plt.show(): some backends release the figure once shown.
    if output_filename:
        fig.savefig(output_filename, bbox_inches="tight")

    plt.show()
    
def starttime():
    """Return the current wall-clock timestamp from ``time.time()``.

    Intended to be called right before the code being timed.
    """
    started_at = time.time()
    return started_at

def endtime():
    """Return the current wall-clock timestamp from ``time.time()``.

    Intended to be called right after the code being timed; subtracting the
    value returned by ``starttime()`` gives the elapsed seconds.
    """
    finished_at = time.time()
    return finished_at

In [None]:
# Read the CSV and replace the "Label" column with its integer category codes.
dataset = pd.read_csv("/media/kmdr7/Seagate/DATASETS/IOT-23-CIC/Dataset/4-PCA-3-ZScore-2-Mean.csv")
dataset["Label"] = dataset["Label"].astype("category").cat.codes

# Target vector (encoded labels) and feature matrix (every other column).
label = dataset["Label"]
clean_dataset = dataset.drop("Label", axis=1)

# Display the encoded frame as the cell output.
dataset

In [None]:
## Split the dataset into train and test partitions.
## One seventh of the rows is held out for testing; random_state=1 keeps the
## split identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    clean_dataset,
    label,
    random_state=1,
    test_size=1/7.0,
)

In [None]:
# Fit an entropy-criterion decision tree on the training split and report the
# wall-clock training time in seconds.
start = starttime()
# random_state is fixed so the fitted tree (and every metric computed from it
# below) is reproducible across kernel restarts; without it scikit-learn may
# break ties between equally good splits differently on each run.
clf = DecisionTreeClassifier(criterion="entropy", random_state=1)
clf.fit(x_train, y_train)
# Use the matching endtime() helper for the closing timestamp.
end = endtime()
print(end - start)

In [None]:
# 5-fold cross-validation of the classifier configuration over the full
# dataset, run with 8 parallel jobs; report mean and standard deviation of
# the per-fold scores.
scores = cross_val_score(clf, clean_dataset, label, cv=5, n_jobs=8)
print(
    "mean: {:.3f} (std: {:.3f})".format(scores.mean(), scores.std()),
    end="\n\n",
)

In [None]:
# Predict labels for the held-out test split with the fitted classifier.
y_pred = clf.predict(X=x_test)

In [None]:
# Accuracy: fraction of test samples whose predicted label matches the true one
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
acc

In [None]:
# Recall on the test split (scikit-learn's default averaging settings)
recall = recall_score(y_true=y_test, y_pred=y_pred)
recall

In [None]:
# F1 score on the test split (scikit-learn's default averaging settings)
f1 = f1_score(y_true=y_test, y_pred=y_pred)
f1

In [None]:
# Precision on the test split (scikit-learn's default averaging settings)
precision = precision_score(y_true=y_test, y_pred=y_pred)
precision

In [None]:
# Build the confusion matrix for the test split and render it as a heatmap.
# NOTE(review): the tick labels assume encoded class 0 is "Benign" and class 1
# is "Malicious" — confirm against the label encoding step.
labels = ["Benign", "Malicious"]
conf = confusion_matrix(y_true=y_test, y_pred=y_pred)
plot_confusion_matrix(data=conf, labels=labels, output_filename="confusion_matrix.png")