In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier
from sklearn import metrics
from sklearn.feature_selection import mutual_info_classif

# Purpose: train an ExtraTrees classifier that predicts 'label' from four
# packet/byte-count columns, report the mutual information ("information
# gain") of each feature, and print accuracy, confusion matrix and a
# classification report on a held-out 20% split.

# Configuration: the values a reader is most likely to tune.
DATA_PATH = 'UNSW_NB15_testing-set.csv'
FEATURE_COLUMNS = ['spkts', 'dpkts', 'sbytes', 'dbytes']
TARGET_COLUMN = 'label'
TEST_SIZE = 0.2
# One seed shared by the split, the mutual-information estimate and the
# forest, so that a fresh Restart & Run All reproduces the printed numbers.
RANDOM_STATE = 2

# Load the data from the CSV file
df = pd.read_csv(DATA_PATH)

X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Compute the information gain of each feature on the training split.
# The estimator is randomized, so it is seeded to keep the scores stable
# from run to run.
ig_scores = mutual_info_classif(X_train, y_train, random_state=RANDOM_STATE)

# Show the information gain of each feature
for feature, ig_score in zip(X_train.columns, ig_scores):
    print(f'Information Gain for {feature}: {ig_score}')

# Fit an ExtraTreesClassifier; seeded because tree construction is randomized
# and an unseeded model yields different metrics on every run.
model = ExtraTreesClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# Predict 'label' for the test set
y_pred = model.predict(X_test)

# Evaluate performance
accuracy = metrics.accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Show the confusion matrix
conf_matrix = metrics.confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix)

# Show the classification report
class_report = metrics.classification_report(y_test, y_pred)
print('Classification Report:')
print(class_report)


Information Gain for spkts: 0.17638782766219618
Information Gain for dpkts: 0.27329108091097276
Information Gain for sbytes: 0.4669911913826714
Information Gain for dbytes: 0.37246446161199254
Accuracy: 0.940317659471328
Confusion Matrix:
[[ 9938  1227]
 [  866 23038]]
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.89      0.90     11165
           1       0.95      0.96      0.96     23904

    accuracy                           0.94     35069
   macro avg       0.93      0.93      0.93     35069
weighted avg       0.94      0.94      0.94     35069

