In [None]:
pip install xgboost



# Skenario 1 : Dataset 1, Tanpa KFold

In [None]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# 1. Load the dataset
data_train = pd.read_csv('dataset1.csv')

# 2. Separate the features (X) from the label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Split into training and test data (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. XGBoost model
# The former `use_label_encoder=False` argument was dropped: the XGBoost build
# this notebook ran on ignores it and only prints
# 'Parameters: { "use_label_encoder" } are not used.'
xgb_model = XGBClassifier(eval_metric='logloss', random_state=42)

# Train the model
xgb_model.fit(X_train, y_train)

# 5. Predict on the training data (shows how well the model fits what it has seen)
y_train_pred = xgb_model.predict(X_train)

print("Hasil evaluasi menggunakan data pelatihan:")
train_accuracy = accuracy_score(y_train, y_train_pred)
train_report = classification_report(y_train, y_train_pred)
print("Akurasi pada Data Pelatihan:", train_accuracy)
print("Laporan Klasifikasi:")
print(train_report)
print("-" * 50)

# 6. Predict on the held-out test data
y_test_pred = xgb_model.predict(X_test)

print("Hasil evaluasi menggunakan data test:")
test_accuracy = accuracy_score(y_test, y_test_pred)
test_report = classification_report(y_test, y_test_pred)
print("Akurasi pada Data Test:", test_accuracy)
print("Laporan Klasifikasi:")
print(test_report)

Parameters: { "use_label_encoder" } are not used.



Hasil evaluasi menggunakan data pelatihan:
Akurasi pada Data Pelatihan: 0.9999120693596891
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    227565
           1       1.00      1.00      1.00    227339

    accuracy                           1.00    454904
   macro avg       1.00      1.00      1.00    454904
weighted avg       1.00      1.00      1.00    454904

--------------------------------------------------
Hasil evaluasi menggunakan data test:
Akurasi pada Data Test: 0.9995075884142588
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56750
           1       1.00      1.00      1.00     56976

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726



In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the read_csv calls in this notebook use bare relative paths
# ('dataset1.csv', 'dataset2.csv'), so it is unclear whether anything reads
# from this mount — confirm before relying on or removing this cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Skenario 2 : Dataset 1, Dengan KFold

In [None]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import KFold

# 1. Load the dataset
data_train = pd.read_csv('dataset1.csv')

# 2. Separate the features (X) from the label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. K-Fold cross validation (5 shuffled folds, fixed seed)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 4. XGBoost model
# The former `use_label_encoder=False` argument was dropped: the XGBoost build
# this notebook ran on ignores it and prints
# 'Parameters: { "use_label_encoder" } are not used.' on every fit.
xgb_model = XGBClassifier(eval_metric='logloss', random_state=42)

# Test accuracy of each fold, in fold order
accuracy_scores = []

print("Evaluasi Model XGBoost Menggunakan K-Fold Cross Validation (k=5)\n")

# enumerate(..., start=1) provides the 1-based fold number used in the report.
for fold, (train_index, test_index) in enumerate(kf.split(X), start=1):
    # Slice the data using the positional indices produced by K-Fold
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train the model on this fold's training part
    xgb_model.fit(X_train, y_train)

    # Predict on this fold's test part
    y_pred = xgb_model.predict(X_test)

    # Evaluate model performance for this fold
    fold_accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(fold_accuracy)

    print(f"Fold {fold}")
    print("Akurasi:", fold_accuracy)
    print("Laporan Klasifikasi:")
    print(classification_report(y_test, y_pred))
    print("-" * 50)

# Mean accuracy across all folds
mean_accuracy = sum(accuracy_scores) / len(accuracy_scores)
print("Rata-rata Akurasi dari 5-Fold Cross Validation:", mean_accuracy)


Evaluasi Model XGBoost Menggunakan K-Fold Cross Validation (k=5)



Parameters: { "use_label_encoder" } are not used.



Fold 1
Akurasi: 0.9994987953502278
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56750
           1       1.00      1.00      1.00     56976

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

--------------------------------------------------


Parameters: { "use_label_encoder" } are not used.



Fold 2
Akurasi: 0.999621898246663
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56950
           1       1.00      1.00      1.00     56776

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

--------------------------------------------------


Parameters: { "use_label_encoder" } are not used.



Fold 3
Akurasi: 0.9994724161581344
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56861
           1       1.00      1.00      1.00     56865

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

--------------------------------------------------


Parameters: { "use_label_encoder" } are not used.



Fold 4
Akurasi: 0.9994108647099168
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56804
           1       1.00      1.00      1.00     56922

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

--------------------------------------------------


Parameters: { "use_label_encoder" } are not used.



Fold 5
Akurasi: 0.9995691398624765
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56950
           1       1.00      1.00      1.00     56776

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

--------------------------------------------------
Rata-rata Akurasi dari 5-Fold Cross Validation: 0.9995146228654838


# Skenario 3 : Dataset 2, Tanpa KFold

In [None]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# 1. Load the dataset
data_train = pd.read_csv('dataset2.csv')

# 2. Separate the features (X) from the label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Split into training and test data (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. XGBoost model
# The former `use_label_encoder=False` argument was dropped: the XGBoost build
# this notebook ran on ignores it and only prints
# 'Parameters: { "use_label_encoder" } are not used.'
xgb_model = XGBClassifier(eval_metric='logloss', random_state=42)

# Train the model
xgb_model.fit(X_train, y_train)

# 5. Predict on the training data (shows how well the model fits what it has seen)
y_train_pred = xgb_model.predict(X_train)

print("Hasil evaluasi menggunakan data pelatihan:")
train_accuracy = accuracy_score(y_train, y_train_pred)
train_report = classification_report(y_train, y_train_pred)
print("Akurasi pada Data Pelatihan:", train_accuracy)
print("Laporan Klasifikasi:")
print(train_report)
print("-" * 50)

# 6. Predict on the held-out test data
y_test_pred = xgb_model.predict(X_test)

print("Hasil evaluasi menggunakan data test:")
test_accuracy = accuracy_score(y_test, y_test_pred)
test_report = classification_report(y_test, y_test_pred)
print("Akurasi pada Data Test:", test_accuracy)
print("Laporan Klasifikasi:")
print(test_report)

Parameters: { "use_label_encoder" } are not used.



Hasil evaluasi menggunakan data pelatihan:
Akurasi pada Data Pelatihan: 0.999763903231216
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    226790
           1       1.00      1.00      1.00    226414

    accuracy                           1.00    453204
   macro avg       1.00      1.00      1.00    453204
weighted avg       1.00      1.00      1.00    453204

--------------------------------------------------
Hasil evaluasi menggunakan data test:
Akurasi pada Data Test: 0.9992409666201832
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56463
           1       1.00      1.00      1.00     56839

    accuracy                           1.00    113302
   macro avg       1.00      1.00      1.00    113302
weighted avg       1.00      1.00      1.00    113302



# Skenario 4 : Dataset 2, Dengan KFold

In [None]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import KFold

# 1. Load the dataset
data_train = pd.read_csv('dataset2.csv')

# 2. Separate the features (X) from the label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. K-Fold cross validation (5 shuffled folds, fixed seed)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 4. XGBoost model
# The former `use_label_encoder=False` argument was dropped: the XGBoost build
# this notebook ran on ignores it and prints
# 'Parameters: { "use_label_encoder" } are not used.' on every fit.
xgb_model = XGBClassifier(eval_metric='logloss', random_state=42)

# Test accuracy of each fold, in fold order
accuracy_scores = []

print("Evaluasi Model XGBoost Menggunakan K-Fold Cross Validation (k=5)\n")

# enumerate(..., start=1) provides the 1-based fold number used in the report.
for fold, (train_index, test_index) in enumerate(kf.split(X), start=1):
    # Slice the data using the positional indices produced by K-Fold
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train the model on this fold's training part
    xgb_model.fit(X_train, y_train)

    # Predict on this fold's test part
    y_pred = xgb_model.predict(X_test)

    # Evaluate model performance for this fold
    fold_accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(fold_accuracy)

    print(f"Fold {fold}")
    print("Akurasi:", fold_accuracy)
    print("Laporan Klasifikasi:")
    print(classification_report(y_test, y_pred))
    print("-" * 50)

# Mean accuracy across all folds
mean_accuracy = sum(accuracy_scores) / len(accuracy_scores)
print("Rata-rata Akurasi dari 5-Fold Cross Validation:", mean_accuracy)


Evaluasi Model XGBoost Menggunakan K-Fold Cross Validation (k=5)



Parameters: { "use_label_encoder" } are not used.



Fold 1
Akurasi: 0.9991880108029867
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56463
           1       1.00      1.00      1.00     56839

    accuracy                           1.00    113302
   macro avg       1.00      1.00      1.00    113302
weighted avg       1.00      1.00      1.00    113302

--------------------------------------------------


Parameters: { "use_label_encoder" } are not used.



Fold 2
Akurasi: 0.9992762641106433
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56751
           1       1.00      1.00      1.00     56550

    accuracy                           1.00    113301
   macro avg       1.00      1.00      1.00    113301
weighted avg       1.00      1.00      1.00    113301

--------------------------------------------------


Parameters: { "use_label_encoder" } are not used.



Fold 3
Akurasi: 0.999311568300368
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56687
           1       1.00      1.00      1.00     56614

    accuracy                           1.00    113301
   macro avg       1.00      1.00      1.00    113301
weighted avg       1.00      1.00      1.00    113301

--------------------------------------------------


Parameters: { "use_label_encoder" } are not used.



Fold 4
Akurasi: 0.9992056557311939
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56597
           1       1.00      1.00      1.00     56704

    accuracy                           1.00    113301
   macro avg       1.00      1.00      1.00    113301
weighted avg       1.00      1.00      1.00    113301

--------------------------------------------------


Parameters: { "use_label_encoder" } are not used.



Fold 5
Akurasi: 0.9993380464426616
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56755
           1       1.00      1.00      1.00     56546

    accuracy                           1.00    113301
   macro avg       1.00      1.00      1.00    113301
weighted avg       1.00      1.00      1.00    113301

--------------------------------------------------
Rata-rata Akurasi dari 5-Fold Cross Validation: 0.9992639090775708
