# Skenario 1 : Dataset 1, Tanpa KFold

In [None]:
pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 98.7/98.7 MB 8.0 MB/s eta 0:00:00
Installing collected packages: catboost
Successfully installed catboost-1.2.7


In [None]:
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# 1. Load the dataset
data_train = pd.read_csv('dataset1.csv')

# 2. Separate the label (y) from the features (X)
y = data_train['Class'].astype('int64')
X = data_train.drop(columns='Class')

# 3. Hold out 20% of the rows for testing and train on the remaining 80%
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. CatBoost model
catboost_params = dict(
    iterations=500,
    learning_rate=0.1,
    depth=6,
    loss_function='Logloss',
    random_state=42,
    verbose=0,  # verbose=0 hides the per-iteration training log
)
catboost_model = CatBoostClassifier(**catboost_params)

# Train the model
catboost_model.fit(X_train, y_train)

# 5. Predict on the training data and score it
y_train_pred = catboost_model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
train_report = classification_report(y_train, y_train_pred)

# 6. Predict on the held-out test data and score it
y_test_pred = catboost_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_report = classification_report(y_test, y_test_pred)

# Report the training-set results first, then a separator, then the test-set results
print("Hasil evaluasi menggunakan data pelatihan:")
print("Akurasi pada Data Pelatihan:", train_accuracy)
print("Laporan Klasifikasi:")
print(train_report)
print("-" * 50)

print("Hasil evaluasi menggunakan data test:")
print("Akurasi pada Data Test:", test_accuracy)
print("Laporan Klasifikasi:")
print(test_report)


Hasil evaluasi menggunakan data pelatihan:
Akurasi pada Data Pelatihan: 0.9995801311925153
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    227565
           1       1.00      1.00      1.00    227339

    accuracy                           1.00    454904
   macro avg       1.00      1.00      1.00    454904
weighted avg       1.00      1.00      1.00    454904

--------------------------------------------------
Hasil evaluasi menggunakan data test:
Akurasi pada Data Test: 0.9992965548775126
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56750
           1       1.00      1.00      1.00     56976

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726



# Skenario 2 : Dataset 1, Dengan KFold

In [None]:
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import KFold

# 1. Load the dataset
data_train = pd.read_csv('dataset1.csv')

# 2. Separate the features (X) from the label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Set up shuffled K-fold cross-validation with a fixed seed
# NOTE(review): plain KFold does not stratify by class; consider StratifiedKFold
# if the class balance of the data ever changes.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# 4. CatBoost model; the same estimator object is passed to fit() on every fold
catboost_model = CatBoostClassifier(iterations=500,
                                    learning_rate=0.1,
                                    depth=6,
                                    loss_function='Logloss',
                                    random_state=42,
                                    verbose=0)

# 5. Test accuracy of each fold, in fold order
accuracy_list = []

# 6. Cross-validation loop; enumerate supplies the 1-based fold number for display
for fold, (train_index, test_index) in enumerate(kfold.split(X), start=1):
    print(f"Fold {fold}")
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train the model on this fold's training rows
    catboost_model.fit(X_train, y_train)

    # Predict on this fold's held-out rows and evaluate
    y_pred = catboost_model.predict(X_test)
    fold_accuracy = accuracy_score(y_test, y_pred)
    accuracy_list.append(fold_accuracy)

    print("Akurasi:", fold_accuracy)
    print("Laporan Klasifikasi:")
    print(classification_report(y_test, y_pred))
    print("-" * 50)

# 7. Mean accuracy; the fold count in the message comes from the folds actually
# evaluated, so it stays correct if n_splits is changed above.
n_folds = len(accuracy_list)
print(f"Rata-rata Akurasi dari {n_folds} Fold Cross Validation:", sum(accuracy_list) / n_folds)

Fold 1
Akurasi: 0.9991910381091396
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56750
           1       1.00      1.00      1.00     56976

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

--------------------------------------------------
Fold 2
Akurasi: 0.9992613826213882
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56950
           1       1.00      1.00      1.00     56776

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

--------------------------------------------------
Fold 3
Akurasi: 0.9990063837644866
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.

# Skenario 3 : Dataset 2, Tanpa KFold

In [None]:
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# 1. Load the dataset
data_train = pd.read_csv('dataset2.csv')

# 2. Separate the label (y) from the features (X)
y = data_train['Class'].astype('int64')
X = data_train.drop(columns='Class')

# 3. Hold out 20% of the rows for testing and train on the remaining 80%
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. CatBoost model
catboost_params = dict(
    iterations=500,
    learning_rate=0.1,
    depth=6,
    loss_function='Logloss',
    random_state=42,
    verbose=0,  # verbose=0 hides the per-iteration training log
)
catboost_model = CatBoostClassifier(**catboost_params)

# Train the model
catboost_model.fit(X_train, y_train)

# 5. Predict on the training data and score it
y_train_pred = catboost_model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
train_report = classification_report(y_train, y_train_pred)

# 6. Predict on the held-out test data and score it
y_test_pred = catboost_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_report = classification_report(y_test, y_test_pred)

# Report the training-set results first, then a separator, then the test-set results
print("Hasil evaluasi menggunakan data pelatihan:")
print("Akurasi pada Data Pelatihan:", train_accuracy)
print("Laporan Klasifikasi:")
print(train_report)
print("-" * 50)

print("Hasil evaluasi menggunakan data test:")
print("Akurasi pada Data Test:", test_accuracy)
print("Laporan Klasifikasi:")
print(test_report)


Hasil evaluasi menggunakan data pelatihan:
Akurasi pada Data Pelatihan: 0.9989585264031209
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    226790
           1       1.00      1.00      1.00    226414

    accuracy                           1.00    453204
   macro avg       1.00      1.00      1.00    453204
weighted avg       1.00      1.00      1.00    453204

--------------------------------------------------
Hasil evaluasi menggunakan data test:
Akurasi pada Data Test: 0.9984201514536372
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56463
           1       1.00      1.00      1.00     56839

    accuracy                           1.00    113302
   macro avg       1.00      1.00      1.00    113302
weighted avg       1.00      1.00      1.00    113302



# Skenario 4 : Dataset 2, Dengan KFold

In [None]:
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import KFold

# 1. Load the dataset
data_train = pd.read_csv('dataset2.csv')

# 2. Separate the features (X) from the label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Set up shuffled K-fold cross-validation with a fixed seed
# NOTE(review): plain KFold does not stratify by class; consider StratifiedKFold
# if the class balance of the data ever changes.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# 4. CatBoost model; the same estimator object is passed to fit() on every fold
catboost_model = CatBoostClassifier(iterations=500,
                                    learning_rate=0.1,
                                    depth=6,
                                    loss_function='Logloss',
                                    random_state=42,
                                    verbose=0)

# 5. Test accuracy of each fold, in fold order
accuracy_list = []

# 6. Cross-validation loop; enumerate supplies the 1-based fold number for display
for fold, (train_index, test_index) in enumerate(kfold.split(X), start=1):
    print(f"Fold {fold}")
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train the model on this fold's training rows
    catboost_model.fit(X_train, y_train)

    # Predict on this fold's held-out rows and evaluate
    y_pred = catboost_model.predict(X_test)
    fold_accuracy = accuracy_score(y_test, y_pred)
    accuracy_list.append(fold_accuracy)

    print("Akurasi:", fold_accuracy)
    print("Laporan Klasifikasi:")
    print(classification_report(y_test, y_pred))
    print("-" * 50)

# 7. Mean accuracy; the fold count in the message comes from the folds actually
# evaluated, so it stays correct if n_splits is changed above.
n_folds = len(accuracy_list)
print(f"Rata-rata Akurasi dari {n_folds} Fold Cross Validation:", sum(accuracy_list) / n_folds)

Fold 1
Akurasi: 0.9985348890575629
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56463
           1       1.00      1.00      1.00     56839

    accuracy                           1.00    113302
   macro avg       1.00      1.00      1.00    113302
weighted avg       1.00      1.00      1.00    113302

--------------------------------------------------
Fold 2
Akurasi: 0.9987290491699102
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56751
           1       1.00      1.00      1.00     56550

    accuracy                           1.00    113301
   macro avg       1.00      1.00      1.00    113301
weighted avg       1.00      1.00      1.00    113301

--------------------------------------------------
Fold 3
Akurasi: 0.9989850045454144
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.