# Skenario 1 : Dataset 1, Tanpa KFold

In [None]:
pip install --upgrade lightgbm



In [None]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# 1. Load the dataset
data_train = pd.read_csv('dataset1.csv')

# 2. Separate the features (X) from the label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Train-test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. LightGBM model configuration
lgb_params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    # 'accuracy' is not one of LightGBM's documented metrics; 'binary_error'
    # is the classification error rate, i.e. 1 - accuracy.
    'metric': 'binary_error',
    'num_leaves': 31,
    'learning_rate': 0.1,
    'verbose': -1,
    'random_state': 42
}

# 5. Build the LightGBM datasets
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

# 6. Train the model
# NOTE(review): the test split is passed as the validation set. No early
# stopping is configured here, so it should not influence the fitted model,
# but use a separate validation split if early stopping is ever added.
print("Training LightGBM...")
model = lgb.train(
    params=lgb_params,
    train_set=lgb_train,
    valid_sets=[lgb_eval]
)

# 7. Predict on the TRAIN data (to compare against the test score below)
y_train_pred = model.predict(X_train)
# Threshold the predicted probabilities at 0.5 for binary classification
y_train_pred_binary = (y_train_pred >= 0.5).astype(int)

# 8. Evaluate performance on the train data
train_accuracy = accuracy_score(y_train, y_train_pred_binary)
print("\nHasil Evaluasi Menggunakan Data Train:")
print("Akurasi LightGBM:", train_accuracy)
print("Laporan Klasifikasi:")
print(classification_report(y_train, y_train_pred_binary))

# 9. Predict on the test data
y_pred = model.predict(X_test)
# Threshold the predicted probabilities at 0.5 for binary classification
y_pred_binary = (y_pred >= 0.5).astype(int)

# 10. Evaluate performance on the test data
accuracy = accuracy_score(y_test, y_pred_binary)
print("\nHasil Evaluasi Menggunakan Data Test:")
print("Akurasi LightGBM:", accuracy)
print("Laporan Klasifikasi:")
print(classification_report(y_test, y_pred_binary))


Training LightGBM...

Hasil Evaluasi Menggunakan Data Test:
Akurasi LightGBM: 0.9986282820111496
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    227565
           1       1.00      1.00      1.00    227339

    accuracy                           1.00    454904
   macro avg       1.00      1.00      1.00    454904
weighted avg       1.00      1.00      1.00    454904


Hasil Evaluasi Menggunakan Data Test:
Akurasi LightGBM: 0.9983029386419991
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56750
           1       1.00      1.00      1.00     56976

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726



# Skenario 2 : Dataset 1, Dengan KFold

In [None]:
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import KFold

# 1. Load the dataset
data_train = pd.read_csv('dataset1.csv')

# 2. Separate the features (X) from the label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Initialise K-Fold cross validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# 4. LightGBM model configuration
lgb_params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    # 'accuracy' is not one of LightGBM's documented metrics; 'binary_error'
    # is the classification error rate, i.e. 1 - accuracy.
    'metric': 'binary_error',
    'num_leaves': 31,
    'learning_rate': 0.1,
    'verbose': -1,
    'random_state': 42
}

# 5. Collects the accuracy obtained on each fold
accuracy_list = []

# 6. Cross-validation loop (folds are numbered from 1 for display)
for fold, (train_index, test_index) in enumerate(kfold.split(X), start=1):
    print(f"Fold {fold}")
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train a fresh model on this fold's training portion
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
    model = lgb.train(
        params=lgb_params,
        train_set=lgb_train,
        valid_sets=[lgb_eval]
    )

    # Predict on the held-out portion and threshold probabilities at 0.5
    y_pred = model.predict(X_test)
    y_pred_binary = (y_pred >= 0.5).astype(int)

    fold_accuracy = accuracy_score(y_test, y_pred_binary)
    accuracy_list.append(fold_accuracy)

    print("Akurasi:", fold_accuracy)
    print("Laporan Klasifikasi:")
    print(classification_report(y_test, y_pred_binary))
    print("-" * 50)

# 7. Mean accuracy across all folds
print("Rata-rata Akurasi dari 5 Fold Cross Validation:", sum(accuracy_list) / len(accuracy_list))


Fold 1
Akurasi: 0.9984875929866521
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56750
           1       1.00      1.00      1.00     56976

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

--------------------------------------------------
Fold 2
Akurasi: 0.9985491444348698
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56950
           1       1.00      1.00      1.00     56776

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

--------------------------------------------------
Fold 3
Akurasi: 0.998478799922621
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.0

# Skenario 3 : Dataset 2, Tanpa KFold

In [None]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# 1. Load the dataset
data_train = pd.read_csv('dataset2.csv')

# 2. Separate the features (X) from the label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Train-test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. LightGBM model configuration
lgb_params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    # 'accuracy' is not one of LightGBM's documented metrics; 'binary_error'
    # is the classification error rate, i.e. 1 - accuracy.
    'metric': 'binary_error',
    'num_leaves': 31,
    'learning_rate': 0.1,
    'verbose': -1,
    'random_state': 42
}

# 5. Build the LightGBM datasets
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

# 6. Train the model
# NOTE(review): the test split is passed as the validation set. No early
# stopping is configured here, so it should not influence the fitted model,
# but use a separate validation split if early stopping is ever added.
print("Training LightGBM...")
model = lgb.train(
    params=lgb_params,
    train_set=lgb_train,
    valid_sets=[lgb_eval]
)

# 7. Predict on the TRAIN data (to compare against the test score below)
y_train_pred = model.predict(X_train)
# Threshold the predicted probabilities at 0.5 for binary classification
y_train_pred_binary = (y_train_pred >= 0.5).astype(int)

# 8. Evaluate performance on the train data
train_accuracy = accuracy_score(y_train, y_train_pred_binary)
print("\nHasil Evaluasi Menggunakan Data Train:")
print("Akurasi LightGBM:", train_accuracy)
print("Laporan Klasifikasi:")
print(classification_report(y_train, y_train_pred_binary))

# 9. Predict on the test data
y_pred = model.predict(X_test)
# Threshold the predicted probabilities at 0.5 for binary classification
y_pred_binary = (y_pred >= 0.5).astype(int)

# 10. Evaluate performance on the test data
accuracy = accuracy_score(y_test, y_pred_binary)
print("\nHasil Evaluasi Menggunakan Data Test:")
print("Akurasi LightGBM:", accuracy)
print("Laporan Klasifikasi:")
print(classification_report(y_test, y_pred_binary))


Training LightGBM...

Hasil Evaluasi Menggunakan Data Test:
Akurasi LightGBM: 0.9976346192884441
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    226790
           1       1.00      1.00      1.00    226414

    accuracy                           1.00    453204
   macro avg       1.00      1.00      1.00    453204
weighted avg       1.00      1.00      1.00    453204


Hasil Evaluasi Menggunakan Data Test:
Akurasi LightGBM: 0.9968844327549381
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56463
           1       1.00      1.00      1.00     56839

    accuracy                           1.00    113302
   macro avg       1.00      1.00      1.00    113302
weighted avg       1.00      1.00      1.00    113302



# Skenario 4 : Dataset 2, Dengan KFold

In [None]:
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import KFold

# 1. Load the dataset
data_train = pd.read_csv('dataset2.csv')

# 2. Separate the features (X) from the label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Initialise K-Fold cross validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# 4. LightGBM model configuration
lgb_params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    # 'accuracy' is not one of LightGBM's documented metrics; 'binary_error'
    # is the classification error rate, i.e. 1 - accuracy.
    'metric': 'binary_error',
    'num_leaves': 31,
    'learning_rate': 0.1,
    'verbose': -1,
    'random_state': 42
}

# 5. Collects the accuracy obtained on each fold
accuracy_list = []

# 6. Cross-validation loop (folds are numbered from 1 for display)
for fold, (train_index, test_index) in enumerate(kfold.split(X), start=1):
    print(f"Fold {fold}")
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train a fresh model on this fold's training portion
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
    model = lgb.train(
        params=lgb_params,
        train_set=lgb_train,
        valid_sets=[lgb_eval]
    )

    # Predict on the held-out portion and threshold probabilities at 0.5
    y_pred = model.predict(X_test)
    y_pred_binary = (y_pred >= 0.5).astype(int)

    fold_accuracy = accuracy_score(y_test, y_pred_binary)
    accuracy_list.append(fold_accuracy)

    print("Akurasi:", fold_accuracy)
    print("Laporan Klasifikasi:")
    print(classification_report(y_test, y_pred_binary))
    print("-" * 50)

# 7. Mean accuracy across all folds
print("Rata-rata Akurasi dari 5 Fold Cross Validation:", sum(accuracy_list) / len(accuracy_list))


Fold 1
Akurasi: 0.9972198195971828
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56463
           1       1.00      1.00      1.00     56839

    accuracy                           1.00    113302
   macro avg       1.00      1.00      1.00    113302
weighted avg       1.00      1.00      1.00    113302

--------------------------------------------------
Fold 2
Akurasi: 0.9968049708299133
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56751
           1       1.00      1.00      1.00     56550

    accuracy                           1.00    113301
   macro avg       1.00      1.00      1.00    113301
weighted avg       1.00      1.00      1.00    113301

--------------------------------------------------
Fold 3
Akurasi: 0.9972021429643163
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.