# Skenario 1: Dataset 1, Tanpa K-Fold


In [None]:
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Scenario 1: dataset 1, single 80/20 hold-out split (no cross-validation).
data_train = pd.read_csv('/content/dataset1.csv')

# Keep only rows whose 'Class' label is neither missing nor +/-infinity,
# so the integer cast of the label column below operates on finite values.
label_missing = data_train['Class'].isna()
label_infinite = data_train['Class'].isin([float('inf'), float('-inf')])
data_train = data_train[~(label_missing | label_infinite)]

# Separate the feature columns from the integer-encoded target.
features = data_train.drop(columns='Class')
labels = data_train['Class'].astype('int64')

# Hold out 20% of the rows for testing; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Build and train the random forest in one step (fit() returns the estimator).
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Evaluate on the training split first, then on the held-out test split.
# After the loop, y_pred / rf_accuracy / rf_report hold the test-split results.
evaluation_splits = (
    ("Hasil evaluasi menggunakan data pelatihan", X_train, y_train),
    ("Hasil evaluasi menggunakan data test", X_test, y_test),
)
for heading, X_split, y_split in evaluation_splits:
    y_pred = rf_model.predict(X_split)

    print(heading)
    rf_accuracy = accuracy_score(y_split, y_pred)
    rf_report = classification_report(y_split, y_pred)
    print("Akurasi Random Forest:", rf_accuracy)
    print("Laporan Klasifikasi:")
    print(rf_report)

Hasil evaluasi menggunakan data pelatihan
Akurasi Random Forest: 1.0
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    227565
           1       1.00      1.00      1.00    227339

    accuracy                           1.00    454904
   macro avg       1.00      1.00      1.00    454904
weighted avg       1.00      1.00      1.00    454904

Hasil evaluasi menggunakan data test
Akurasi Random Forest: 0.9998593109755025
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56750
           1       1.00      1.00      1.00     56976

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726



# Skenario 2: Dataset 1, Dengan K-Fold

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import KFold

# 1. Import dataset
data_train = pd.read_csv('/content/dataset1.csv')

# 2. Pisahkan fitur (X) dan label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. K-Fold Cross Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 4. Model Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Menyimpan hasil evaluasi
fold = 1
accuracy_scores = []

print("Evaluasi Model Menggunakan K-Fold Cross Validation (k=5)\n")

for train_index, test_index in kf.split(X):
    # Membagi data berdasarkan indeks dari K-Fold
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Melatih model
    rf_model.fit(X_train, y_train)

    # Prediksi pada data test
    y_pred = rf_model.predict(X_test)

    # Evaluasi performa model
    fold_accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(fold_accuracy)

    print(f"Fold {fold}")
    print("Akurasi:", fold_accuracy)
    print("Laporan Klasifikasi:")
    print(classification_report(y_test, y_pred))
    print("-" * 50)

    fold += 1

# Rata-rata akurasi
mean_accuracy = sum(accuracy_scores) / len(accuracy_scores)
print("Rata-rata Akurasi dari 5-Fold Cross Validation:", mean_accuracy)

Evaluasi Model Menggunakan K-Fold Cross Validation (k=5)

Fold 1
Akurasi: 0.9998417248474404
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56750
           1       1.00      1.00      1.00     56976

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

--------------------------------------------------
Fold 2
Akurasi: 0.9998505179114714
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56950
           1       1.00      1.00      1.00     56776

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

--------------------------------------------------
Fold 3
Akurasi: 0.999815345655347
Laporan Klasifikasi:
              preci

# Skenario 3: Dataset 2, Tanpa K-Fold

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Scenario 3: dataset 2, single 80/20 hold-out split (no cross-validation).
data_train = pd.read_csv('/content/dataset2.csv')

# Drop rows whose 'Class' label is missing or +/-infinity before casting:
# pandas refuses to cast non-finite values to int64. This is a no-op when
# the file contains no such rows.
data_train = data_train.dropna(subset=['Class'])
data_train = data_train[~data_train['Class'].isin([float('inf'), float('-inf')])]

# Separate the feature columns from the integer-encoded target. Distinct
# names are used for the unsplit data so X_train / y_train only ever refer
# to the training portion.
features = data_train.drop('Class', axis=1)
labels = data_train['Class'].astype('int64')

# Hold out 20% of the rows for testing; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate on the training split first, then on the held-out test split.
# After the loop, y_pred / rf_accuracy / rf_report hold the test-split results.
for heading, X_split, y_split in (
    ("Hasil evaluasi menggunakan data pelatihan", X_train, y_train),
    ("Hasil evaluasi menggunakan data test", X_test, y_test),
):
    # Predict on the current split
    y_pred = rf_model.predict(X_split)

    print(heading)
    # Score the predictions
    rf_accuracy = accuracy_score(y_split, y_pred)
    rf_report = classification_report(y_split, y_pred)
    print("Akurasi Random Forest:", rf_accuracy)
    print("Laporan Klasifikasi:")
    print(rf_report)

Hasil evaluasi menggunakan data pelatihan
Akurasi Random Forest: 1.0
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    226790
           1       1.00      1.00      1.00    226414

    accuracy                           1.00    453204
   macro avg       1.00      1.00      1.00    453204
weighted avg       1.00      1.00      1.00    453204

Hasil evaluasi menggunakan data test
Akurasi Random Forest: 0.9997616988226157
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56463
           1       1.00      1.00      1.00     56839

    accuracy                           1.00    113302
   macro avg       1.00      1.00      1.00    113302
weighted avg       1.00      1.00      1.00    113302



# Skenario 4: Dataset 2, Dengan K-Fold

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import KFold

# 1. Import dataset
data_train = pd.read_csv('/content/dataset2.csv')

# 2. Pisahkan fitur (X) dan label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. K-Fold Cross Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 4. Model Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Menyimpan hasil evaluasi
fold = 1
accuracy_scores = []

print("Evaluasi Model Menggunakan K-Fold Cross Validation (k=5)\n")

for train_index, test_index in kf.split(X):
    # Membagi data berdasarkan indeks dari K-Fold
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Melatih model
    rf_model.fit(X_train, y_train)

    # Prediksi pada data test
    y_pred = rf_model.predict(X_test)

    # Evaluasi performa model
    fold_accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(fold_accuracy)

    print(f"Fold {fold}")
    print("Akurasi:", fold_accuracy)
    print("Laporan Klasifikasi:")
    print(classification_report(y_test, y_pred))
    print("-" * 50)

    fold += 1

# Rata-rata akurasi
mean_accuracy = sum(accuracy_scores) / len(accuracy_scores)
print("Rata-rata Akurasi dari 5-Fold Cross Validation:", mean_accuracy)

Evaluasi Model Menggunakan K-Fold Cross Validation (k=5)

Fold 1
Akurasi: 0.9996999170358863
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56463
           1       1.00      1.00      1.00     56839

    accuracy                           1.00    113302
   macro avg       1.00      1.00      1.00    113302
weighted avg       1.00      1.00      1.00    113302

--------------------------------------------------
Fold 2
Akurasi: 0.9997616967193582
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56751
           1       1.00      1.00      1.00     56550

    accuracy                           1.00    113301
   macro avg       1.00      1.00      1.00    113301
weighted avg       1.00      1.00      1.00    113301

--------------------------------------------------
Fold 3
Akurasi: 0.9997440446244958
Laporan Klasifikasi:
              prec