# Skenario 1 : Dataset 1, Tanpa KFold

In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# 1. Import dataset
data_train = pd.read_csv('dataset1.csv')

# 2. Pisahkan fitur (X) dan label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Train-test split (80% data latih, 20% data uji)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Model Isolation Forest
iso_forest = IsolationForest(contamination=0.1, random_state=42)
iso_forest.fit(X_train)

# 5. Prediksi
# Isolation Forest mengembalikan -1 untuk outlier dan 1 untuk inlier
y_train_pred = iso_forest.predict(X_train)
y_test_pred = iso_forest.predict(X_test)

# Konversi prediksi (-1 menjadi 0 untuk kesesuaian label biner 0/1)
y_train_pred = [0 if x == -1 else 1 for x in y_train_pred]
y_test_pred = [0 if x == -1 else 1 for x in y_test_pred]

# 6. Evaluasi performa model
print("Hasil Evaluasi Menggunakan Data Train:")
print("Akurasi:", accuracy_score(y_train, y_train_pred))
print("Laporan Klasifikasi:")
print(classification_report(y_train, y_train_pred))

print("\nHasil Evaluasi Menggunakan Data Test:")
print("Akurasi:", accuracy_score(y_test, y_test_pred))
print("Laporan Klasifikasi:")
print(classification_report(y_test, y_test_pred))


Hasil Evaluasi Menggunakan Data Train:
Akurasi: 0.42266500184654343
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.11      0.02      0.04    227565
           1       0.46      0.82      0.59    227339

    accuracy                           0.42    454904
   macro avg       0.29      0.42      0.31    454904
weighted avg       0.29      0.42      0.31    454904


Hasil Evaluasi Menggunakan Data Test:
Akurasi: 0.42320137875244007
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.12      0.02      0.04     56750
           1       0.46      0.82      0.59     56976

    accuracy                           0.42    113726
   macro avg       0.29      0.42      0.31    113726
weighted avg       0.29      0.42      0.31    113726



# Skenario 2 : Dataset 1, Dengan KFold

In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# 1. Import dataset
data_train = pd.read_csv('dataset1.csv')

# 2. Pisahkan fitur (X) dan label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Setup K-Fold (misalnya K=5)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 4. Model Isolation Forest
iso_forest = IsolationForest(contamination=0.1, random_state=42)

# 5. K-Fold Cross Validation
for fold, (train_idx, val_idx) in enumerate(kf.split(X), 1):
    print(f"\nFold {fold}")

    # Pisahkan data latih dan validasi berdasarkan indeks K-Fold
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Latih model
    iso_forest.fit(X_train)

    # Prediksi pada data validasi
    y_val_pred = iso_forest.predict(X_val)

    # Konversi prediksi (-1 menjadi 0 untuk kesesuaian label biner 0/1)
    y_val_pred = [0 if x == -1 else 1 for x in y_val_pred]

    # Evaluasi performa model
    print("Akurasi:", accuracy_score(y_val, y_val_pred))
    print("Laporan Klasifikasi:")
    print(classification_report(y_val, y_val_pred))



Fold 1
Akurasi: 0.42012380634155777
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.10      0.02      0.03     56750
           1       0.46      0.82      0.59     56976

    accuracy                           0.42    113726
   macro avg       0.28      0.42      0.31    113726
weighted avg       0.28      0.42      0.31    113726


Fold 2
Akurasi: 0.42129328385769305
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.12      0.02      0.04     56950
           1       0.46      0.82      0.59     56776

    accuracy                           0.42    113726
   macro avg       0.29      0.42      0.31    113726
weighted avg       0.29      0.42      0.31    113726


Fold 3
Akurasi: 0.4199919103810914
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.10      0.02      0.03     56861
           1       0.46      0.82      0.59     56865

    accur

# Skenario 3 : Dataset 2, Tanpa KFold

In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# 1. Import dataset
data_train = pd.read_csv('dataset2.csv')

# 2. Pisahkan fitur (X) dan label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Train-test split (80% data latih, 20% data uji)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Model Isolation Forest
iso_forest = IsolationForest(contamination=0.1, random_state=42)
iso_forest.fit(X_train)

# 5. Prediksi
# Isolation Forest mengembalikan -1 untuk outlier dan 1 untuk inlier
y_train_pred = iso_forest.predict(X_train)
y_test_pred = iso_forest.predict(X_test)

# Konversi prediksi (-1 menjadi 0 untuk kesesuaian label biner 0/1)
y_train_pred = [0 if x == -1 else 1 for x in y_train_pred]
y_test_pred = [0 if x == -1 else 1 for x in y_test_pred]

# 6. Evaluasi performa model
print("Hasil Evaluasi Menggunakan Data Train:")
print("Akurasi:", accuracy_score(y_train, y_train_pred))
print("Laporan Klasifikasi:")
print(classification_report(y_train, y_train_pred))

print("\nHasil Evaluasi Menggunakan Data Test:")
print("Akurasi:", accuracy_score(y_test, y_test_pred))
print("Laporan Klasifikasi:")
print(classification_report(y_test, y_test_pred))


Hasil Evaluasi Menggunakan Data Train:
Akurasi: 0.4061129204508345
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.03      0.01      0.01    226790
           1       0.45      0.81      0.58    226414

    accuracy                           0.41    453204
   macro avg       0.24      0.41      0.29    453204
weighted avg       0.24      0.41      0.29    453204


Hasil Evaluasi Menggunakan Data Test:
Akurasi: 0.40646237489188186
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.03      0.01      0.01     56463
           1       0.45      0.80      0.58     56839

    accuracy                           0.41    113302
   macro avg       0.24      0.41      0.29    113302
weighted avg       0.24      0.41      0.29    113302



# Skenario 4 : Dataset 2, Dengan KFold

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

# Path ke file di Google Drive
file_path = '/content/drive/My Drive/FP RSBP/dataset1.csv'

# Membaca file CSV
df = pd.read_csv(file_path)
print(df.head())

         V1        V3        V4        V5        V7        V8       V10  \
0 -0.260648  2.496266 -0.083724  0.129681  0.519014 -0.130006  0.637735   
1  0.985100  0.558056 -0.429654  0.277140  0.406466 -0.133118  0.529808   
2 -0.260272  1.728538 -0.457986  0.074062  0.743511 -0.095576  0.690708   
3 -0.152152  1.746840 -1.090178  0.249486  0.518269 -0.065130  0.575231   
4 -0.206820  1.527053 -0.448293  0.106125  0.658849 -0.212660  0.968046   

        V11       V12       V13       V14       V17       V18       V19  \
0 -0.987020  0.293438 -0.941386  0.549020  0.512307  0.333644  0.124270   
1  0.140107  1.564246  0.574074  0.627719  0.403810  0.201799 -0.340687   
2 -0.272985  0.659201  0.805173  0.616874  0.886526  0.239442 -2.366079   
3 -0.752581  0.737483  0.592994  0.559535  0.242629  2.178616 -1.345060   
4 -1.203171  1.029577  1.439310  0.241454  0.366466  0.291782  0.445317   

        V23       V26       V28  Class  
0 -0.134794 -0.434824 -0.151045      0  
1  0.079469  0.2

In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# 1. Import dataset
data_train = df

# 2. Pisahkan fitur (X) dan label (y)
X = data_train.drop('Class', axis=1)
y = data_train['Class'].astype('int64')

# 3. Setup K-Fold (misalnya K=5)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 4. Model Isolation Forest
iso_forest = IsolationForest(contamination=0.1, random_state=42)

# 5. K-Fold Cross Validation
for fold, (train_idx, val_idx) in enumerate(kf.split(X), 1):
    print(f"\nFold {fold}")

    # Pisahkan data latih dan validasi berdasarkan indeks K-Fold
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Latih model
    iso_forest.fit(X_train)

    # Prediksi pada data validasi
    y_val_pred = iso_forest.predict(X_val)

    # Konversi prediksi (-1 menjadi 0 untuk kesesuaian label biner 0/1)
    y_val_pred = [0 if x == -1 else 1 for x in y_val_pred]

    # Evaluasi performa model
    print("Akurasi:", accuracy_score(y_val, y_val_pred))
    print("Laporan Klasifikasi:")
    print(classification_report(y_val, y_val_pred))



Fold 1
Akurasi: 0.42012380634155777
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.10      0.02      0.03     56750
           1       0.46      0.82      0.59     56976

    accuracy                           0.42    113726
   macro avg       0.28      0.42      0.31    113726
weighted avg       0.28      0.42      0.31    113726


Fold 2
Akurasi: 0.42129328385769305
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.12      0.02      0.04     56950
           1       0.46      0.82      0.59     56776

    accuracy                           0.42    113726
   macro avg       0.29      0.42      0.31    113726
weighted avg       0.29      0.42      0.31    113726


Fold 3
Akurasi: 0.4199919103810914
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.10      0.02      0.03     56861
           1       0.46      0.82      0.59     56865

    accur