In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
import pandas as pd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
from sklearn.feature_selection import SelectFromModel
from sklearn.decomposition import PCA
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, LSTM, RepeatVector, TimeDistributed, Dropout


In [2]:
# ----------------#
# 1. IMPORT DATA  #
# ----------------#

data = pd.read_csv('/content/drive/MyDrive/ML KAH/dataset/cdc_diabetes_health_indicators.csv')
print("Data shape:", data.shape)
print("\nSample data:")
print(data.head())
print("\nData info:")
print(data.info())
print("\nStatistik deskriptif:")
print(data.describe())

Data shape: (253680, 22)

Sample data:
   HighBP  HighChol  CholCheck  BMI  Smoker  Stroke  HeartDiseaseorAttack  \
0       1         1          1   40       1       0                     0   
1       0         0          0   25       1       0                     0   
2       1         1          1   28       0       0                     0   
3       1         0          1   27       0       0                     0   
4       1         1          1   24       0       0                     0   

   PhysActivity  Fruits  Veggies  ...  NoDocbcCost  GenHlth  MentHlth  \
0             0       0        1  ...            0        5        18   
1             1       0        0  ...            1        3         0   
2             0       1        0  ...            1        5        30   
3             1       1        1  ...            0        2         0   
4             1       1        1  ...            0        2         3   

   PhysHlth  DiffWalk  Sex  Age  Education  Income  target 

In [3]:
# -----------------#
# 2. Preprocessing #
# -----------------#

print("\n=== 2. Preprocessing ===")
# Pisahkan fitur dan target
X = data.drop('target', axis=1)
if 'ID' in X.columns:
    X = X.drop('ID', axis=1)  # Hapus kolom ID jika ada
y = data['target']

print("Fitur shape:", X.shape)
print("Target shape:", y.shape)

# Normalisasi/Standardisasi data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)

print("Data setelah scaling (sample):")
print(X_scaled_df.head())


=== 2. Preprocessing ===
Fitur shape: (253680, 21)
Target shape: (253680,)
Data setelah scaling (sample):
     HighBP  HighChol  CholCheck       BMI    Smoker    Stroke  \
0  1.153688  1.165254   0.196922  1.757936  1.120927 -0.205637   
1 -0.866785 -0.858182  -5.078164 -0.511806  1.120927 -0.205637   
2  1.153688  1.165254   0.196922 -0.057858 -0.892119 -0.205637   
3  1.153688 -0.858182   0.196922 -0.209174 -0.892119 -0.205637   
4  1.153688  1.165254   0.196922 -0.663122 -0.892119 -0.205637   

   HeartDiseaseorAttack  PhysActivity    Fruits   Veggies  ...  AnyHealthcare  \
0             -0.322458     -1.762814 -1.316872  0.482087  ...       0.226863   
1             -0.322458      0.567275 -1.316872 -2.074316  ...      -4.407954   
2             -0.322458     -1.762814  0.759375 -2.074316  ...       0.226863   
3             -0.322458      0.567275  0.759375  0.482087  ...       0.226863   
4             -0.322458      0.567275  0.759375  0.482087  ...       0.226863   

   NoDocb

In [4]:
# ----------------------------------------#
# 3. Feature Selection use  Random Forest #
# ----------------------------------------#

print("\n=== 3. Feature Selection dengan Random Forest ===")
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_scaled, y)

# Plotting feature importances
feature_importances = pd.DataFrame({
    'Feature': X.columns,
    'Importance': rf.feature_importances_
}).sort_values('Importance', ascending=False)

print("Feature importances:")
print(feature_importances)

plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feature_importances[:10])
plt.title('Top 10 Feature Importances')
plt.tight_layout()
plt.savefig('/content/drive/MyDrive/ML KAH/feature_importances.png')
plt.close()

# Pilih fitur berdasarkan threshold importance
selector = SelectFromModel(rf, threshold='mean', prefit=True)
X_selected = selector.transform(X_scaled)
selected_feature_indices = selector.get_support()
selected_features = [feature for feature, selected in zip(X.columns, selected_feature_indices) if selected]

print(f"Jumlah fitur terpilih: {X_selected.shape[1]} dari {X_scaled.shape[1]}")
print("Fitur terpilih:", selected_features)


=== 3. Feature Selection dengan Random Forest ===
Feature importances:
                 Feature  Importance
3                    BMI    0.184692
18                   Age    0.122642
20                Income    0.099875
15              PhysHlth    0.085016
19             Education    0.070201
13               GenHlth    0.069541
14              MentHlth    0.064435
0                 HighBP    0.043401
8                 Fruits    0.032727
4                 Smoker    0.032694
17                   Sex    0.027621
1               HighChol    0.026871
9                Veggies    0.026457
7           PhysActivity    0.025682
16              DiffWalk    0.023368
6   HeartDiseaseorAttack    0.017700
12           NoDocbcCost    0.015076
5                 Stroke    0.012153
11         AnyHealthcare    0.008578
10     HvyAlcoholConsump    0.007665
2              CholCheck    0.003603
Jumlah fitur terpilih: 7 dari 21
Fitur terpilih: ['BMI', 'GenHlth', 'MentHlth', 'PhysHlth', 'Age', 'Education', 'I

In [6]:
# ----------------------------#
# 4. Split Train/Test (75:25) #
# ----------------------------#

print("\n=== 4. Split Train/Test (75:25) ===")
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.25, random_state=42, stratify=y)
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")


=== 4. Split Train/Test (75:25) ===
X_train shape: (190260, 7)
X_test shape: (63420, 7)


In [None]:
# -------------------------------------------------#
# 5. Optional: LSTM Autoencoder Feature Extraction #
# -------------------------------------------------#

print("\n=== 5. LSTM Autoencoder untuk Feature Extraction ===")
#  Reshape data untuk LSTM [samples, timesteps, features]
timesteps = 1
X_train_lstm = X_train.reshape(X_train.shape[0], timesteps, X_train.shape[1])
X_test_lstm = X_test.reshape(X_test.shape[0], timesteps, X_test.shape[1])

# Buat model LSTM Autoencoder
input_dim = X_train.shape[1]
encoding_dim = max(2, input_dim // 2)  # Setidaknya 2 fitur hasil encoding

input_layer = Input(shape=(timesteps, input_dim))
# Encoder
encoder = LSTM(encoding_dim, activation='relu', return_sequences=False)(input_layer)
# Decoder
decoder = RepeatVector(timesteps)(encoder)
decoder = LSTM(input_dim, activation='relu', return_sequences=True)(decoder)
decoder = TimeDistributed(Dense(input_dim))(decoder)

# Autoencoder model
autoencoder = Model(inputs=input_layer, outputs=decoder)
encoder_model = Model(inputs=input_layer, outputs=encoder)

# Compile model
autoencoder.compile(optimizer='adam', loss='mse')

# Model summary
print("LSTM Autoencoder Summary:")
autoencoder.summary()

# Train autoencoder
history = autoencoder.fit(
    X_train_lstm, X_train_lstm,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    verbose=1
)

# Plot training loss
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('LSTM Autoencoder Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()
plt.savefig('/content/drive/MyDrive/ML KAH/autoencoder_loss.png')
plt.close()

# Extract features using encoder
X_train_encoded = encoder_model.predict(X_train_lstm)
X_test_encoded = encoder_model.predict(X_test_lstm)

print(f"Encoded features shape: {X_train_encoded.shape}")


=== 5. LSTM Autoencoder untuk Feature Extraction ===
LSTM Autoencoder Summary:


Epoch 1/50
[1m4757/4757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 4ms/step - loss: 0.5453 - val_loss: 0.2814
Epoch 2/50
[1m4757/4757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - loss: 0.2694 - val_loss: 0.2551
Epoch 3/50
[1m4757/4757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 4ms/step - loss: 0.2495 - val_loss: 0.2497
Epoch 4/50
[1m4757/4757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 4ms/step - loss: 0.2454 - val_loss: 0.2493
Epoch 5/50
[1m4757/4757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 5ms/step - loss: 0.2464 - val_loss: 0.2481
Epoch 6/50
[1m4757/4757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 4ms/step - loss: 0.2451 - val_loss: 0.2465
Epoch 7/50
[1m4757/4757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 4ms/step - loss: 0.2455 - val_loss: 0.2456
Epoch 8/50
[1m4757/4757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 4ms/step - loss: 0.2445 - val_loss: 0.2455
Epoch 9/50
[1m4

In [None]:
# ----------------------------------------------------------#
# 6. Feature Fusion (PCA + Random Forest selected features) #
# ----------------------------------------------------------#

print("\n=== 6. Feature Fusion (PCA + Random Forest selected features) ===")
# Implementasi feature fusion dengan Original features + Encoded features
X_train_original = X_train
X_test_original = X_test

# Gabungkan fitur asli dengan fitur hasil encoding
X_train_fused = np.hstack((X_train_original, X_train_encoded))
X_test_fused = np.hstack((X_test_original, X_test_encoded))

print(f"Feature fusion shape: {X_train_fused.shape}")

# Custom Feature Fusion untuk fitur tertentu
X_train_df = pd.DataFrame(X_train, columns=selected_features)
X_test_df = pd.DataFrame(X_test, columns=selected_features)

# Jika ada kolom BMI dan PhysActivity dalam fitur terpilih, buat fitur baru
if 'BMI' in selected_features and 'PhysActivity' in selected_features:
    # Dapatkan indeks original dari kolom ini
    bmi_idx = selected_features.index('BMI')
    physact_idx = selected_features.index('PhysActivity')

    # Buat fitur baru BMI_Activity: BMI/(PhysActivity+1) untuk menghindari div/0
    X_train_fused_df = pd.DataFrame(X_train_fused)
    X_test_fused_df = pd.DataFrame(X_test_fused)

    # Buat kolom baru di dataframe
    X_train_fused_df['BMI_Activity'] = X_train_df['BMI'] / (X_train_df['PhysActivity'] + 1)
    X_test_fused_df['BMI_Activity'] = X_test_df['BMI'] / (X_test_df['PhysActivity'] + 1)

    # Konversi kembali ke numpy array
    X_train_fused = X_train_fused_df.values
    X_test_fused = X_test_fused_df.values

    print("Fitur baru BMI_Activity ditambahkan")

# Jika ada kolom HighBP dan HighChol dalam fitur terpilih, buat fitur baru HighRisk
if 'HighBP' in selected_features and 'HighChol' in selected_features:
    highbp_idx = selected_features.index('HighBP')
    highchol_idx = selected_features.index('HighChol')

    # Buat fitur baru HighRisk: 1 jika HighBP=1 ATAU HighChol=1, 0 jika tidak
    X_train_fused_df = pd.DataFrame(X_train_fused)
    X_test_fused_df = pd.DataFrame(X_test_fused)

    # Buat kolom baru di dataframe
    X_train_fused_df['HighRisk'] = ((X_train_df['HighBP'] + X_train_df['HighChol']) > 0).astype(int)
    X_test_fused_df['HighRisk'] = ((X_test_df['HighBP'] + X_test_df['HighChol']) > 0).astype(int)

    # Konversi kembali ke numpy array
    X_train_fused = X_train_fused_df.values
    X_test_fused = X_test_fused_df.values

    print("Fitur baru HighRisk ditambahkan")


=== 6. Feature Fusion (PCA + Random Forest selected features) ===
Feature fusion shape: (190260, 10)


In [None]:
# --------------------------------------------------#
# 7. Feature Selection Ulang setelah Feature Fusion #
# --------------------------------------------------#

print("\n=== 7. Feature Selection Ulang setelah Feature Fusion ===")
rf_after_fusion = RandomForestClassifier(n_estimators=100, random_state=42)
rf_after_fusion.fit(X_train_fused, y_train)

# Pilih fitur lagi setelah fusion
selector_after_fusion = SelectFromModel(rf_after_fusion, threshold='mean', prefit=True)
X_train_final = selector_after_fusion.transform(X_train_fused)
X_test_final = selector_after_fusion.transform(X_test_fused)

print(f"Jumlah fitur final: {X_train_final.shape[1]} dari {X_train_fused.shape[1]}")


=== 7. Feature Selection Ulang setelah Feature Fusion ===
Jumlah fitur final: 4 dari 10


In [None]:
# 8. Train Final Model (XGBoost + Cross Validation)
print("\n=== 8. Train Final Model (XGBoost + Cross Validation) ===")

# Import XGBoost
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, cross_val_score

# Parameter tuning untuk XGBoost
param_grid = {
    'n_estimators': [100, 200],
    'learning_rate': [0.01, 0.1, 0.3],
    'max_depth': [3, 5, 7],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0],
    'gamma': [0, 0.1]
}

# Grid search with cross-validation
xgb_grid = GridSearchCV(
    xgb.XGBClassifier(objective='binary:logistic', random_state=42, use_label_encoder=False, eval_metric='logloss'),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1
)

xgb_grid.fit(X_train_final, y_train)

# Best model parameters
print("Best parameters:", xgb_grid.best_params_)
print("Best cross-validation score: {:.4f}".format(xgb_grid.best_score_))

# Get the best model
best_xgb = xgb_grid.best_estimator_

# Cross-validation with the best model
cv_scores = cross_val_score(best_xgb, X_train_final, y_train, cv=5, scoring='accuracy')
print("Cross-validation accuracy: {:.4f} ± {:.4f}".format(cv_scores.mean(), cv_scores.std()))

# Feature importance from XGBoost
feature_importances = best_xgb.feature_importances_
print("\nXGBoost Feature Importances:")
for i, importance in enumerate(feature_importances):
    print(f"Feature {i}: {importance}")

# Optional: Plot XGBoost feature importances
plt.figure(figsize=(10, 6))
xgb.plot_importance(best_xgb, max_num_features=20)
plt.title('XGBoost Feature Importances')
plt.tight_layout()
plt.savefig('/content/drive/MyDrive/ML KAH/xgboost_feature_importances.png')
plt.close()


=== 8. Train Final Model (XGBoost + Cross Validation dengan Sampling) ===
Dataset training asli: (190260, 4)
Dataset sample untuk tuning parameter: (38052, 4) (20.0% dari data training)
Melakukan grid search dengan 18 kombinasi parameter...
Memulai GridSearchCV pada sample data...
Fitting 5 folds for each of 18 candidates, totalling 90 fits
Parameter terbaik: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Skor cross-validation terbaik: 0.8619

Melatih model final dengan parameter terbaik pada seluruh data training...
Melatih model XGBoost final...
Mencoba training dengan early stopping...
[0]	validation_0-logloss:0.40717
[1]	validation_0-logloss:0.39783
[2]	validation_0-logloss:0.39337
[3]	validation_0-logloss:0.38565
[4]	validation_0-logloss:0.37929
[5]	validation_0-logloss:0.37370
[6]	validation_0-logloss:0.36890
[7]	validation_0-logloss:0.36475
[8]	validation_0-logloss:0.36122
[9]	validation_0-logloss:0.35813
[10]	validation_0

<Figure size 1200x600 with 0 Axes>

In [None]:

# 9. Final Testing
print("\n=== 9. Final Testing ===")
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
from sklearn.metrics import matthews_corrcoef, cohen_kappa_score, precision_recall_curve, average_precision_score

# Prediksi pada data test
y_pred = best_xgb.predict(X_test_final)
y_pred_proba = best_xgb.predict_proba(X_test_final)[:, 1]

# Evaluasi model - metrics untuk publikasi jurnal
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc_value = roc_auc_score(y_test, y_pred_proba)
mcc = matthews_corrcoef(y_test, y_pred)  # Matthews Correlation Coefficient
kappa = cohen_kappa_score(y_test, y_pred)  # Cohen's Kappa
average_precision = average_precision_score(y_test, y_pred_proba)  # Average Precision

# Tampilkan hasil metrik untuk publikasi jurnal
print("\n" + "="*50)
print("HASIL EVALUASI MODEL UNTUK PUBLIKASI JURNAL")
print("="*50)
print(f"Accuracy      : {accuracy:.4f}")
print(f"Precision     : {precision:.4f}")
print(f"Recall        : {recall:.4f}")
print(f"F1-Score      : {f1:.4f}")
print(f"ROC AUC       : {roc_auc_value:.4f}")
print(f"MCC           : {mcc:.4f}")
print(f"Cohen's Kappa : {kappa:.4f}")
print(f"Avg Precision : {average_precision:.4f}")
print("="*50)

# Simpan hasil untuk jurnal
with open('/content/drive/MyDrive/ML KAH/hasil_evaluasi_untuk_jurnal.txt', 'w') as f:
    f.write("HASIL EVALUASI MODEL UNTUK PUBLIKASI JURNAL\n")
    f.write("="*50 + "\n")
    f.write(f"Accuracy      : {accuracy:.4f}\n")
    f.write(f"Precision     : {precision:.4f}\n")
    f.write(f"Recall        : {recall:.4f}\n")
    f.write(f"F1-Score      : {f1:.4f}\n")
    f.write(f"ROC AUC       : {roc_auc_value:.4f}\n")
    f.write(f"MCC           : {mcc:.4f}\n")
    f.write(f"Cohen's Kappa : {kappa:.4f}\n")
    f.write(f"Avg Precision : {average_precision:.4f}\n")
    f.write("="*50 + "\n\n")
    f.write("Classification Report:\n")
    f.write(classification_report(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Simpan model terbaik
import joblib
import pandas as pd
from datetime import datetime

# Simpan model
joblib.dump(best_xgb, '/content/drive/MyDrive/ML KAH/best_xgb_model.joblib')
joblib.dump(scaler, '/content/drive/MyDrive/ML KAH/scaler.joblib')
joblib.dump(selector, '/content/drive/MyDrive/ML KAH/feature_selector.joblib')
joblib.dump(selector_after_fusion, '/content/drive/MyDrive/ML KAH/feature_selector_after_fusion.joblib')
joblib.dump(encoder_model, '/content/drive/MyDrive/ML KAH/lstm_encoder.joblib')

# Simpan hasil eksperimen untuk jurnal
experiment_results = {
    'Model': 'XGBoost + LSTM Autoencoder + Feature Fusion',
    'Tanggal': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    'Dataset_Size': X.shape[0],
    'Original_Features': X.shape[1],
    'Selected_Features': len(selected_features),
    'Final_Features': X_train_final.shape[1],
    'Best_Parameters': xgb_grid.best_params_,
    'CV_Accuracy_Mean': cv_scores.mean(),
    'CV_Accuracy_Std': cv_scores.std(),
    'Test_Accuracy': accuracy,
    'Test_Precision': precision,
    'Test_Recall': recall,
    'Test_F1': f1,
    'Test_ROC_AUC': roc_auc_value,
    'Test_MCC': mcc,
    'Test_Kappa': kappa,
    'Test_Avg_Precision': average_precision
}

# Simpan dalam format CSV
pd.DataFrame([experiment_results]).to_csv('hasil_eksperimen_untuk_jurnal.csv', index=False)

# Simpan hasil dengan format tabel untuk jurnal
with open('/content/drive/MyDrive/ML KAH/hasil_table_untuk_jurnal.txt', 'w') as f:
    f.write("Table X: Performance metrics of the proposed hybrid model for diabetes prediction\n\n")
    f.write("| Metric | Value |\n")
    f.write("|--------|-------|\n")
    f.write(f"| Accuracy | {accuracy:.4f} |\n")
    f.write(f"| Precision | {precision:.4f} |\n")
    f.write(f"| Recall | {recall:.4f} |\n")
    f.write(f"| F1-Score | {f1:.4f} |\n")
    f.write(f"| ROC AUC | {roc_auc_value:.4f} |\n")
    f.write(f"| Matthews Correlation Coefficient | {mcc:.4f} |\n")
    f.write(f"| Cohen's Kappa | {kappa:.4f} |\n")
    f.write(f"| Average Precision | {average_precision:.4f} |\n\n")
    f.write("*Note: The proposed model combines XGBoost with LSTM Autoencoder feature extraction and custom feature fusion techniques.")

print("\nProses selesai! Model terbaik tersimpan sebagai 'best_xgb_model.joblib'")

# Buat fungsi untuk prediksi data baru
def predict_diabetes(new_data):
    """
    Memprediksi diabetes pada data baru

    Parameters:
    -----------
    new_data : pandas DataFrame
        Data baru yang akan diprediksi, harus memiliki kolom yang sama dengan dataset asli

    Returns:
    --------
    prediksi : array
        Hasil prediksi (0: tidak diabetes, 1: diabetes)
    probabilitas : array
        Probabilitas prediksi
    """
    # Preprocessing
    if 'ID' in new_data.columns:
        new_data = new_data.drop('ID', axis=1)
    if 'Diabetes_binary' in new_data.columns:
        new_data = new_data.drop('Diabetes_binary', axis=1)

    # Scaling
    X_new_scaled = scaler.transform(new_data)

    # Feature selection pertama
    X_new_selected = selector.transform(X_new_scaled)

    # LSTM encoding
    X_new_lstm = X_new_selected.reshape(X_new_selected.shape[0], 1, X_new_selected.shape[1])
    X_new_encoded = encoder_model.predict(X_new_lstm)

    # Feature fusion
    X_new_fused = np.hstack((X_new_selected, X_new_encoded))

    # Feature selection final
    X_new_final = selector_after_fusion.transform(X_new_fused)

    # Prediksi
    prediction = best_xgb.predict(X_new_final)
    probability = best_xgb.predict_proba(X_new_final)[:, 1]

    return prediction, probability

print("\nContoh penggunaan fungsi prediksi:")
print("prediction, probability = predict_diabetes(new_patient_data)")


=== 9. Final Testing ===

HASIL EVALUASI MODEL UNTUK PUBLIKASI JURNAL
Accuracy      : 0.8608
Precision     : 0.5235
Recall        : 0.0101
F1-Score      : 0.0198
ROC AUC       : 0.7898
MCC           : 0.0575
Cohen's Kappa : 0.0146
Avg Precision : 0.3479

Classification Report:
              precision    recall  f1-score   support

           0       0.86      1.00      0.93     54583
           1       0.52      0.01      0.02      8837

    accuracy                           0.86     63420
   macro avg       0.69      0.50      0.47     63420
weighted avg       0.81      0.86      0.80     63420

