<a href="https://colab.research.google.com/github/mariorizki-lang/mid-term-deep-learning/blob/main/midterm_classificication_DL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Instalasi dependensi dan pengunduhan dataset
!pip install -q gdown scikit-learn tensorflow

In [None]:
# Cell 2: Load and prepare the dataset
import pandas as pd
import numpy as np

# Load the dataset (the CSV is expected in the current working directory).
df = pd.read_csv('clusteringmidterm.csv')

# Drop the CUST_ID column if it exists (presumably a row identifier, not a feature).
# Non-mutating form so the cell can be re-run safely.
df = df.drop(columns=['CUST_ID'], errors='ignore')

# The target is derived from BALANCE, so the column must be present.
assert 'BALANCE' in df.columns, 'Kolom BALANCE tidak ditemukan'

# Binary target: 1 when BALANCE is at or above the median BALANCE, otherwise 0.
df['HighBalance'] = (df['BALANCE'] >= df['BALANCE'].median()).astype(int)

# Separate features and target.
# BALANCE is excluded from the features: the label is a direct threshold on it,
# so keeping it would leak the answer and make the reported metrics meaningless.
X = df.drop(columns=['HighBalance', 'BALANCE'])
y = df['HighBalance']

print('Dataset shape:', df.shape)
print('Distribusi label:')
print(y.value_counts())

Dataset shape: (8950, 18)
Distribusi label:
HighBalance
0    4475
1    4475
Name: count, dtype: int64


In [None]:
# Cell 3: Train/test split and preprocessing
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

# Every feature column is treated as numeric.
numeric_cols = X.columns

# Median imputation for all feature columns.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', SimpleImputer(strategy='median'), numeric_cols)
    ],
    remainder='drop'
)

# Standardisation step, applied after imputation.
scaler = StandardScaler()

# Split BEFORE fitting any transformer so that the held-out rows cannot influence
# the imputation medians or the scaling mean/variance (avoids train/test leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y.values, test_size=0.2, random_state=42, stratify=y
)

# Fit on the training rows only, then apply the fitted transformers to the test rows.
X_train = scaler.fit_transform(preprocessor.fit_transform(X_train_raw))
X_test = scaler.transform(preprocessor.transform(X_test_raw))

# Full-dataset matrices, kept under their original names for any later use.
# They are produced with the transformers fitted on the training rows above.
X_imputed = preprocessor.transform(X)
X_scaled = scaler.transform(X_imputed)

print('Train shape:', X_train.shape, y_train.shape)
print('Test shape:', X_test.shape, y_test.shape)

Train shape: (7160, 17) (7160,)
Test shape: (1790, 17) (1790,)


In [None]:
# Cell 4: Build and train the neural network
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling repeat across runs.
set_random_seed(42)

# Model architecture: two hidden ReLU layers with dropout, sigmoid output for a
# binary label. An explicit Input layer replaces the `input_shape=` argument on
# the first Dense layer, which Keras 3 warns against in Sequential models.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Early stopping: stop once val_loss has not improved for 5 epochs and roll the
# model back to the weights of the best epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Training; 20% of the training data is held out by Keras for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=2
)

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


90/90 - 2s - 21ms/step - accuracy: 0.8523 - loss: 0.3575 - val_accuracy: 0.9378 - val_loss: 0.1733
Epoch 2/50
90/90 - 0s - 4ms/step - accuracy: 0.9314 - loss: 0.1767 - val_accuracy: 0.9707 - val_loss: 0.1052
Epoch 3/50
90/90 - 0s - 3ms/step - accuracy: 0.9476 - loss: 0.1324 - val_accuracy: 0.9735 - val_loss: 0.0867
Epoch 4/50
90/90 - 0s - 3ms/step - accuracy: 0.9576 - loss: 0.1086 - val_accuracy: 0.9797 - val_loss: 0.0684
Epoch 5/50
90/90 - 0s - 4ms/step - accuracy: 0.9607 - loss: 0.0938 - val_accuracy: 0.9832 - val_loss: 0.0536
Epoch 6/50
90/90 - 0s - 4ms/step - accuracy: 0.9701 - loss: 0.0823 - val_accuracy: 0.9832 - val_loss: 0.0479
Epoch 7/50
90/90 - 0s - 4ms/step - accuracy: 0.9701 - loss: 0.0703 - val_accuracy: 0.9853 - val_loss: 0.0440
Epoch 8/50
90/90 - 0s - 4ms/step - accuracy: 0.9747 - loss: 0.0648 - val_accuracy: 0.9811 - val_loss: 0.0421
Epoch 9/50
90/90 - 0s - 4ms/step - accuracy: 0.9745 - loss: 0.0580 - val_accuracy: 0.9846 - val_loss: 0.0378
Epoch 10/50
90/90 - 0s - 3ms/

In [None]:
# Cell 5: Evaluate the model on the test split
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)

# Predicted probabilities as a 1-D array, and hard labels at a 0.5 cut-off.
y_pred_prob = model.predict(X_test).flatten()
y_pred = (y_pred_prob >= 0.5).astype(int)

# Label-based metrics are computed from the hard predictions ...
acc, prec, rec, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
# ... while ROC-AUC is computed from the raw probabilities.
auc = roc_auc_score(y_test, y_pred_prob)

# Print each metric on its own line.
for label, value in (
    ('Accuracy:', acc),
    ('Precision:', prec),
    ('Recall   :', rec),
    ('F1 Score:', f1),
    ('ROC-AUC :', auc),
):
    print(label, value)

# Per-class breakdown
print('Classification Report:')
print(classification_report(y_test, y_pred))

[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Accuracy: 0.9877094972067039
Precision: 0.9866220735785953
Recall   : 0.9888268156424581
F1 Score: 0.9877232142857143
ROC-AUC : 0.9996691738709779
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       895
           1       0.99      0.99      0.99       895

    accuracy                           0.99      1790
   macro avg       0.99      0.99      0.99      1790
weighted avg       0.99      0.99      0.99      1790

