In [None]:
#Import Library
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib

In [None]:
# Gunakan dataset hasil clustering yang memiliki fitur Target
# Lengkapi kode berikut
df = pd.read_csv('data_clustering_inverse.csv')

In [104]:
# Tampilkan 5 baris pertama dengan function head.
df.head()

Unnamed: 0,TransactionAmount,TransactionType,Location,Channel,CustomerAge,CustomerOccupation,TransactionDuration,LoginAttempts,AccountBalance,AccountBalance_Binned,Target
0,27036.8565,Debit,San Diego,ATM,4358.0,Doctor,23500.0,5.0,76053120.0,1,2
1,721948.384,Debit,Houston,ATM,4234.0,Doctor,40900.0,5.0,204687800.0,2,3
2,242331.8265,Debit,Mesa,Online,1196.0,Student,16250.0,5.0,16697010.0,0,1
3,354028.085,Debit,Raleigh,Online,1630.0,Student,7260.0,5.0,127479800.0,1,2
4,176822.2875,Debit,Oklahoma City,ATM,1134.0,Student,49890.0,5.0,11628950.0,0,1


In [105]:
X = df.drop('Target', axis=1)
y = df['Target']

categorical_cols = X.select_dtypes(include=['object']).columns
le_dict = {}
for col in categorical_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col].astype(str))
    le_dict[col] = le

numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns
scaler = StandardScaler()
X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

# Menggunakan train_test_split() untuk melakukan pembagian dataset.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [106]:
# Buatlah model klasifikasi menggunakan Decision Tree
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred_dt = model.predict(X_test)

In [107]:
# Menyimpan Model
joblib.dump(model, 'decision_tree_model.h5')

['decision_tree_model.h5']

In [108]:
# Melatih model menggunakan algoritma klasifikasi scikit-learn selain Decision Tree.
svm_model = SVC(kernel='rbf', random_state=42)
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)


In [109]:
# Menampilkan hasil evaluasi akurasi, presisi, recall, dan F1-Score pada seluruh algoritma yang sudah dibuat.
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='weighted')
recall_dt = recall_score(y_test, y_pred_dt, average='weighted')
f1_dt = f1_score(y_test, y_pred_dt, average='weighted')

accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm, average='weighted')
recall_svm = recall_score(y_test, y_pred_svm, average='weighted')
f1_svm = f1_score(y_test, y_pred_svm, average='weighted')

results = pd.DataFrame({
    'Model': ['Decision Tree', 'SVM'],
    'Accuracy': [accuracy_dt, accuracy_svm],
    'Precision': [precision_dt, precision_svm],
    'Recall': [recall_dt, recall_svm],
    'F1-Score': [f1_dt, f1_svm]
})

results

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Decision Tree,1.0,1.0,1.0,1.0
1,SVM,0.914583,0.916606,0.914583,0.914912


In [110]:
# Menyimpan Model Selain Decision Tree
# Model ini bisa lebih dari satu
joblib.dump(svm_model, 'explore_SVM_classification.h5')

['explore_SVM_classification.h5']

In [111]:
# Lakukan Hyperparameter Tuning dan Latih ulang.
# Lakukan dalam satu cell ini saja.
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

svm = SVC(random_state=42)

grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)

grid_search.fit(X_train, y_train)
best_svm = grid_search.best_estimator_
y_pred_svm_tuned = best_svm.predict(X_test)

In [112]:
# Menampilkan hasil evaluasi akurasi, presisi, recall, dan F1-Score pada algoritma yang sudah dituning.
accuracy = accuracy_score(y_test, y_pred_svm_tuned)
precision = precision_score(y_test, y_pred_svm_tuned, average='weighted')
recall = recall_score(y_test, y_pred_svm_tuned, average='weighted')
f1 = f1_score(y_test, y_pred_svm_tuned, average='weighted')
tuning_results = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1-Score'],
    'Score': [accuracy, precision, recall, f1]
})
tuning_results

Unnamed: 0,Metric,Score
0,Accuracy,1.0
1,Precision,1.0
2,Recall,1.0
3,F1-Score,1.0


In [113]:
# Menyimpan Model hasil tuning
joblib.dump(best_svm, 'tuning_classification.h5')

['tuning_classification.h5']