In [61]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.preprocessing import MinMaxScaler 

In [62]:
x_tr_resample = pd.read_csv('../../Data/clean/X_train_smote.csv')
X_test = pd.read_csv('../../Data/clean/X_test.csv')
y_tr_resample = np.loadtxt("../../Data/clean/y_train_smote.csv", delimiter=",")
y_test = np.loadtxt("../../Data/clean/y_test.csv", delimiter=",")

In [63]:
scaler = MinMaxScaler()
x_tr_resample_scaled = scaler.fit_transform(x_tr_resample) 
X_test_scaled = scaler.transform(X_test)  

In [64]:
skf_grid = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

In [65]:
svm_params = {
    'C': [100, 500, 1000],  # Coba nilai C yang lebih tinggi untuk penalti yang lebih kuat
    'gamma': ['scale', 1, 0.1, 0.01],  # Tambah beberapa nilai gamma spesifik
    'kernel': ['rbf'],  # Coba tambahkan kernel polynomial
    'degree': [2, 3],  # Degree untuk kernel polynomial
    'tol': [1e-3],  # Gunakan toleransi yang lebih ketat
    'decision_function_shape': ['ovr'],
    'class_weight': [None, 'balanced'],  # Masukkan kembali balanced untuk mengatasi ketidakseimbangan kelas
    'shrinking': [True],
    'probability': [True],
    'max_iter': [15000]
}


In [66]:
svm = SVC(random_state=42)

In [67]:
svm_grid = GridSearchCV(svm, svm_params, cv=skf_grid, n_jobs=-1, verbose=1)

In [68]:
svm_grid.fit(x_tr_resample_scaled, y_tr_resample) 

Fitting 10 folds for each of 48 candidates, totalling 480 fits




In [69]:
# Get best parameters
best_params = svm_grid.best_params_

In [70]:
# Create new model with best parameters
best_model = SVC(**best_params, random_state=42)

In [71]:
# Define Stratified K-Fold for final evaluation
skf_eval = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

accuracies = []
recalls = []
precisions = []
f1_scores = []

for fold, (train_idx, val_idx) in enumerate(skf_eval.split(x_tr_resample, y_tr_resample)):
    # Split data
    X_train_fold = x_tr_resample.iloc[train_idx]
    y_train_fold = y_tr_resample[train_idx]
    X_val_fold = x_tr_resample.iloc[val_idx]
    y_val_fold = y_tr_resample[val_idx]
    
    # Train model
    best_model.fit(X_train_fold, y_train_fold)
    
    # Make predictions
    y_pred_fold = best_model.predict(X_val_fold)
    
    # Calculate metrics
    accuracies.append(accuracy_score(y_val_fold, y_pred_fold))
    recalls.append(recall_score(y_val_fold, y_pred_fold, average='weighted'))
    precisions.append(precision_score(y_val_fold, y_pred_fold, average='weighted'))
    f1_scores.append(f1_score(y_val_fold, y_pred_fold, average='weighted'))



In [72]:
best_model.fit(x_tr_resample_scaled, y_tr_resample)
y_pred_test = best_model.predict(X_test_scaled) 



In [73]:
print("SVM with RBF Kernel Results:")
print("-" * 50)
print("Best Parameters:", best_params)
print("\nCross-validation Results (10-fold):")
print(f"Accuracy    : {np.mean(accuracies)*100:.2f}% (+/- {np.std(accuracies)*100:.2f}%)")
print(f"Recall      : {np.mean(recalls)*100:.2f}% (+/- {np.std(recalls)*100:.2f}%)")
print(f"Precision   : {np.mean(precisions)*100:.2f}% (+/- {np.std(precisions)*100:.2f}%)")
print(f"F1-Score    : {np.mean(f1_scores)*100:.2f}% (+/- {np.std(f1_scores)*100:.2f}%)")

print("\nTest Set Results:")
print(f"Accuracy    : {accuracy_score(y_test, y_pred_test)*100:.2f}%")
print(f"Recall      : {recall_score(y_test, y_pred_test, average='weighted')*100:.2f}%")
print(f"Precision   : {precision_score(y_test, y_pred_test, average='weighted')*100:.2f}%")
print(f"F1-Score    : {f1_score(y_test, y_pred_test, average='weighted')*100:.2f}%")
print("-" * 50)

SVM with RBF Kernel Results:
--------------------------------------------------
Best Parameters: {'C': 1000, 'class_weight': None, 'decision_function_shape': 'ovr', 'degree': 2, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': 15000, 'probability': True, 'shrinking': True, 'tol': 0.001}

Cross-validation Results (10-fold):
Accuracy    : 98.66% (+/- 0.22%)
Recall      : 98.66% (+/- 0.22%)
Precision   : 98.66% (+/- 0.22%)
F1-Score    : 98.66% (+/- 0.22%)

Test Set Results:
Accuracy    : 98.98%
Recall      : 98.98%
Precision   : 98.99%
F1-Score    : 98.99%
--------------------------------------------------
