In [None]:
# In the imbalanced-learn pipelines below, StandardScaler() is a pipeline step placed
# just before the SMOTE and SVM steps, so the data is standardized before SMOTE() is applied.
# During cross-validation the whole pipeline is refit on each training fold, so the scaling
# is learned from that fold's training data and then applied to its held-out data.
# We use the stratified K-fold method with n_splits=5
# and compare RBF, polynomial and linear kernels.


In [None]:
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Stratified k-fold cross-validation splitter (5 folds), shared by the
# cross-validation runs in this notebook.
skf = StratifiedKFold(n_splits=5)

# Pipeline: StandardScaler -> SMOTE -> SVM with an RBF kernel.
svm_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),        # Standardize the features
    # SMOTE generates synthetic minority samples at random; a fixed seed keeps
    # the scores below reproducible across Restart & Run All.
    ('smote', SMOTE(random_state=42)),   # Oversampling using SMOTE
    ('svm', SVC(kernel='rbf', class_weight='balanced')),  # RBF-kernel SVM with class weighting
])

# Cross-validated F1: the whole pipeline is refit on each training fold and
# scored on the corresponding held-out fold.
# NOTE(review): scoring='f1' is the binary F1 score - assumes y has two classes; confirm.
cv_results = cross_val_score(svm_pipeline, x, y, cv=skf, scoring='f1')

print("Cross-Validated F1 Score for SVM-rbf kernel: ", cv_results.mean())

# Refit the pipeline on the full data set.
svm_pipeline.fit(x, y)
y_pred = svm_pipeline.predict(x)

# NOTE: this report is in-sample (predictions are made on the same x the model
# was fitted on), so it is optimistic. The cross-validated F1 above is the
# estimate of out-of-sample performance.
print(classification_report(y, y_pred))


In [None]:
# SVM (Polynomial Kernel)
# Pipeline: StandardScaler -> SMOTE -> SVM with a degree-3 polynomial kernel.
svm_poly_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),        # Step 1: Standardization (Scaling)
    # SMOTE generates synthetic minority samples at random; a fixed seed keeps
    # the scores below reproducible across Restart & Run All.
    ('smote', SMOTE(random_state=42)),   # Step 2: Oversampling using SMOTE
    ('svm', SVC(kernel='poly', degree=3, class_weight='balanced')),  # Step 3: Polynomial-kernel SVM with class weighting
])

# Cross-validated F1: the whole pipeline is refit on each training fold and
# scored on the corresponding held-out fold.
cv_poly_results = cross_val_score(svm_poly_pipeline, x, y, cv=skf, scoring='f1')

print("Cross-Validated F1 Score for Polynomial kernel : ", cv_poly_results.mean())

# Refit the pipeline on the full data set.
svm_poly_pipeline.fit(x, y)

y_pred_poly = svm_poly_pipeline.predict(x)

# NOTE: this report is in-sample (predictions are made on the same x the model
# was fitted on), so it is optimistic. The cross-validated F1 above is the
# estimate of out-of-sample performance.
print(classification_report(y, y_pred_poly))

In [None]:
# Tuning the SVM polynomial kernel with an exhaustive grid search.
# Keys use the '<step name>__<parameter>' form so they reach the 'svm' step
# of svm_poly_pipeline.
param_grid = {
    'svm__degree': [2, 3, 4],        # Degree of the polynomial kernel
    'svm__C': [0.1, 1, 10],          # Regularization parameter C
    'svm__gamma': ['scale', 'auto']  # Kernel coefficient: both built-in heuristics
}

# Use the shared StratifiedKFold splitter (skf) instead of a bare cv=5 so the
# grid search is scored on the same, explicitly defined folds as the other
# cross-validation runs in this notebook.
grid_search = GridSearchCV(svm_poly_pipeline, param_grid, scoring='f1', cv=skf)

# Fit every parameter combination with cross-validation and keep the best one.
grid_search.fit(x, y)

# Best parameter combination and its mean cross-validated F1 score.
print("Best Parameters for Polynomial SVM:", grid_search.best_params_)

print("Best F1 Score:", grid_search.best_score_)



In [None]:
# SVM (Linear Kernel)
# Pipeline: StandardScaler -> SMOTE -> SVM with a linear kernel.
svm_linear_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),        # Step 1: Standardization (Scaling)
    # SMOTE generates synthetic minority samples at random; a fixed seed keeps
    # the scores below reproducible across Restart & Run All.
    ('smote', SMOTE(random_state=42)),   # Step 2: Oversampling using SMOTE
    ('svm', SVC(kernel='linear', class_weight='balanced')),  # Step 3: Linear-kernel SVM with class weighting
])

# Cross-validated F1: the whole pipeline is refit on each training fold and
# scored on the corresponding held-out fold.
cv_linear_results = cross_val_score(svm_linear_pipeline, x, y, cv=skf, scoring='f1')

print("Cross-Validated F1 Score for the Linear kernel: ", cv_linear_results.mean())

# Refit the pipeline on the full data set.
svm_linear_pipeline.fit(x, y)

y_pred_linear = svm_linear_pipeline.predict(x)

# NOTE: this report is in-sample (predictions are made on the same x the model
# was fitted on), so it is optimistic. The cross-validated F1 above is the
# estimate of out-of-sample performance.
print(classification_report(y, y_pred_linear))