Load Packages

In [8]:
import pandas as pd
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score, roc_curve, auc

Load Dataset and split

In [9]:
# Load the dataset
df = pd.read_csv('binary.arff.csv')

# Split data into X (features) and Y (target)
X = df.iloc[:, :-1].values  # Features
Y = df.iloc[:, -1].values   # Labels

# Convert categorical labels to binary labels
le = LabelEncoder()
Y = le.fit_transform(Y)  # Encode 'ALL' as 0 and 'AML' as 1

SVM Before Feature Selection 

In [11]:
# Split the dataset into training and test data (80:20 split)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=14)

# Build the model
svm = SVC(kernel='linear', random_state=12, probability=True)  # Set probability=True for ROC-AUC

# Train the model
svm.fit(X_train, Y_train)

# Predict the labels for the test set
Y_pred = svm.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(Y_test, Y_pred)
classification_rep = classification_report(Y_test, Y_pred)

from sklearn import metrics

# Calculate ROC AUC score
roc_auc = metrics.roc_auc_score(Y_test, Y_pred)
print(f"ROC-AUC: {roc_auc:.4f}")

# Output the results
print("Accuracy:", accuracy)
print("\nClassification Report:\n", classification_rep)
print("Confusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))
print("\nROC-AUC Score:", roc_auc)

# Perform n-fold cross-validation
n = 2
cv_scores = cross_val_score(svm, X, Y, cv=n)

# Print cross-validation scores
print("\nCross-validation Scores:", cv_scores)
print("Mean Cross-validation Accuracy:", cv_scores.mean())

ROC-AUC: 0.9583
Accuracy: 0.9333333333333333

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.75      1.00      0.86         3

    accuracy                           0.93        15
   macro avg       0.88      0.96      0.91        15
weighted avg       0.95      0.93      0.94        15

Confusion Matrix:
[[11  1]
 [ 0  3]]

ROC-AUC Score: 0.9583333333333333

Cross-validation Scores: [0.91666667 0.88888889]
Mean Cross-validation Accuracy: 0.9027777777777777
