In [2]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
)

def train_svm_model(file_path, column_names, target_column, one_hot_columns=None, threshold=None, test_size=0.3, random_state=42, scale_features=False):
    """
    Train an RBF-kernel SVM classifier on a CSV dataset and print its test-set metrics.

    The file is read without a header row, optionally preprocessed (target
    binarisation, one-hot encoding), split into train/test parts, and an
    ``SVC`` is fitted on the training part. Accuracy, weighted
    precision/recall/F1, the confusion matrix and the classification report
    for the test part are printed.

    Parameters:
        file_path (str): Path to the dataset file (CSV without a header row).
        column_names (list): List of column names for the dataset, in file order.
        target_column (str): Name of the target column.
        one_hot_columns (list): List of column names to one-hot encode
            (the first level of each column is dropped). Default is None.
        threshold (int or float): If given, the target column is replaced by
            the boolean ``target > threshold``. Default is None.
        test_size (float): Fraction of the dataset to be used for testing. Default is 0.3.
        random_state (int): Random seed for reproducibility. Default is 42.
        scale_features (bool): If True, standardise the features with a
            ``StandardScaler`` fitted on the training split only before
            training. Default is False, which trains on the raw features.

    Returns:
        None
    """
    # Load dataset; column names are supplied because the file has no header row.
    data = pd.read_csv(file_path, header=None, names=column_names)

    # Preprocessing: convert target column to binary if threshold is provided
    if threshold is not None:
        data[target_column] = data[target_column] > threshold

    # Preprocessing: one-hot encode specified columns
    if one_hot_columns:
        data = pd.get_dummies(data, columns=one_hot_columns, drop_first=True)

    # Split data into features (X) and target (y)
    X = data.drop(columns=[target_column])
    y = data[target_column]

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    if scale_features:
        # Imported locally so the function does not depend on an extra
        # notebook-level import.
        from sklearn.preprocessing import StandardScaler

        # Fit on the training split only so no test-set statistics leak
        # into training.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    # probability=True is intentionally not requested: only predict() is used
    # below, and probability estimates make fit() considerably slower.
    svm_model = SVC(kernel="rbf", random_state=random_state)
    svm_model.fit(X_train, y_train)

    y_pred = svm_model.predict(X_test)

    # Metrics. zero_division=0 reports 0 for a class that is never predicted
    # without emitting an UndefinedMetricWarning for every metric call.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average="weighted", zero_division=0)
    recall = recall_score(y_test, y_pred, average="weighted", zero_division=0)
    f1 = f1_score(y_test, y_pred, average="weighted", zero_division=0)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")
    print("\nConfusion Matrix:")
    print(conf_matrix)

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, zero_division=0))


In [None]:
# Abalone experiment: the "Rings" target is binarised by the threshold below.
file_path = "../dataset abalone/abalone.data"
column_names = [
    "Sex",
    "Length",
    "Diameter",
    "Height",
    "Whole weight",
    "Shucked weight",
    "Viscera weight",
    "Shell weight",
    "Rings",
]
target_column = "Rings"
one_hot_columns = ["Sex"]  # one-hot encoded inside train_svm_model
threshold = 10  # target becomes the boolean `Rings > 10`

train_svm_model(
    file_path=file_path,
    column_names=column_names,
    target_column=target_column,
    one_hot_columns=one_hot_columns,
    threshold=threshold,
)

Accuracy: 0.78
Precision: 0.77
Recall: 0.78
F1 Score: 0.77

Confusion Matrix:
[[750  90]
 [188 226]]

Classification Report:
              precision    recall  f1-score   support

       False       0.80      0.89      0.84       840
        True       0.72      0.55      0.62       414

    accuracy                           0.78      1254
   macro avg       0.76      0.72      0.73      1254
weighted avg       0.77      0.78      0.77      1254



In [4]:
# Balance-scale experiment: the "Class" column is used as the target as-is
# (no thresholding, no one-hot encoding).
file_path = "../dataset balance+scale/balance-scale.data"
column_names = [
    "Class",
    "Left-Weight",
    "Left-Distance",
    "Right-Weight",
    "Right-Distance",
]
target_column = "Class"
one_hot_columns = None
threshold = None

train_svm_model(
    file_path=file_path,
    column_names=column_names,
    target_column=target_column,
    one_hot_columns=one_hot_columns,
    threshold=threshold,
)

Accuracy: 0.89
Precision: 0.81
Recall: 0.89
F1 Score: 0.85

Confusion Matrix:
[[ 0 11  7]
 [ 0 80  0]
 [ 0  2 88]]

Classification Report:
              precision    recall  f1-score   support

           B       0.00      0.00      0.00        18
           L       0.86      1.00      0.92        80
           R       0.93      0.98      0.95        90

    accuracy                           0.89       188
   macro avg       0.60      0.66      0.63       188
weighted avg       0.81      0.89      0.85       188



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [6]:
# Breast-cancer experiment: "Class" is the target and is not thresholded.
file_path = "../dataset breast+cancer/breast-cancer.data"
column_names = [
    "Class",
    "Age",
    "Menopause",
    "Tumor-Size",
    "Inv-Nodes",
    "Node-Caps",
    "Deg-Malig",
    "Breast",
    "Breast-Quad",
    "Irradiat",
]
target_column = "Class"
# Every feature except "Deg-Malig" is one-hot encoded inside train_svm_model.
one_hot_columns = [
    "Age",
    "Menopause",
    "Tumor-Size",
    "Inv-Nodes",
    "Node-Caps",
    "Breast",
    "Breast-Quad",
    "Irradiat",
]
threshold = None

train_svm_model(
    file_path=file_path,
    column_names=column_names,
    target_column=target_column,
    one_hot_columns=one_hot_columns,
    threshold=threshold,
)

Accuracy: 0.72
Precision: 0.72
Recall: 0.72
F1 Score: 0.66

Confusion Matrix:
[[57  2]
 [22  5]]

Classification Report:
                      precision    recall  f1-score   support

no-recurrence-events       0.72      0.97      0.83        59
   recurrence-events       0.71      0.19      0.29        27

            accuracy                           0.72        86
           macro avg       0.72      0.58      0.56        86
        weighted avg       0.72      0.72      0.66        86



In [7]:
# Iris experiment: the four measurement columns are the features and "Class"
# is the target; no thresholding or one-hot encoding is requested.
file_path = "../dataset iris/iris.data"
column_names = [
    "SepalLength",
    "SepalWidth",
    "PetalLength",
    "PetalWidth",
    "Class",
]
target_column = "Class"
one_hot_columns = None
threshold = None

train_svm_model(
    file_path=file_path,
    column_names=column_names,
    target_column=target_column,
    one_hot_columns=one_hot_columns,
    threshold=threshold,
)

Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00

Confusion Matrix:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]

Classification Report:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        19
Iris-versicolor       1.00      1.00      1.00        13
 Iris-virginica       1.00      1.00      1.00        13

       accuracy                           1.00        45
      macro avg       1.00      1.00      1.00        45
   weighted avg       1.00      1.00      1.00        45



In [8]:
# Wine experiment: "Class" (first column) is the target; all other columns are
# passed to the SVM unchanged (no thresholding, no one-hot encoding).
# NOTE(review): the features are not standardised before the RBF SVM; if their
# ranges differ widely this may explain the weaker scores — TODO confirm.
file_path = "../dataset wine/wine.data"
column_names = [
    "Class",
    "Alcohol",
    "MalicAcid",
    "Ash",
    "AlcalinityOfAsh",
    "Magnesium",
    "TotalPhenols",
    "Flavanoids",
    "NonflavanoidPhenols",
    "Proanthocyanins",
    "ColorIntensity",
    "Hue",
    "OD280_OD315",
    "Proline",
]
target_column = "Class"
one_hot_columns = None
threshold = None

train_svm_model(
    file_path=file_path,
    column_names=column_names,
    target_column=target_column,
    one_hot_columns=one_hot_columns,
    threshold=threshold,
)

Accuracy: 0.76
Precision: 0.75
Recall: 0.76
F1 Score: 0.72

Confusion Matrix:
[[19  0  0]
 [ 0 19  2]
 [ 0 11  3]]

Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        19
           2       0.63      0.90      0.75        21
           3       0.60      0.21      0.32        14

    accuracy                           0.76        54
   macro avg       0.74      0.71      0.69        54
weighted avg       0.75      0.76      0.72        54

