<h1> Support Vector Machine Model (Train + Performance Evaluation) </h1>

In [34]:
# Step 1: Create Datasets
from sklearn.datasets import make_blobs, make_circles
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

In [35]:
class DatasetCreator:
    """Factory for the synthetic 2-D classification datasets used in this notebook.

    Every method returns a dict with two keys: 'X' (the feature array) and
    'y' (the label array), exactly as produced by the scikit-learn generator.
    """

    @staticmethod
    def create_blob_dataset(random_state: int = 0) -> dict:
        """Create an imbalanced two-class Gaussian blob dataset.

        Class 0 has 1000 samples centred on (0, 0) with standard deviation 1.5;
        class 1 has 100 samples centred on (2, 2) with standard deviation 0.5.

        Args:
            random_state: Seed forwarded to ``make_blobs``. The default (0)
                reproduces the data this notebook has always generated.

        Returns:
            dict: ``{'X': features, 'y': labels}``.
        """
        n_samples_1 = 1000
        n_samples_2 = 100
        centers = [[0.0, 0.0], [2.0, 2.0]]
        cluster_std = [1.5, 0.5]

        # shuffle=False: the generator does not shuffle the samples itself;
        # shuffling is left to the train/test split performed downstream.
        X, y = make_blobs(n_samples=[n_samples_1, n_samples_2],
                          centers=centers,
                          cluster_std=cluster_std,
                          random_state=random_state,
                          shuffle=False)

        return {'X': X, 'y': y}

    @staticmethod
    def create_make_circles_dataset(random_state: int = 0) -> dict:
        """Create a 500-sample dataset with ``make_circles``.

        Uses ``factor=0.1`` and ``noise=0.1``.

        Args:
            random_state: Seed forwarded to ``make_circles``. Seeding makes the
                generated points, and every score computed from them, identical
                across kernel restarts, matching the seeded blob dataset.

        Returns:
            dict: ``{'X': features, 'y': labels}``.
        """
        X, y = make_circles(n_samples=500, factor=0.1, noise=0.1,
                            random_state=random_state)
        return {'X': X, 'y': y}

In [36]:
# Build both toy datasets up front; each one is a dict holding 'X' and 'y'.
dataset_creator = DatasetCreator()
blob_dataset, circles_dataset = (
    dataset_creator.create_blob_dataset(),
    dataset_creator.create_make_circles_dataset(),
)

In [37]:
# Step 2: Split Data into Training, Validation, and Test Sets
def split_train_val_test(X, y, test_size=0.2, val_size=0.25, random_state=42):
    """Split (X, y) into train / validation / test subsets in two passes.

    Pass 1 holds out ``test_size`` of all samples as the test set.
    Pass 2 holds out ``val_size`` of the *remaining* samples as the validation
    set. With the defaults this is 0.25 * 0.8 = 0.2 of the full data, i.e. a
    60 / 20 / 20 split.

    Args:
        X: Feature array.
        y: Label array aligned with ``X``.
        test_size: Fraction of all samples reserved for the test set.
        val_size: Fraction of the non-test samples reserved for validation.
        random_state: Seed used for both shuffles.

    Returns:
        Four ``(X_part, y_part)`` pairs, in this order:
        train+validation pool, train, validation, test.
    """
    X_train_temp, X_test, y_train_temp, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_temp, y_train_temp, test_size=val_size, random_state=random_state)
    return ((X_train_temp, y_train_temp),
            (X_train, y_train),
            (X_val, y_val),
            (X_test, y_test))


X_blob, y_blob = blob_dataset['X'], blob_dataset['y']
X_circles, y_circles = circles_dataset['X'], circles_dataset['y']

# Blob dataset: hold out the test set first, then carve validation out of the rest.
((X_blob_train_temp, y_blob_train_temp),
 (X_blob_train, y_blob_train),
 (X_blob_val, y_blob_val),
 (X_blob_test, y_blob_test)) = split_train_val_test(X_blob, y_blob)

print("Blob Dataset:")
print(f"Train set: {X_blob_train.shape}, Validation set: {X_blob_val.shape}, Test set: {X_blob_test.shape}")

# Circles dataset: same two-stage split with the same fractions and seed.
((X_circles_train_temp, y_circles_train_temp),
 (X_circles_train, y_circles_train),
 (X_circles_val, y_circles_val),
 (X_circles_test, y_circles_test)) = split_train_val_test(X_circles, y_circles)

print("\nCircles Dataset:")
print(f"Train set: {X_circles_train.shape}, Validation set: {X_circles_val.shape}, Test set: {X_circles_test.shape}")

Blob Dataset:
Train set: (660, 2), Validation set: (220, 2), Test set: (220, 2)

Circles Dataset:
Train set: (300, 2), Validation set: (100, 2), Test set: (100, 2)


In [38]:
# Baseline: fit a linear-kernel SVM (C=1.0) on the blob training split,
# then measure plain accuracy on the held-out blob test split.
svm_model_blob = SVC(C=1.0, kernel='linear').fit(X_blob_train, y_blob_train)
y_blob_test_pred = svm_model_blob.predict(X_blob_test)
svm_blob_accuracy = accuracy_score(y_blob_test, y_blob_test_pred)
print(f"SVM Blob Accuracy on Blob Dataset: {svm_blob_accuracy}")

SVM Blob Accuracy on Blob Dataset: 0.9545454545454546


In [39]:
# Train a basic linear-kernel SVM (C=1.0) on the circles dataset and report
# its accuracy on the held-out circles test split.
# NOTE(review): the recorded run scored only 0.64 here; presumably this is an
# intentional baseline showing that a linear kernel fits this data poorly.
svm_model_circle = SVC(kernel='linear', C=1.0)
svm_model_circle.fit(X_circles_train, y_circles_train)
svm_circle_accuracy = accuracy_score(y_circles_test, svm_model_circle.predict(X_circles_test))
print(f"SVM Circles Dataset Accuracy: {svm_circle_accuracy}")

SVM Cicles Dataset Accuracy: 0.64
