## Hard-Margin SVM on Breast Cancer Dataset

### Importing Libraries

In [1]:
import warnings

import cvxopt
import cvxopt.solvers
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Loading and Preprocessing Breast Cancer Dataset

In [2]:
# Load scikit-learn's bundled breast cancer dataset.
data = load_breast_cancer()
X, y = data.data, data.target

# Map label 0 to -1 and every other label to +1, the {-1, +1} convention
# that the SVM formulation in the following cells works with.
y = np.where(y == 0, -1, 1)

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply that same
# transformation to both splits so no test-set statistics are used.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### Forming the QP Matrices

In [3]:
# The dual problem has one variable (alpha_i) per training sample.
N = X_train.shape[0]

# Quadratic term of the dual objective: entry (i, j) is y_i * y_j * <x_i, x_j>.
# NOTE: this notebook calls it G, but it is handed to the solver as the first
# positional argument (cvxopt's "P").
linear_kernel = np.dot(X_train, X_train.T)
label_products = np.outer(y_train, y_train)
Gtmp = linear_kernel * label_products
G = cvxopt.matrix(Gtmp)

# Linear term: a vector of -1s, so minimising 1/2 a^T G a + q^T a
# is the same as maximising sum(a) - 1/2 a^T G a.
q = cvxopt.matrix(-np.ones(N))

# Inequality constraint M a <= h with M = -I and h = 0, i.e. alpha_i >= 0.
# There is no upper bound on alpha, which is what makes this hard-margin.
M = cvxopt.matrix(-np.eye(N))
h = cvxopt.matrix(np.zeros(N))

# Equality constraint A a = d with A = y^T (as a 1 x N row) and d = 0,
# i.e. sum_i alpha_i * y_i = 0.
A = cvxopt.matrix(y_train.astype(float), (1, N))
d = cvxopt.matrix(0.0)

### Solving the QP Problem

In [4]:
# Silence cvxopt's per-iteration log; this sets a solver-wide option.
cvxopt.solvers.options['show_progress'] = False

# Solve the dual QP: minimise 1/2 a^T G a + q^T a  s.t.  M a <= h,  A a = d.
qp_solution = cvxopt.solvers.qp(G, q, M, h, A, d)

# cvxopt does not raise when it fails to converge; it reports the outcome in
# the 'status' entry instead. For a hard-margin SVM a non-optimal status
# typically means the training data are not linearly separable (the dual is
# then unbounded), so the multipliers read below would be meaningless.
# Warn rather than raise so the rest of the notebook can still be inspected.
if qp_solution['status'] != 'optimal':
    warnings.warn(
        f"QP solver finished with status {qp_solution['status']!r} instead of "
        "'optimal'; the multipliers may be unreliable (the training data may "
        "not be linearly separable).",
        RuntimeWarning,
    )

# Flatten the N x 1 solution into a 1-D array of Lagrange multipliers.
alpha_optimal = np.ravel(qp_solution['x'])

### Support Vectors, Weight, and Bias

In [5]:
# Multipliers are never exactly zero numerically, so only samples whose alpha
# exceeds a small threshold (1e-5) are treated as support vectors.
is_support_vector = alpha_optimal > 1e-5
support_vector_indices = np.flatnonzero(is_support_vector)

support_vectors = X_train[support_vector_indices]
support_vector_labels = y_train[support_vector_indices]

# Weight vector: w = sum_i alpha_i * y_i * x_i, summed over the support vectors.
signed_alphas = alpha_optimal[support_vector_indices] * support_vector_labels
w = (signed_alphas[:, np.newaxis] * support_vectors).sum(axis=0)

# Bias: each support vector gives an estimate y_i - w . x_i; average them.
b = (support_vector_labels - np.dot(support_vectors, w)).mean()

# Reported margin is 1 / ||w||.
margin = 1 / np.linalg.norm(w)

print("Margin:", margin)
print(f"Number of Support Vectors: {len(support_vector_indices)}")

Margin: 0.021535710207290018
Number of Support Vectors: 25


### Predictions on Train and Test Sets

In [6]:
# Decision values f(x) = w . x + b for every sample in each split.
train_scores = np.dot(X_train, w) + b
test_scores = np.dot(X_test, w) + b

# np.sign returns 0 for a sample lying exactly on the separating hyperplane,
# which is neither of the two class labels and would always be scored as an
# error. Resolve such ties to +1; every other value (including NaN, which
# still propagates and so still exposes a broken model) is left as np.sign
# produces it, and the float dtype of the predictions is unchanged.
y_train_pred = np.where(train_scores == 0, 1.0, np.sign(train_scores))
y_test_pred = np.where(test_scores == 0, 1.0, np.sign(test_scores))

### Metrics: Accuracy & Misclassification

In [7]:
# Element-wise correctness masks: True where the prediction equals the label.
train_hits = y_train_pred == y_train
test_hits = y_test_pred == y_test

# Accuracy is the fraction of correct predictions in each split.
train_accuracy = train_hits.mean()
test_accuracy = test_hits.mean()

# Misclassification count is the number of samples that are not hits.
train_errors = (~train_hits).sum()
test_errors = (~test_hits).sum()

print("\n--- Train Set Results ---")
print(f"Accuracy        : {train_accuracy * 100:.2f}%")
print(f"Misclassified   : {train_errors} out of {len(y_train)}")

print("\n--- Test Set Results ---")
print(f"Accuracy        : {test_accuracy * 100:.2f}%")
print(f"Misclassified   : {test_errors} out of {len(y_test)}")


--- Train Set Results ---
Accuracy        : 100.00%
Misclassified   : 0 out of 455

--- Test Set Results ---
Accuracy        : 93.86%
Misclassified   : 7 out of 114
