In [25]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Prepering the Dataset:

In [26]:
seed = 42
np.random.seed(seed)

In [27]:
# Load dataset
data = datasets.load_breast_cancer()
X, y = data.data, data.target
print(f'Shape of X: {X.shape}')
print(f'Shape of y: {y.shape}')

Shape of X: (569, 30)
Shape of y: (569,)


In [28]:
# Splitting for training sets (train: 60%, temporary: 40%)
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    train_size=0.6,
    random_state=seed,
    stratify=y)

# Splitting for validation and testing sets (test: 20%, val: 20%)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    train_size=0.5,
    random_state=seed,
    stratify=y_temp)

# Preporcessing: Standardize features
scaler = StandardScaler()
X_train == scaler.fit_transform(X_train)
X_val == scaler.fit_transform(X_val)
X_test == scaler.fit_transform(X_test)

print(f'Shape of X_train: {X_train.shape}')
print(f'Shape of y_train: {y_train.shape}')
print(f'---------------------------------')
print(f'Shape of X_val: {X_val.shape}')
print(f'Shape of y_val: {y_val.shape}')
print(f'---------------------------------')
print(f'Shape of X_test: {X_test.shape}')
print(f'Shape of y_test: {y_test.shape}')

Shape of X_train: (341, 30)
Shape of y_train: (341,)
---------------------------------
Shape of X_val: (114, 30)
Shape of y_val: (114,)
---------------------------------
Shape of X_test: (114, 30)
Shape of y_test: (114,)


## Train the SVM Model

In [29]:
svm = SVC(random_state=seed)
svm.fit(X_train, y_train)
pred_val = svm.predict(X_val)
acc_val = accuracy_score(y_val, pred_val)
print(f'validation accuracy: {acc_val}')

validation accuracy: 0.9035087719298246


## Initial surrogate Model