## 1. Set Data For Training

- Dataset: Iris (Full Dataset)
- All 150 samples (50 per class)
- 3 classes: {0, 1, 2}
- Features: 4
- Random seed: 42
- Scale: [-π, π]
- Split: 80% train, 10% validation, 10% test

In [1]:
from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load the full iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

print(f"Full dataset - X shape: {X.shape}, y shape: {y.shape}")
print(f"Class distribution: {np.bincount(y)}")

# Shuffle and split into train (80%), temp (20%)
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42, stratify=y
)

# Split temp into validation (50%) and test (50%) -> 10% each of total
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Scale to [-π, π]
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

print(f"\nTraining X shape: {X_train.shape}, y shape: {y_train.shape}")
print(f"Training class distribution: {np.bincount(y_train)}")
print(f"\nValidation X shape: {X_val.shape}, y shape: {y_val.shape}")
print(f"Validation class distribution: {np.bincount(y_val)}")
print(f"\nTest X shape: {X_test.shape}, y shape: {y_test.shape}")
print(f"Test class distribution: {np.bincount(y_test)}")

Full dataset - X shape: (150, 4), y shape: (150,)
Class distribution: [50 50 50]

Training X shape: (90, 4), y shape: (90,)
Training class distribution: [30 30 30]

Validation X shape: (30, 4), y shape: (30,)
Validation class distribution: [10 10 10]

Test X shape: (30, 4), y shape: (30,)
Test class distribution: [10 10 10]


In [2]:
# Save the training, validation, and testing data
np.savez_compressed(
    "../data/iris_4features_data.npz", 
    X_train=X_train, y_train=y_train,
    X_val=X_val, y_val=y_val,
    X_test=X_test, y_test=y_test
)

In [3]:
data = np.load("../data/iris_4features_data.npz")

X_train = data['X_train']
y_train = data['y_train']
X_val = data['X_val']
y_val = data['y_val']
X_test = data['X_test']
y_test = data['y_test']

In [4]:
import sys
import os
sys.path.append(os.path.join(os.path.dirname(os.getcwd())))

In [5]:
from sklearn.metrics.pairwise import rbf_kernel
from src.utils import calculate_accuracy

rbf_K_train = rbf_kernel(X_train)
rbf_K_val = rbf_kernel(X_val, X_train)
rbf_K_test = rbf_kernel(X_test, X_train)

classical_val_acc, classical_test_acc, _ = calculate_accuracy(
    rbf_K_train, rbf_K_val, rbf_K_test,
    y_train, y_val, y_test
)
print("Val acc | Test acc")
print(f"{classical_val_acc} | {classical_test_acc}")

{'C': np.float64(23.501), 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'precomputed', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
15
Val acc | Test acc
1.0 | 0.9333333333333333
