In [1]:
import numpy as np
# np.load on a .npz returns a lazy NpzFile that keeps the underlying file
# handle open until it is closed. Read every array inside a context manager
# so the handle is released, and keep `data` as a plain dict so key lookups
# (data['...']) keep working after the file is closed.
with np.load("../data/cancer_data.npz") as archive:
    data = {key: archive[key] for key in archive.files}

# Unpack the train/test split stored in the archive.
X_train = data['X_train']
y_train = data['y_train']
X_test = data['X_test']
y_test = data['y_test']

# Report the split sizes and the feature count (second axis of X_train).
print(f"Training samples: {X_train.shape[0]}, Test samples: {X_test.shape[0]}, Features: {X_train.shape[1]}")

Training samples: 64, Test samples: 136, Features: 4


In [18]:
def class_similarity(kernel_matrix, y):
    """Summarise how strongly a kernel (Gram) matrix separates two classes.

    Parameters
    ----------
    kernel_matrix : array-like of shape (n, n)
        Pairwise similarities; entry (i, j) compares sample i with sample j.
    y : array-like with n entries
        Class labels. Only samples labelled -1 and +1 are used; samples with
        any other label are ignored.

    Returns
    -------
    within_0 : float
        Mean similarity over all ordered pairs of *distinct* samples labelled -1.
    within_1 : float
        Mean similarity over all ordered pairs of *distinct* samples labelled +1.
    between : float
        Mean similarity over all (-1 sample, +1 sample) pairs.
    sep_ratio : float
        ``(within_0 + within_1) / (2 * between)``.

    A statistic is NaN when it has no pairs to average (a class with fewer
    than two samples for the within-class means, an empty class for the
    between-class mean).

    Raises
    ------
    ValueError
        If ``kernel_matrix`` is not a square 2-D array, or ``y`` does not have
        one label per row of ``kernel_matrix``.
    """
    kernel_matrix = np.asarray(kernel_matrix)
    # A plain list/tuple compared with a scalar yields a single bool rather
    # than an element-wise mask, which would select no samples at all.
    y = np.asarray(y).ravel()

    if kernel_matrix.ndim != 2 or kernel_matrix.shape[0] != kernel_matrix.shape[1]:
        raise ValueError(
            f"kernel_matrix must be a square 2-D array, got shape {kernel_matrix.shape}"
        )
    if y.shape[0] != kernel_matrix.shape[0]:
        raise ValueError(
            f"y has {y.shape[0]} labels but kernel_matrix has {kernel_matrix.shape[0]} rows"
        )

    # Row/column indices of each class.
    class_0_idx = np.flatnonzero(y == -1)
    class_1_idx = np.flatnonzero(y == 1)

    def _mean_off_diagonal(idx):
        # Mean of the class's own sub-matrix, skipping the diagonal
        # (the similarity of each sample with itself).
        if idx.size < 2:
            return np.float64(np.nan)  # no distinct pairs to average
        block = kernel_matrix[np.ix_(idx, idx)]
        return block[~np.eye(idx.size, dtype=bool)].mean()

    # Within-class similarity
    within_0 = _mean_off_diagonal(class_0_idx)
    within_1 = _mean_off_diagonal(class_1_idx)

    # Between-class similarity
    if class_0_idx.size and class_1_idx.size:
        between = kernel_matrix[np.ix_(class_0_idx, class_1_idx)].mean()
    else:
        between = np.float64(np.nan)  # one class is empty: no cross pairs

    # Separability ratio: values above 1 mean samples are, on average, more
    # similar to their own class than to the other class.
    sep_ratio = (within_0 + within_1) / (2 * between)

    return within_0, within_1, between, sep_ratio

In [None]:
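# Within-class vs. between-class similarity of the training data under several kernels.
# NOTE(review): the original header mentioned "before and after scaling", but no scaling step is visible in this cell — confirm whether X_train is already scaled.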
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import rbf_kernel, polynomial_kernel, sigmoid_kernel


# Gram matrices of the training set: a plain dot product plus three
# scikit-learn kernels, each called with its default hyper-parameters.
original_k = X_train @ X_train.T
rbf_k = rbf_kernel(X_train)
poly_k = polynomial_kernel(X_train)
sigmoid_k = sigmoid_kernel(X_train)

kernels = {
    "Original (Linear)": original_k,
    "RBF": rbf_k,
    "Polynomial": poly_k,
    "Sigmoid": sigmoid_k,
}

# Labels for the four values returned by class_similarity, in return order.
metric_names = (
    "Within class -1",
    "Within class +1",
    "Between class",
    "Separability ratio",
)

# One {metric: value} mapping per kernel.
results = {
    kernel_name: dict(zip(metric_names, class_similarity(gram, y_train)))
    for kernel_name, gram in kernels.items()
}

# Tabulate the results: one row per kernel, one column per metric.
df_results = pd.DataFrame(results).T
print(df_results.round(4))

                   Within class -1  Within class +1  Between class  \
Original (Linear)           6.7625          10.8517         7.6231   
RBF                         0.5882           0.8440         0.4635   
Polynomial                 21.1802          51.4895        26.6599   
Sigmoid                     0.9860           0.9987         0.9895   

                   Separability ratio  
Original (Linear)              1.1553  
RBF                            1.5451  
Polynomial                     1.3629  
Sigmoid                        1.0029  


In [22]:
# Classical SVM with RBF kernel
from sklearn.svm import SVC

# Fit an RBF-kernel SVC for each regularisation strength C and report its
# accuracy on the held-out test split.
for c in (0.1, 1.0, 10.0):
    svc = SVC(kernel='rbf', C=c).fit(X_train, y_train)  # fit() returns the estimator
    accuracy_svc = svc.score(X_test, y_test)
    print(f"Classical SVC Test accuracy with C={c}: {accuracy_svc * 100:.2f}%")


Classical SVC Test accuracy with C=0.1: 97.06%
Classical SVC Test accuracy with C=1.0: 96.32%
Classical SVC Test accuracy with C=10.0: 96.32%
