In [10]:
import squidpy as sq
import numpy as np
import math
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn import svm
from sklearn import metrics
from scipy.sparse import hstack
from sklearn.preprocessing import normalize


In [11]:
# Load the seqFISH dataset shipped with squidpy.
adata = sq.datasets.seqfish()

# Integer class id per cell, taken from the categorical cell-type annotation.
labels = adata.obs['celltype_mapped_refined'].cat.codes.values
# Distinct class ids and how many cells fall into each one (single pass).
classes, counts = np.unique(labels, return_counts=True)

# Expression matrix with the spatial coordinates appended as trailing columns
# (presumably two, given the `[:, -2:]` slice used further down).
# Request CSR explicitly: without a format, hstack can return a COO matrix,
# which supports neither the row indexing done in eval_k_fold nor column slicing.
X_with_spatial = hstack((adata.X, adata.obsm['spatial']), format='csr')

# Feature variants compared below; norm='max' with axis=0 rescales every
# feature (column) by its maximum.
normalized_x = normalize(adata.X, norm='max', axis=0)

# Log-transform only the explicitly stored entries of the matrix
# (assumes adata.X is a scipy sparse matrix exposing `.data`).
# NOTE(review): np.log maps stored values in (0, 1) to negatives and a stored 0
# to -inf, while implicit zeros stay 0; np.log1p may be what is wanted here.
# Left unchanged so the recorded results below remain valid.
log_x = adata.X.copy()
log_x.data = np.log(log_x.data)
normalized_log_x = normalize(log_x, norm='max', axis=0)


In [6]:
def eval_k_fold(model, x, y, n_splits=3, random_state=1604):
    """Evaluate `model` with shuffled, stratified k-fold cross-validation.

    The model is refitted on the training part of every fold and scored on the
    held-out part. Note that `model` itself is fitted in place, so after the
    call it holds the fit from the last fold.

    Parameters
    ----------
    model : estimator exposing `fit(X, y)` and `predict(X)`.
    x : feature matrix that can be indexed by an array of row indices
        (e.g. a NumPy array or a CSR matrix; COO matrices cannot be indexed).
    y : 1-D array of class labels, indexable by an array of row indices.
    n_splits : number of folds (default 3).
    random_state : seed for the fold shuffling (default 1604).

    Returns
    -------
    dict with the fold-averaged scores under the keys 'acc', 'f1'
    (macro-averaged) and 'balanced_acc'.
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True,
                          random_state=random_state)
    # Macro-F1 is averaged over every class present in the full `y`, so a class
    # missing from one fold still counts (as 0, via zero_division=0). Deriving
    # the label set from `y` avoids depending on a notebook-level global.
    all_classes = np.unique(y)

    acc = []
    balanced_acc = []
    f1 = []
    for train_index, test_index in skf.split(x, y):
        model.fit(x[train_index], y[train_index])
        y_true = y[test_index]
        pred = model.predict(x[test_index])
        acc.append(metrics.accuracy_score(y_true, pred))
        f1.append(metrics.f1_score(
            y_true, pred, average='macro', labels=all_classes, zero_division=0))
        balanced_acc.append(metrics.balanced_accuracy_score(y_true, pred))
    return {'acc': np.mean(acc), 'f1': np.mean(f1), 'balanced_acc': np.mean(balanced_acc)}


In [7]:
# Baseline: unweighted RBF-kernel SVC, cross-validated on each feature variant
# in turn (raw, max-normalised, log, max-normalised log).
classifier = svm.SVC(kernel='rbf')
results = []
for data in (adata.X, normalized_x, log_x, normalized_log_x):
    # One dict of fold-averaged scores per variant, printed in the same order.
    res = eval_k_fold(classifier, data, labels)
    results.append(res)
    print(res)

{'acc': 0.8413679439637413, 'f1': 0.6918458992784452, 'balanced_acc': 0.6828352659430069}
{'acc': 0.8083539348990523, 'f1': 0.6448603906164005, 'balanced_acc': 0.6304722328293196}
{'acc': 0.8207663782447465, 'f1': 0.6876761021251848, 'balanced_acc': 0.6754432015820995}
{'acc': 0.8144829007004533, 'f1': 0.6733388685511592, 'balanced_acc': 0.662373033572585}


In [8]:
# Same RBF SVC, but with class weights set by scikit-learn's 'balanced' mode.
classifier_balanced = svm.SVC(kernel='rbf', class_weight='balanced')
# Name the feature matrix explicitly. This cell used to read `data`, the loop
# variable left over from the baseline loop, whose final value is
# normalized_log_x; relying on that leak breaks under any other run order.
res = eval_k_fold(classifier_balanced, normalized_log_x, labels)
print(res)

{'acc': 0.7940358467243511, 'f1': 0.6975112341824025, 'balanced_acc': 0.7065444094052212}


In [None]:
# RBF SVC on the expression features with the spatial coordinates appended.
classifier_x_plus_spatial = svm.SVC(kernel='rbf')
# eval_k_fold indexes rows with index arrays, which a COO matrix (what hstack
# can return when no format is given) does not support; convert to CSR first.
# tocsr() is cheap if the matrix is already CSR.
print(eval_k_fold(classifier_x_plus_spatial, X_with_spatial.tocsr(), labels))


In [None]:
# RBF SVC on the spatial coordinates alone (the last two feature columns).
classifier_spatial_only = svm.SVC(kernel='rbf')
# COO matrices cannot be sliced, so convert to CSR before taking the columns;
# tocsr() is cheap if the matrix is already CSR.
spatial_only = X_with_spatial.tocsr()[:, -2:]
print(eval_k_fold(classifier_spatial_only, spatial_only, labels))

In [None]:
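# Disabled experiment: rotate the spatial coordinates by 90 degrees and re-score
# the spatial-only classifier on the rotated test set.
# NOTE: X_test / y_test are not defined in the cells shown here; create a
# train/test split and fit classifier_spatial_only on it before re-enabling.
# theta = math.pi / 2
# R = np.array([[math.cos(theta), -math.sin(theta)],
#              [math.sin(theta), math.cos(theta)]])
# X_test_spatial_only_rotated = np.matmul(X_test[:, -2:], R)
# y_pred = classifier_spatial_only.predict(X_test_spatial_only_rotated)
# print("Accuracy (RBF SVC, Spatial only, Rotated):",
#       metrics.accuracy_score(y_test, y_pred))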


In [None]:
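# Disabled experiment: rotate only the spatial columns by 90 degrees, keep the
# expression columns unchanged, and re-score the expression+spatial classifier.
# NOTE: X_test / y_test are not defined in the cells shown here; create a
# train/test split and fit classifier_x_plus_spatial on it before re-enabling.
# from scipy.sparse import csr_matrix

# theta = math.pi / 2
# R = np.array([[math.cos(theta), -math.sin(theta)],
#              [math.sin(theta), math.cos(theta)]])
# X_test_spatial_rotated = hstack(
#     (X_test[:, :-2], X_test[:, -2:] @ csr_matrix(R)))
# y_pred = classifier_x_plus_spatial.predict(X_test_spatial_rotated)
# print("Accuracy (RBF SVC, Spatial, Rotated):",
#       metrics.accuracy_score(y_test, y_pred))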
