In [19]:
import squidpy as sq
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn import svm
from sklearn import metrics
from scipy.sparse import hstack
import json
from scipy.sparse import csr_matrix
import math


In [20]:
# Load the seqFISH dataset through squidpy.
adata = sq.datasets.seqfish()

# Integer codes of the refined cell-type annotation; used as classification targets.
labels = adata.obs['celltype_mapped_refined'].cat.codes.values
# Distinct label codes and the number of observations carrying each one.
classes, counts = np.unique(labels, return_counts=True)

# Append the spatial coordinates as extra feature columns. CSR is requested
# explicitly: hstack() returns COO by default, and COO matrices support neither
# the row indexing done during cross-validation nor the column slicing used
# when the coordinates are rotated further down.
X_with_spatial = hstack((adata.X, adata.obsm['spatial']), format='csr')

In [21]:
# Accumulates one averaged-metrics dict per experiment run further down.
svm_results = list()

In [22]:
def save_results(filename, results):
    """Write ``results`` to ``filename`` as JSON Lines.

    The file is opened in write mode (existing content is replaced) and each
    element of ``results`` is serialised with ``json.dump`` on its own line.

    Args:
        filename: Path of the file to create or overwrite.
        results: Iterable of JSON-serialisable objects.
    """
    with open(filename, 'w') as handle:
        for record in results:
            json.dump(record, handle)
            # One record per line.
            handle.write("\n")

In [23]:
def get_stats(y_true, y_pred):
    """Compute classification metrics for one set of predictions.

    Accuracy and balanced accuracy are computed directly from the two label
    sequences. F1, recall and precision are macro-averaged over the
    module-level ``classes`` array with ``zero_division=0``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned element-wise with ``y_true``.

    Returns:
        dict with keys ``accuracy_score``, ``balanced_accuracy``, ``f1_score``,
        ``recall`` and ``precision_score``.
    """
    # Options shared by the three macro-averaged metrics.
    macro_options = {'average': 'macro', 'labels': classes, 'zero_division': 0}

    stats = {}
    stats['accuracy_score'] = metrics.accuracy_score(y_true, y_pred)
    stats['balanced_accuracy'] = metrics.balanced_accuracy_score(y_true, y_pred)
    stats['f1_score'] = metrics.f1_score(y_true, y_pred, **macro_options)
    stats['recall'] = metrics.recall_score(y_true, y_pred, **macro_options)
    stats['precision_score'] = metrics.precision_score(y_true, y_pred, **macro_options)
    return stats

def average_stats(stats_list):
    """Average a list of metric dicts key by key.

    The key set is taken from the first dict; every dict in ``stats_list`` is
    expected to provide a value for each of those keys.

    Args:
        stats_list: Non-empty list of dicts mapping metric name to a number.

    Returns:
        dict mapping each metric name to its arithmetic mean over the list.

    Raises:
        IndexError: If ``stats_list`` is empty.
    """
    metric_names = stats_list[0].keys()
    fold_count = len(stats_list)
    averaged = {}
    for metric in metric_names:
        # Plain left-to-right accumulation, starting from integer zero.
        total = 0
        for fold_stats in stats_list:
            total += fold_stats[metric]
        averaged[metric] = total / fold_count
    return averaged

def eval_k_fold(model, x, y):
    """Evaluate ``model`` with 3-fold stratified cross-validation.

    For each fold the model is fitted on the training rows, used to predict
    the held-out rows, and scored with ``get_stats``. The per-fold metrics are
    averaged with ``average_stats``, printed, and returned.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        x: Feature matrix, indexable by an integer index array along rows.
            Sparse matrices in any format are accepted and converted to CSR.
        y: Label array aligned with the rows of ``x``.

    Returns:
        dict of metric name to its mean over the three folds.
    """
    # Sparse formats such as COO (the default output of scipy.sparse.hstack)
    # cannot be indexed by row; CSR can. Dense arrays have no ``tocsr`` and
    # are left untouched.
    if hasattr(x, "tocsr"):
        x = x.tocsr()

    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=1604)
    stats_list = []
    for train_index, test_index in skf.split(x, y):
        model.fit(x[train_index], y[train_index])
        pred = model.predict(x[test_index])
        # ``pred`` holds one prediction per held-out row, in the same order as
        # ``test_index``. It must not be indexed with ``test_index`` again:
        # those are dataset-wide positions, not positions within ``pred``.
        stats_list.append(get_stats(y[test_index], pred))
    avg = average_stats(stats_list)
    print(avg)
    return avg


In [7]:
# Baseline run: RBF-kernel SVM on adata.X only, without the spatial columns.
classifier = svm.SVC(kernel='rbf')
res = eval_k_fold(classifier, adata.X, labels)
res.update(description="No spatial data")
svm_results.append(res)

{'acc': 0.8413679439637413, 'f1': 0.6918458992784452, 'balanced_acc': 0.6828352659430069}
{'acc': 0.8083539348990523, 'f1': 0.6448603906164005, 'balanced_acc': 0.6304722328293196}
{'acc': 0.8207663782447465, 'f1': 0.6876761021251848, 'balanced_acc': 0.6754432015820995}
{'acc': 0.8144829007004533, 'f1': 0.6733388685511592, 'balanced_acc': 0.662373033572585}


In [8]:
# Same features as the baseline run, but with class_weight='balanced'.
classifier = svm.SVC(class_weight='balanced', kernel='rbf')
res = eval_k_fold(classifier, adata.X, labels)
res.update(description="No spatial data, balanced")
svm_results.append(res)


{'acc': 0.7940358467243511, 'f1': 0.6975112341824025, 'balanced_acc': 0.7065444094052212}


In [None]:
# RBF-kernel SVM on adata.X with the spatial coordinates appended as columns.
classifier = svm.SVC(kernel='rbf')
res = eval_k_fold(classifier, X_with_spatial, labels)
res.update(description="With spatial data")
svm_results.append(res)

In [None]:
# Re-run the spatial experiment after rotating the coordinate columns by a
# quarter turn.
theta = math.pi / 2
R = np.array([[math.cos(theta), -math.sin(theta)],
             [math.sin(theta), math.cos(theta)]])

# hstack() output is COO by default, which cannot be sliced; work on a CSR view.
# (tocsr() is a cheap no-op when the matrix is already CSR.)
X_with_spatial_csr = X_with_spatial.tocsr()
# The last two columns are the spatial coordinates. They are row vectors
# multiplied on the right by R, which rotates them by -theta.
X_with_spatial_rotated = hstack(
    (X_with_spatial_csr[:, :-2], X_with_spatial_csr[:, -2:] @ csr_matrix(R)),
    format='csr')  # CSR so that eval_k_fold can index rows per fold

# Instantiate the model here rather than relying on whichever classifier the
# previously executed cell happened to leave behind.
classifier = svm.SVC(kernel='rbf')
res = eval_k_fold(classifier, X_with_spatial_rotated, labels)
res['description'] = "With spatial data, rotated"
svm_results.append(res)


In [None]:
# Persist every collected run, one JSON object per line.
save_results(filename="svm_results", results=svm_results)