In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV

In [None]:
# Registry of datasets this notebook can run on. Each entry holds the file-name
# stem, the `.obs` column names used by later cells (tissue, condition, cell
# type), and the condition values that are split off as the target set.
DATASETS = {
    "tabula_senis": dict(
        name="tabula_senis",
        tissue="tissue",
        condition_key="method_age",
        cell_type_key="cell_ontology_class",
        target_conditions=["droplet - 3m", "facs - 3m"],
    ),
}

In [None]:
# Turn on scanpy's global `autosave` setting for the rest of the session.
# NOTE(review): per scanpy's settings docs this makes plotting calls write figures
# to disk instead of showing them; no plotting call is visible in this section,
# so confirm the setting is still needed.
sc.settings.autosave = True

# Tabula Muris Senis

In [None]:
# Pull the configuration for the Tabula Senis run out of the registry and
# expose each field under its own name for the cells below.
data_dict = DATASETS['tabula_senis']
data_name, condition_key, cell_type_key, tissue, target_conditions = (
    data_dict[field]
    for field in ('name', 'condition_key', 'cell_type_key', 'tissue', 'target_conditions')
)

# Read the dataset from the local data directory; the file name is derived
# from the dataset's registered name.
adata = sc.read(f"./data/{data_name}_normalized.h5ad")

# Leave the object as the last expression so the notebook shows its summary.
adata

In [None]:
# Source (training) set: every cell that is neither from the 'Trachea' tissue
# nor from one of the target conditions. Both exclusions are folded into a
# single mask; "not A, then not B" selects the same rows as "not (A or B)".
is_trachea = adata.obs[tissue].isin(['Trachea'])
is_target_condition = adata.obs[condition_key].isin(target_conditions)
source_adata = adata[~(is_trachea | is_target_condition)]

# Display the resulting subset summary.
source_adata

In [None]:
# Target (query) set: the cells whose condition is one of the configured
# target conditions. Note that no tissue filter is applied here.
in_target_condition = adata.obs[condition_key].isin(target_conditions)
target_adata = adata[in_target_condition]

# Display the resulting subset summary.
target_adata

In [None]:
# Write the annotated cell type of every target cell (indexed by the rows of
# `target_adata.obs`) to disk, alongside the predictions written further down.
target_cell_types = target_adata.obs[cell_type_key]
target_cell_types.to_csv("SVM_cells.csv")

In [None]:
# Linear SVM wrapped in a probability calibrator, so that `predict_proba` is
# available for the confidence threshold applied in the prediction cell.
#
# LinearSVC's solver draws from a random number generator; without a fixed
# `random_state` two Restart-and-Run-All executions can produce slightly
# different models and therefore different prediction files. Pin the seed so
# the notebook is reproducible.
Classifier = LinearSVC(random_state=0)
clf = CalibratedClassifierCV(Classifier)

# Train on the source cells: expression matrix as features, annotated cell
# type as label. Left as the last expression so the fitted estimator is shown.
clf.fit(source_adata.X, source_adata.obs[cell_type_key])

In [None]:
# Minimum calibrated probability a prediction needs in order to keep its label;
# anything below is reported as 'Unknown'.
threshold = 0.7

# Run inference once. `CalibratedClassifierCV.predict` is itself defined as the
# arg-max over `predict_proba`, so calling both would push the whole target
# matrix through the calibrated ensemble twice for the same answer.
class_probs = clf.predict_proba(target_adata.X)
probs = np.max(class_probs, axis=1)

# Fancy indexing copies, so `clf.classes_` is not modified below. Cast to object
# dtype so that assigning 'Unknown' can never be truncated if the class labels
# happen to be stored as a fixed-width string array shorter than 7 characters.
predicted = clf.classes_[np.argmax(class_probs, axis=1)].astype(object)

# Positions of the low-confidence cells, relabelled in place.
unlabeled = np.where(probs < threshold)
predicted[unlabeled] = 'Unknown'

In [None]:
# Collect the (possibly 'Unknown') predicted label and its top calibrated
# probability for every target cell, then write the table to disk. Rows are in
# the same order as `target_adata` and carry the default integer index.
svm_results = pd.DataFrame({'Predicted': predicted, 'Prediction_Score': probs})
svm_results.to_csv("SVM_Results.csv")