In [None]:
import scanpy as sc
import numpy as np
import os
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.stats import entropy, itemfreq
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture as GMM
from sklearn.metrics import adjusted_rand_score as ARI
from sklearn.metrics import normalized_mutual_info_score as NMI
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import LabelEncoder
import csv

In [None]:
# Per-dataset configuration: the obs column names used for tissue, condition and
# cell type, plus the conditions that are treated as the target set.
DATASETS = dict(
    tabula_senis=dict(
        name="tabula_senis",
        tissue="tissue",
        condition_key="method_age",
        cell_type_key="cell_ontology_class",
        target_conditions=["droplet - 3m", "facs - 3m"],
    ),
)

In [None]:
# Turn on scanpy's `autosave` setting for the whole session.
# NOTE(review): believed to make scanpy save figures to disk instead of displaying
# them - confirm against the scanpy settings docs; no plotting call is visible in
# this part of the notebook.
sc.settings.autosave = True

# Tabula Muris Senis

In [None]:
# Unpack the 'tabula_senis' configuration into the notebook-level names used below.
data_dict = DATASETS['tabula_senis']
data_name, tissue = data_dict['name'], data_dict['tissue']
condition_key, cell_type_key = data_dict['condition_key'], data_dict['cell_type_key']
target_conditions = data_dict['target_conditions']

# Read the `<data_name>_normalized.h5ad` file; the bare name on the last line
# displays the loaded object's summary as the cell output.
normalized_path = f"./data/{data_name}_normalized.h5ad"
adata = sc.read(normalized_path)
adata

In [None]:
# Source cells: everything that is neither Trachea tissue nor one of the target
# conditions. Both exclusions are evaluated on `adata.obs` and applied in one slice.
is_trachea = adata.obs[tissue].isin(['Trachea'])
is_target = adata.obs[condition_key].isin(target_conditions)
source_adata = adata[~(is_trachea | is_target)]
source_adata

In [None]:
# Target cells: rows whose condition is one of `target_conditions`.
in_target = adata.obs[condition_key].isin(target_conditions)
target_adata = adata[in_target]
target_adata

In [None]:
# Source conditions to export, one pair of CSV files per entry.
conditions = [
    'droplet - 24m',
    'droplet - 18m',
    'droplet - 21m',
    'droplet - 1m',
    'droplet - 30m',
    'facs - 18m',
    'facs - 24m',
    'facs - 21m',
]

In [None]:
# Export every source condition as two CSV files in the working directory
# (presumably the inputs of Seurat_Classification.R - confirm against that script):
#   ./{c}.csv          - expression matrix, var_names as rows and obs_names as columns
#   ./{c}_celltype.csv - header row holding the condition name, then one cell-type
#                        label per row, in the same order as the matrix columns
for c in conditions:
    batch_adata = source_adata[source_adata.obs[condition_key] == c, :]
    # `.toarray()` replaces the `.A` shorthand, which recent SciPy releases no longer
    # provide on sparse matrices. Like `.A`, it assumes X is a SciPy sparse matrix.
    batch = pd.DataFrame(data=batch_adata.X.toarray().transpose(),
                         index=batch_adata.var_names,
                         columns=batch_adata.obs_names)
    # newline='' leaves line endings to the csv module; without it Windows writes
    # '\r\r\n', which shows up as a blank row after every record.
    with open(f'./{c}_celltype.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([c])
        writer.writerows([cell_type] for cell_type in batch_adata.obs[cell_type_key].tolist())
    batch.to_csv(f"./{c}.csv")

In [None]:
# Export every target condition as two CSV files in the working directory
# (presumably the inputs of Seurat_Classification.R - confirm against that script):
#   ./{t}.csv          - expression matrix, var_names as rows and obs_names as columns
#   ./{t}_celltype.csv - header row holding the condition name, then one cell-type
#                        label per row, in the same order as the matrix columns
for t in target_conditions:
    batch_adata = target_adata[target_adata.obs[condition_key] == t, :]
    # `.toarray()` replaces the `.A` shorthand, which recent SciPy releases no longer
    # provide on sparse matrices. Like `.A`, it assumes X is a SciPy sparse matrix.
    batch = pd.DataFrame(data=batch_adata.X.toarray().transpose(),
                         index=batch_adata.var_names,
                         columns=batch_adata.obs_names)
    # newline='' leaves line endings to the csv module; without it Windows writes
    # '\r\r\n', which shows up as a blank row after every record.
    with open(f'./{t}_celltype.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([t])
        writer.writerows([cell_type] for cell_type in batch_adata.obs[cell_type_key].tolist())
    batch.to_csv(f"./{t}.csv")

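### Now run the `Seurat_Classification.R` script, then continue

The next cell expects one prediction file per target condition at `./results/Seurat/<data_name>/<condition>_pred.csv`.
<br>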

In [None]:
# Attach the Seurat predictions to each target-condition batch and merge all batches
# into `final_adata` (left as None when `target_conditions` is empty).
final_adata = None
annotated_batches = []
for t in target_conditions:
    # Copy explicitly: the slice is a view of `target_adata`, and adding obs columns
    # to a view would otherwise trigger an implicit copy with a warning.
    batch_adata = target_adata[target_adata.obs[condition_key] == t, :].copy()

    pred_df = pd.read_csv(f'./results/Seurat/{data_name}/{t}_pred.csv', index_col=0)
    # Take the two columns one at a time so each keeps its own dtype; converting the
    # whole frame with `.values` would coerce the score column to object.
    # Values are assigned by position, not by cell name.
    # NOTE(review): assumes the prediction rows are in the same order as the cells of
    # this batch - confirm against the R script's output.
    batch_adata.obs['Prediction'] = pred_df.iloc[:, 0].values
    batch_adata.obs['Prediction_Score'] = pred_df.iloc[:, 1].values

    annotated_batches.append(batch_adata)

if annotated_batches:
    first_batch, *other_batches = annotated_batches
    # A single concatenate call over all remaining batches gives the same result as
    # pairwise merging for two batches, and avoids nested batch labels / obs-name
    # suffixes when there are more.
    final_adata = first_batch.concatenate(*other_batches) if other_batches else first_batch

In [None]:
# Persist the annotated target cells in the same results directory the Seurat
# prediction files were read from.
result_path = f"./results/Seurat/{data_name}/classification_result_adata.h5ad"
final_adata.write(result_path)