In [1]:
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import scanpy as sc
import anndata
import os
import sys

In [4]:
sys.path.insert(1, '/secure/projects/tacco/')
import tacco as tc

In [5]:
# Make the project's spatial helper module importable and pull all of its public names in.
# NOTE(review): the path is a hard-coded absolute location, so this only resolves on a
# machine that has the project checked out at exactly this place.
# NOTE(review): `out_dir_base`, `codeDir`, `data_dir` and `get_and_process_data` are used
# by later cells but are not defined in the visible cells; presumably they are provided by
# this star import -- confirm, and prefer importing them explicitly.
sys.path.insert(1, '/secure/projects/HTAPP_MBC/src/spatial')
from spatial_utils import *

In [4]:
# Folder below the output base that receives the files written by this notebook;
# created on first use and reused (without error) on later runs.
plots_dir = f"{out_dir_base}/plots/"
os.makedirs(plots_dir, exist_ok=True)

In [5]:
# Sample tracking sheet; its 'sample' column determines which samples are processed below.
sample_sheet = pd.read_csv(f"{codeDir}/meta/sampletracking_HTAPP_MBC_spatial.csv")

In [6]:
# These identifiers are the defaults of many helper functions. If a name is changed
# here, the new value has to be passed explicitly when calling those functions.
ct_column = 'cell_type'
sc_method = "scRNAseq"
# Spatial method names are hard-coded for now because some methods need special
# treatment. This could be fixed by giving each method its own variable.
sm_methods = [
    "slide_seq",
    "merfish",
    "merfish_bin",
    "codex",
    "exseq",
    "exseq_bin",
]
# Single-cell reference first, followed by every spatial method.
all_methods = [sc_method] + sm_methods
all_methods

['scRNAseq',
 'slide_seq',
 'merfish',
 'merfish_bin',
 'codex',
 'exseq',
 'exseq_bin']

In [7]:
# Every unique sample of the sheet, in order of first appearance, minus the samples
# named in the (currently empty) exclusion list.
run_samples = [
    sample_id
    for sample_id in sample_sheet['sample'].unique()
    if sample_id not in []
]
run_samples

['HTAPP-944-SMP-7479',
 'HTAPP-895-SMP-7359',
 'HTAPP-880-SMP-7179',
 'HTAPP-878-SMP-7149',
 'HTAPP-812-SMP-8239',
 'HTAPP-514-SMP-6760',
 'HTAPP-982-SMP-7629',
 'HTAPP-997-SMP-7789',
 'HTAPP-917-SMP-4531',
 'HTAPP-853-SMP-4381',
 'HTAPP-313-SMP-932',
 'HTAPP-330-SMP-1082',
 'HTAPP-783-SMP-4081',
 'HTAPP-364-SMP-1321',
 'HTAPP-213-SMP-6752']

In [8]:
# Build two tables over all samples and methods:
#   res     -- per-replicate summary statistics of n_counts / n_genes
#   obs_all -- the per-observation metadata, tagged with sample and method
# The pieces are gathered in lists and concatenated once after the loop instead of
# growing a DataFrame with pd.concat on every iteration.
res_frames=[]
obs_frames=[]
for sample in run_samples:
    print(sample)
    out_dir=out_dir_base+"/"+sample
    # NOTE(review): `adatas` is presumably a pandas Series of AnnData objects indexed
    # by method name -- confirm against get_and_process_data in spatial_utils.
    adatas=get_and_process_data(sample,data_dir,out_dir,methods=all_methods,process=False,verbose=False)
    for idx in adatas.index.to_list():
        adata=adatas[idx]
        # Methods without a stored 'counts' matrix are skipped. Only the missing-key
        # case is tolerated, so unrelated errors are no longer silently swallowed.
        try:
            total_genes=adata.obsm['counts'].shape[1]
        except KeyError:
            continue
        # Summary per replicate; the replicate label is cast to str for grouping.
        replicate_stats=adata.obs.groupby(adata.obs["replicate"].astype(str)).agg({ 'n_counts' : ['median','mean','min','max'], 'n_genes' : ['median','mean','min','max','count']})
        replicate_stats['sample']=sample
        replicate_stats['method']=idx
        replicate_stats['total_genes']=total_genes
        # Flags whether 'ALB' is contained in the stored gene list of this dataset.
        replicate_stats['ALB']='ALB' in adata.uns['counts_var']
        res_frames.append(replicate_stats)
        # assign() returns a new frame, so the AnnData's own .obs is left untouched.
        obs=adata.obs.assign(sample=sample,method=idx)
        obs_frames.append(obs)

# pd.concat raises on an empty list, so fall back to the empty frames the loop
# previously started from when nothing was collected.
res=pd.concat(res_frames) if res_frames else pd.DataFrame()
obs_all=pd.concat(obs_frames) if obs_frames else pd.DataFrame()

HTAPP-944-SMP-7479
HTAPP-895-SMP-7359
HTAPP-880-SMP-7179
HTAPP-878-SMP-7149
HTAPP-812-SMP-8239
HTAPP-514-SMP-6760
HTAPP-982-SMP-7629
HTAPP-997-SMP-7789
HTAPP-917-SMP-4531
HTAPP-853-SMP-4381
HTAPP-313-SMP-932
HTAPP-330-SMP-1082
HTAPP-783-SMP-4081
HTAPP-364-SMP-1321
HTAPP-213-SMP-6752


In [9]:
# Display the combined per-replicate statistics table (pandas truncates the middle rows).
res

Unnamed: 0_level_0,n_counts,n_counts,n_counts,n_counts,n_genes,n_genes,n_genes,n_genes,n_genes,sample,method,total_genes,ALB
Unnamed: 0_level_1,median,mean,min,max,median,mean,min,max,count,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
replicate,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
1,2748.000000,3281.706055,301.000000,19949.0000,2114.0,2240.537436,218,7898,5476,HTAPP-944-SMP-7479,scRNAseq,25811,True
2,3126.000000,3614.295166,308.000000,19771.0000,2363.0,2460.665639,288,7942,4540,HTAPP-944-SMP-7479,scRNAseq,25811,True
1,382.500000,565.534607,30.000000,5332.0000,308.0,408.561983,30,2745,9422,HTAPP-944-SMP-7479,slide_seq,19365,True
1,45.000000,53.079563,20.000000,320.0000,25.0,26.713592,5,82,9263,HTAPP-944-SMP-7479,merfish,291,False
2,53.000000,64.117752,20.000000,537.0000,28.0,29.910880,7,99,11176,HTAPP-944-SMP-7479,merfish,291,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,2428.000000,3314.817871,304.000000,19986.0000,1842.0,2202.154277,293,7595,4641,HTAPP-213-SMP-6752,scRNAseq,25083,False
2,2175.000000,2989.522949,487.000000,19914.0000,1753.0,2097.886778,137,7942,5158,HTAPP-213-SMP-6752,scRNAseq,25083,False
1,137.000000,209.913895,55.000000,3227.0000,116.0,169.714789,41,2053,3124,HTAPP-213-SMP-6752,slide_seq,17369,False
2,141.000000,202.246078,55.000000,3697.0000,121.0,166.481404,40,2976,6238,HTAPP-213-SMP-6752,slide_seq,17369,False


In [10]:
# Re-key the statistics by sample: the 'replicate' group index becomes an ordinary
# column and 'sample' becomes the new row index.
res_idx = (
    res
    .reset_index()
    .set_index(['sample'])
)

In [11]:
# Replace the two-level aggregate header by flat names. The assignment is positional,
# so the order must match the columns of the statistics table exactly.
res_idx.columns = [
    'replicate',
    # n_counts aggregates
    'median_umis', 'mean_umis', 'min_umis', 'max_umis',
    # n_genes aggregates
    'median_genes', 'mean_genes', 'min_genes', 'max_genes',
    'observations',  # the 'count' aggregate of n_genes
    'method', 'total_genes', 'ALB',
]

In [12]:
# Write the statistics as a tab-separated file into the plots folder.
# os.path.join is used because plots_dir already ends with a separator; plain string
# concatenation with "/stats.tsv" produced a doubled "//" in the path.
res_idx.to_csv(os.path.join(plots_dir,"stats.tsv"),sep="\t")

In [11]:
# Statistics rows of the "exseq" method only.
res_idx.loc[res_idx["method"].eq("exseq")]

Unnamed: 0_level_0,replicate,median_umis,mean_umis,min_umis,max_umis,median_genes,mean_genes,min_genes,max_genes,observations,method,total_genes,ALB
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
HTAPP-895-SMP-7359,1,186.0,275.799194,51.0,4432.0,52.0,60.58539,5,263,2998,exseq,297,False
HTAPP-982-SMP-7629,1,44.0,58.442543,20.0,614.0,29.0,32.90663,6,138,1810,exseq,299,True


In [12]:
# Statistics rows of the "exseq_bin" method only.
res_idx.loc[res_idx["method"].eq("exseq_bin")]

Unnamed: 0_level_0,replicate,median_umis,mean_umis,min_umis,max_umis,median_genes,mean_genes,min_genes,max_genes,observations,method,total_genes,ALB
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
HTAPP-982-SMP-7629,1,52.0,60.893864,20.0,193.0,33.0,34.94464,6,85,2836,exseq_bin,299,True


In [10]:
# Statistics rows of the "merfish" method only.
res_idx.loc[res_idx["method"].eq("merfish")]

Unnamed: 0_level_0,replicate,median_umis,mean_umis,min_umis,max_umis,median_genes,mean_genes,min_genes,max_genes,observations,method,total_genes,ALB
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
HTAPP-944-SMP-7479,1,45.0,53.079563,20.0,320.0,25.0,26.713592,5,82,9263,merfish,291,False
HTAPP-944-SMP-7479,2,53.0,64.117752,20.0,537.0,28.0,29.91088,7,99,11176,merfish,291,False
HTAPP-944-SMP-7479,3,74.0,86.597412,20.0,658.0,35.0,36.304332,4,117,10758,merfish,291,False
HTAPP-880-SMP-7179,2,210.0,223.167038,20.0,1102.0,60.0,58.361467,8,150,3981,merfish,291,False
HTAPP-878-SMP-7149,2,66.0,83.85228,20.0,682.0,25.0,27.571818,3,99,11014,merfish,291,False
HTAPP-812-SMP-8239,1,64.0,82.37793,20.0,575.0,29.0,31.67965,4,104,3543,merfish,291,False
HTAPP-514-SMP-6760,2,85.0,102.805679,20.0,750.0,38.0,41.62191,5,135,17636,merfish,291,False
HTAPP-982-SMP-7629,1,61.0,67.675072,20.0,340.0,30.0,31.004767,7,95,7971,merfish,291,False
HTAPP-982-SMP-7629,2,53.0,59.160145,20.0,225.0,27.0,28.389848,6,80,5851,merfish,291,False
HTAPP-917-SMP-4531,2,115.0,125.146294,20.0,624.0,47.0,46.408384,7,125,11641,merfish,291,False


In [34]:
# Statistics rows of the "merfish_bin" method only.
res_idx.loc[res_idx["method"].eq("merfish_bin")]

Unnamed: 0_level_0,replicate,median_umis,mean_umis,min_umis,max_umis,median_genes,mean_genes,min_genes,max_genes,observations,method,total_genes,ALB
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
HTAPP-944-SMP-7479,1,88.0,92.894646,20.0,290.0,41.0,41.23996,3,93,25846,merfish_bin,291,False
HTAPP-944-SMP-7479,2,110.0,116.754112,20.0,482.0,47.0,46.873757,5,119,29269,merfish_bin,291,False
HTAPP-944-SMP-7479,3,152.0,151.772064,20.0,432.0,57.0,54.086989,4,113,27153,merfish_bin,291,False
HTAPP-880-SMP-7179,2,206.0,216.66391,20.0,606.0,62.0,58.247188,6,124,12092,merfish_bin,291,False
HTAPP-878-SMP-7149,2,80.0,98.620728,20.0,518.0,29.0,31.570358,3,121,40024,merfish_bin,291,False
HTAPP-812-SMP-8239,1,73.0,84.388924,20.0,303.0,33.0,34.416884,3,84,16453,merfish_bin,291,False
HTAPP-514-SMP-6760,2,142.0,145.534439,20.0,504.0,57.0,54.856886,4,119,65053,merfish_bin,291,False
HTAPP-982-SMP-7629,1,97.0,102.048607,20.0,328.0,42.0,42.058004,6,87,28636,merfish_bin,291,False
HTAPP-982-SMP-7629,2,74.0,80.749046,20.0,260.0,36.0,36.088462,8,77,20992,merfish_bin,291,False
HTAPP-917-SMP-4531,2,174.0,172.121994,20.0,487.0,61.0,57.154659,5,110,42959,merfish_bin,291,False


In [35]:
# Statistics rows of the "slide_seq" method only.
res_idx.loc[res_idx["method"].eq("slide_seq")]

Unnamed: 0_level_0,replicate,median_umis,mean_umis,min_umis,max_umis,median_genes,mean_genes,min_genes,max_genes,observations,method,total_genes,ALB
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
HTAPP-944-SMP-7479,1,382.5,565.534607,30.0,5332.0,308.0,408.561983,30,2745,9422,slide_seq,19365,True
HTAPP-895-SMP-7359,1,380.0,607.814514,30.0,7112.0,282.0,402.293076,30,3111,6210,slide_seq,18107,False
HTAPP-880-SMP-7179,1,172.0,257.291626,46.0,8360.0,138.0,192.873423,30,5437,12838,slide_seq,17731,True
HTAPP-880-SMP-7179,2,115.0,159.67807,45.0,1845.0,92.0,124.332941,30,1181,9350,slide_seq,17731,True
HTAPP-878-SMP-7149,1,113.0,139.779221,63.0,939.0,96.0,116.847402,37,644,7949,slide_seq,19041,False
HTAPP-878-SMP-7149,2,133.0,174.173035,63.0,1303.0,114.0,144.271372,35,990,14084,slide_seq,19041,False
HTAPP-812-SMP-8239,1,160.0,236.36673,51.0,2162.0,137.0,190.174255,35,1343,6479,slide_seq,16746,False
HTAPP-812-SMP-8239,2,123.0,165.751892,52.0,1894.0,105.0,136.282269,35,1206,4478,slide_seq,16746,False
HTAPP-514-SMP-6760,1,178.0,281.985077,30.0,3711.0,163.0,241.086196,30,2335,17437,slide_seq,20944,False
HTAPP-514-SMP-6760,2,217.0,339.457336,30.0,4826.0,195.0,281.756426,30,2579,15835,slide_seq,20944,False


In [16]:
# For the "codex" method only the replicate label and the number of observations are shown.
res_idx.loc[res_idx["method"].eq("codex"), ["replicate", "observations"]]

Unnamed: 0_level_0,replicate,observations
sample,Unnamed: 1_level_1,Unnamed: 2_level_1
HTAPP-880-SMP-7179,2,21019
HTAPP-878-SMP-7149,2,32070
HTAPP-812-SMP-8239,2,12815
HTAPP-514-SMP-6760,2,35995
HTAPP-982-SMP-7629,2,19246
HTAPP-997-SMP-7789,1,22587
HTAPP-917-SMP-4531,2,35867
HTAPP-853-SMP-4381,1,14710
HTAPP-313-SMP-932,1,30005
HTAPP-330-SMP-1082,2,36888
