In [1]:
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import scanpy as sc
import anndata
import os
import sys

In [4]:
# Put a local tacco checkout near the front of the module search path so that
# `import tacco` resolves to it.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
sys.path.insert(1, '/secure/projects/tacco/')
import tacco as tc

In [5]:
# Make the project's spatial helper module importable and pull all of its public
# names into the notebook namespace.
# NOTE(review): several names used by later cells (out_dir_base, data_dir, codeDir,
# get_and_process_data, create_scanpy) are not assigned in the visible cells and
# presumably come from this star import — confirm, and prefer explicit imports.
sys.path.insert(1, '/secure/projects/HTAPP_MBC/src/spatial')
from spatial_utils import *

In [12]:
# Output folder for everything that is merged across samples; create it if missing.
combined_dir = f"{out_dir_base}/combined_all/"
os.makedirs(combined_dir, exist_ok=True)

In [7]:
# Sample tracking sheet; its 'sample' column drives which samples are processed below.
sample_sheet = pd.read_csv(f"{codeDir}/meta/sampletracking_HTAPP_MBC_spatial.csv")

In [8]:
# These identifiers are used as defaults by many helper functions. If any name is
# changed here, it must also be passed explicitly when calling those functions.
ct_column = 'cell_type'

# Spatial method names are hard-coded for now because some methods need special
# treatment. This could be fixed by giving each method its own variable.
sm_methods = [
    "slide_seq",
    "merfish",
    "merfish_bin",
    "codex",
    "exseq",
    "exseq_bin",
]
sc_method = "scRNAseq"

# Single-cell reference first, followed by every spatial method.
all_methods = [sc_method] + sm_methods
all_methods

['scRNAseq',
 'slide_seq',
 'merfish',
 'merfish_bin',
 'codex',
 'exseq',
 'exseq_bin']

In [9]:
# Unique sample ids in sheet order, minus any id listed in the exclusion list
# (currently only the empty string).
run_samples = [
    sample_id
    for sample_id in sample_sheet['sample'].unique()
    if sample_id not in ['']
]
run_samples

['HTAPP-944-SMP-7479',
 'HTAPP-895-SMP-7359',
 'HTAPP-880-SMP-7179',
 'HTAPP-878-SMP-7149',
 'HTAPP-812-SMP-8239',
 'HTAPP-514-SMP-6760',
 'HTAPP-982-SMP-7629',
 'HTAPP-997-SMP-7789',
 'HTAPP-917-SMP-4531',
 'HTAPP-853-SMP-4381',
 'HTAPP-313-SMP-932',
 'HTAPP-330-SMP-1082',
 'HTAPP-783-SMP-4081',
 'HTAPP-364-SMP-1321',
 'HTAPP-213-SMP-6752']

In [76]:
# Arguments shared by every tc.get.counts call in this cell.
per_sample_counts_location = ('obsm', 'counts')
per_sample_annotation = ('replicate', 'cell_type', 'RCTD', 'OT', 'OT_max', 'x', 'y')

# One independent, initially empty list per spatial method: the per-sample objects
# and, in the same order, the sample ids they came from.
adatas_combined = pd.Series([[] for _ in sm_methods], index=sm_methods, dtype=object)
samples_combined = pd.Series([[] for _ in sm_methods], index=sm_methods, dtype=object)
samples = []

for sample in run_samples:
    print(sample)
    out_dir = out_dir_base + sample
    adatas = get_and_process_data(
        sample,
        data_dir,
        out_dir,
        methods=sm_methods,
        process=False,
    )
    # Only the methods returned for this sample are present in adatas.index.
    for method in adatas.index:
        samples_combined[method].append(sample)
        adatas_combined[method].append(
            tc.get.counts(
                adatas[method],
                counts_location=per_sample_counts_location,
                annotation=list(per_sample_annotation),
            )
        )

# Replace each per-method list by a single object merged across samples.
for method in adatas_combined.index:
    adatas_combined[method] = create_scanpy(
        adatas_combined[method],
        replicates=samples_combined[method],
        var_genes=None,
        batch_key='sample',
        redo=False,
        process=True,
        mode=None,
    )

HTAPP-944-SMP-7479
reading  slide_seq  from buffer
reading  merfish  from buffer
reading  merfish_bin  from buffer
No data for codex
No data for exseq
No data for exseq_bin
HTAPP-895-SMP-7359
reading  slide_seq  from buffer
No data for merfish
No data for merfish_bin
No data for codex
reading  exseq  from buffer
No data for exseq_bin
HTAPP-880-SMP-7179
reading  slide_seq  from buffer
reading  merfish  from buffer
reading  merfish_bin  from buffer
reading  codex  from buffer
reading  exseq  from buffer
reading  exseq_bin  from buffer
HTAPP-878-SMP-7149
reading  slide_seq  from buffer
reading  merfish  from buffer
reading  merfish_bin  from buffer
reading  codex  from buffer
No data for exseq
No data for exseq_bin
HTAPP-812-SMP-8239
reading  slide_seq  from buffer
reading  merfish  from buffer
reading  merfish_bin  from buffer
reading  codex  from buffer
No data for exseq
No data for exseq_bin
HTAPP-514-SMP-6760
reading  slide_seq  from buffer
reading  merfish  from buffer
reading  merfi

Trying to set attribute `.obs` of view, copying.
... storing 'replicate' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'RCTD' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'OT' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'OT_max' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'replicate' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'RCTD' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'OT' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'OT_max' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'replicate' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'RCTD' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'OT' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'OT_max' as c

In [77]:
# Persist one gzip-compressed .h5ad file per spatial method.
for method in sm_methods:
    adatas_combined[method].write(
        f"{combined_dir}/{method}.h5ad",
        compression="gzip",
    )

In [16]:
# Display the per-method Series (indexed by spatial method name) for a quick check.
adatas_combined

slide_seq      [[[View of AnnData object with n_obs × n_vars ...
merfish        [[[View of AnnData object with n_obs × n_vars ...
merfish_bin    [[[View of AnnData object with n_obs × n_vars ...
codex          [[[View of AnnData object with n_obs × n_vars ...
exseq          [[[View of AnnData object with n_obs × n_vars ...
exseq_bin      [[[View of AnnData object with n_obs × n_vars ...
dtype: object

In [79]:
# Annotation columns requested for every method; 'x' and 'y' (and, from the second
# method on, 'method') are appended per call below.
shared_annotation = ('sample', 'replicate', 'cell_type', 'RCTD', 'OT', 'OT_max')
first_method, *remaining_methods = sm_methods

# Start from the first method and label its observations after extraction.
adatas_combined_all = tc.get.counts(
    adatas_combined[first_method],
    counts_location=('obsm', 'counts'),
    annotation=[*shared_annotation, 'x', 'y'],
)
adatas_combined_all.obs["method"] = first_method

# Append the remaining methods one at a time, keeping the union of variables.
for method in remaining_methods:
    print(method)
    # Label the stored per-method object so 'method' can be requested as annotation.
    adatas_combined[method].obs["method"] = method
    adatas_combined_all = adatas_combined_all.concatenate(
        tc.get.counts(
            adatas_combined[method],
            counts_location=('obsm', 'counts'),
            annotation=[*shared_annotation, 'method', 'x', 'y'],
        ),
        join='outer',
    )

merfish
merfish_bin
codex
exseq
exseq_bin


In [90]:
# Persist the object merged across all methods as a gzip-compressed .h5ad file.
adatas_combined_all.write(
    f"{combined_dir}/all_methods.h5ad",
    compression="gzip",
)

... storing 'sample' as categorical
... storing 'replicate' as categorical
... storing 'RCTD' as categorical
... storing 'OT' as categorical
... storing 'OT_max' as categorical
... storing 'method' as categorical


In [100]:
# Bin size 50: write the binned coordinates to x_50 / y_50, then derive bin_50 from
# those two columns together with 'sample' (presumably one id per sample-specific
# bin — confirm against the tacco documentation).
xy_bin_keys = ["x_50", "y_50"]
tc.utils.bin(adatas_combined_all.obs, bin_size=50, bin_keys=xy_bin_keys)
tc.utils.hash(
    adatas_combined_all.obs,
    keys=[*xy_bin_keys, "sample"],
    hash_key=["bin_50"],
);

In [96]:
# Bin size 100: write the binned coordinates to x_100 / y_100, then derive bin_100
# from those two columns together with 'sample' (presumably one id per
# sample-specific bin — confirm against the tacco documentation).
xy_bin_keys = ["x_100", "y_100"]
tc.utils.bin(adatas_combined_all.obs, bin_size=100, bin_keys=xy_bin_keys)
tc.utils.hash(
    adatas_combined_all.obs,
    keys=[*xy_bin_keys, "sample"],
    hash_key=["bin_100"],
);

In [97]:
# Bin size 200: write the binned coordinates to x_200 / y_200, then derive bin_200
# from those two columns together with 'sample' (presumably one id per
# sample-specific bin — confirm against the tacco documentation).
xy_bin_keys = ["x_200", "y_200"]
tc.utils.bin(adatas_combined_all.obs, bin_size=200, bin_keys=xy_bin_keys)
tc.utils.hash(
    adatas_combined_all.obs,
    keys=[*xy_bin_keys, "sample"],
    hash_key=["bin_200"],
);

In [101]:
# Preview the first rows of the merged observation table, including the bin columns
# added by the cells above.
adatas_combined_all.obs.head()

Unnamed: 0,sample,replicate,RCTD,OT,OT_max,x,y,method,batch,x_100,y_100,bin_100,x_200,y_200,bin_200,x_50,y_50,bin_50
CTCATACTGGTGCC-1-HTAPP-944-SMP-7479-0-0-0-0-0,HTAPP-944-SMP-7479,1,MBC_neuronal,Endothelial_angiogenic,Endothelial_angiogenic,951.349219,5289.823421,slide_seq,0,10,57,0,5,28,0,21,115,0
AGCGCGAAAATCCC-1-HTAPP-944-SMP-7479-0-0-0-0-0,HTAPP-944-SMP-7479,1,MBC_neuronal,MBC_neuronal,MBC_neuronal,1693.617881,4487.136003,slide_seq,0,18,49,493,9,24,319,36,99,845
TCTAACATCATGCC-1-HTAPP-944-SMP-7479-0-0-0-0-0,HTAPP-944-SMP-7479,1,MBC_neuronal,MBC_neuronal,MBC_neuronal,1589.748012,5247.113366,slide_seq,0,17,57,771,8,28,470,34,114,1385
TGACCGGCCAACGA-1-HTAPP-944-SMP-7479-0-0-0-0-0,HTAPP-944-SMP-7479,1,MBC_neuronal,Endothelial_angiogenic,Endothelial_angiogenic,1480.906427,4807.731149,slide_seq,0,15,52,1142,7,26,718,31,105,2072
AAGCCAATGTCATC-1-HTAPP-944-SMP-7479-0-0-0-0-0,HTAPP-944-SMP-7479,1,MBC_neuronal,Endothelial_angiogenic,Endothelial_angiogenic,707.578823,4832.563237,slide_seq,0,8,53,1606,4,26,986,16,106,2936


In [102]:
# Export the merged observation table as a tab-separated file.
adatas_combined_all.obs.to_csv(
    f"{combined_dir}/all_methods_obs.tsv",
    sep="\t",
)