In [1]:
# --- Imports (kept together so the notebook's dependencies are visible at a glance) ---
from pathlib import Path

import cell2location
import matplotlib.pyplot as plt
import pandas as pd
import scanpy as sc
import seaborn as sns
from matplotlib import rcParams

from vistools import utils

# --- Plotting configuration ---
DPI = 300  # resolution used when figures are saved to disk
SCREEN_DPI = 100  # resolution used for inline (on-screen) figures
FONTSIZE = 20  # NOTE(review): the original trailing "# 42" was presumably an alternative size tried earlier

# scanpy defaults: transparent, vector-friendly figures, saved at DPI.
sc.settings.set_figure_params(
    scanpy=True,
    dpi=SCREEN_DPI,
    transparent=True,
    vector_friendly=True,
    dpi_save=DPI,
)

# Font type 42 (TrueType) for PDF output.
rcParams["pdf.fonttype"] = 42

# seaborn theme is applied after the scanpy defaults, so its rc values take
# effect for the settings listed here.
custom_params = {
    "font.size": FONTSIZE,
    "figure.dpi": SCREEN_DPI,
    "savefig.dpi": DPI,
}
sns.set_theme(style="ticks", rc=custom_params)


Global seed set to 0


In [2]:
# Run parameters.
# optimal_s: smoothness value embedded in the name of the segmentation file
# that is loaded below (values noted by the author: 1, 1.2, 1.5, 2).
optimal_s = 1.2
# SAMPLE_NAME: name of the results sub-directory for this sample set.
SAMPLE_NAME = "concat_withWu2022"

In [3]:
# Results directory for this sample set. The location is machine-specific:
# change the path below when running elsewhere.
DIR2SAVE = Path(f"/data/BCI-CRC/nasrine/data/CRC/spatial/CRC_LM_VISIUM/CRC_LM_VISIUM_04_08_09_11/cell2loc_spatialde2/{SAMPLE_NAME}")
# Create the directory (and any missing parents); do nothing if it already exists.
DIR2SAVE.mkdir(exist_ok=True, parents=True)

In [4]:
# Figures live in a "figures" sub-directory of the results directory.
FIG2SAVE = DIR2SAVE / "figures/"
FIG2SAVE.mkdir(exist_ok=True, parents=True)
# Point scanpy's global figure directory at it so that scanpy saves plots there.
sc.settings.figdir = FIG2SAVE

In [5]:
# Load the segmentation result stored for the chosen smoothness value.
segmentation_h5ad = DIR2SAVE / f"sp_segmentation_smoothness{optimal_s}.h5ad"
adata = sc.read_h5ad(segmentation_h5ad)
adata

AnnData object with n_obs × n_vars = 16108 × 55
    obs: 'B', 'C1QC Mac', 'C3 iCAF', 'CD4 Tfh', 'CD4 Th', 'CD4 Th HSP', 'CD4 Th17', 'CD4 Tn', 'CD8 Tem', 'CD8 Tem GZMB', 'CD8 Tem HSP', 'CD8 Tex', 'Cholangiocyte', 'Colonocyte', 'ECM CAF', 'Enteroendocrine', 'FCN1 Mono', 'Goblet', 'HSP Mono', 'Hepatocyte', 'Hypoxia', 'IL1B Mac', 'Intermediate', 'Kupffer', 'Lymphatic endo', 'MAIT', 'Mast', 'Myofibroblast', 'NK1', 'NK2', 'NKT', 'NLRP3 Mac', 'Neutrophil', 'PLTP LYVE1 Mac', 'Pericyte', 'Plasma', 'SEC', 'SMC', 'SPP1 Mac', 'Stalk-like endo', 'Stem', 'Stem (NOTUM high)', 'TA1', 'TA2', 'Tip-like endo', 'Treg', 'Tuft', 'UPR', 'cDC1', 'cDC2', 'gdT', 'ipEMT', 'migDC', 'pDC', 'pEMT', 'segmentation_labels', 'Sample'
    uns: 'Sample_name_colors', '_scvi_manager_uuid', '_scvi_uuid', 'mod', 'segmentation_labels_colors', 'spatial'
    obsm: 'means_cell_abundance_w_sf', 'mt', 'q05_cell_abundance_w_sf', 'q05_mRNA_abundance_u_sf', 'q95_cell_abundance_w_sf', 'ribo', 'segmentation_class_probabilities', 'segme

In [6]:
# Build a table from the matrix stored under obsm["q05_cell_abundance_w_sf"]:
# rows are indexed like adata.obs, columns are labelled with the model's
# factor names.
abundance_q05 = adata.obsm["q05_cell_abundance_w_sf"]
factor_names = adata.uns["mod"]["factor_names"]
df = pd.DataFrame(
    abundance_q05.to_numpy(),
    columns=factor_names,
    index=adata.obs.index,
)
df

Unnamed: 0_level_0,B,C1QC Mac,C3 iCAF,CD4 Tfh,CD4 Th,CD4 Th HSP,CD4 Th17,CD4 Tn,CD8 Tem,CD8 Tem GZMB,...,Treg,Tuft,UPR,cDC1,cDC2,gdT,ipEMT,migDC,pDC,pEMT
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CRC08_LM_VISIUM_AAACAAGTATCTCCCA-1,0.000598,0.000064,0.000022,0.000365,0.000811,0.001681,0.001619,0.000898,0.000907,0.000488,...,0.000268,0.000482,0.012999,0.000056,0.000060,0.000401,0.001272,0.000069,0.000206,0.001876
CRC08_LM_VISIUM_AAACCGGAAATGTTAA-1,0.001441,0.000154,0.000230,0.000748,0.001416,0.001042,0.001674,0.001143,0.001382,0.001960,...,0.000923,0.060726,0.007303,0.000086,0.000112,0.001037,0.308789,0.000355,0.000295,0.334837
CRC08_LM_VISIUM_AAACCGTTCGTCCAGG-1,0.000523,0.000012,0.000092,0.000928,0.000643,0.000923,0.000954,0.000413,0.000868,0.000966,...,0.000789,0.000807,0.004257,0.000120,0.000030,0.000913,0.004188,0.000205,0.004973,0.000836
CRC08_LM_VISIUM_AAACCTAAGCAGCCGG-1,0.000121,0.000042,0.000030,0.000207,0.000356,0.000848,0.000197,0.000366,0.000357,0.000496,...,0.000553,0.000071,0.000472,0.000011,0.000011,0.000375,0.001547,0.000056,0.000033,2.112080
CRC08_LM_VISIUM_AAACGAGACGGTTGAT-1,0.001991,0.000158,0.000912,0.002296,0.002224,0.002558,0.003562,0.004195,0.002283,0.002148,...,0.003242,0.008842,0.007449,0.000241,0.000344,0.002107,0.134352,0.000367,0.000415,0.192628
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ST_LM_4_TTGTTGTGTGTCAAGA-1,0.000969,0.000169,0.000133,0.000589,0.000664,0.001596,0.000748,0.000670,0.000911,0.000945,...,0.000653,0.001375,0.010535,0.000135,0.000107,0.000973,0.055422,0.000195,0.000137,0.009039
ST_LM_4_TTGTTTCACATCCAGG-1,0.000747,0.011145,0.314606,0.000970,0.001147,0.001217,0.001650,0.001089,0.001207,0.001609,...,0.000972,0.003104,0.001103,0.000098,0.000293,0.001586,0.001324,0.000196,0.000413,0.002929
ST_LM_4_TTGTTTCATTAGTCTA-1,0.000319,0.002629,0.286931,0.000438,0.000617,0.000612,0.001037,0.000467,0.000579,0.000703,...,0.000485,0.000440,0.000128,0.000104,0.000180,0.000824,0.000079,0.000165,0.000308,0.000506
ST_LM_4_TTGTTTCCATACAACT-1,0.009815,1.258921,0.823213,0.004059,0.007085,0.007374,0.018880,0.008179,0.008932,0.015840,...,0.006890,0.002227,0.001374,0.000510,0.003421,0.013324,0.001871,0.004349,0.001008,0.006868


In [7]:
# Per-column summary statistics (count, mean, std, quartiles, min/max) of the table.
df.describe()

Unnamed: 0,B,C1QC Mac,C3 iCAF,CD4 Tfh,CD4 Th,CD4 Th HSP,CD4 Th17,CD4 Tn,CD8 Tem,CD8 Tem GZMB,...,Treg,Tuft,UPR,cDC1,cDC2,gdT,ipEMT,migDC,pDC,pEMT
count,16108.0,16108.0,16108.0,16108.0,16108.0,16108.0,16108.0,16108.0,16108.0,16108.0,...,16108.0,16108.0,16108.0,16108.0,16108.0,16108.0,16108.0,16108.0,16108.0,16108.0
mean,0.030268,0.06270247,0.3038838,0.014625,0.006998,0.004575,0.027049,0.01691,0.010366,0.020806,...,0.011453,0.035199,0.037244,0.007035207,0.0200346,0.018703,0.068056,0.01318141,0.006004,0.120783
std,0.252829,0.2463045,0.5472808,0.125705,0.023866,0.013473,0.14567,0.123235,0.061657,0.125872,...,0.053873,0.078048,0.131691,0.03751417,0.09889161,0.108901,0.156885,0.06505248,0.029954,0.269605
min,5e-06,2.664018e-07,3.401717e-07,8e-06,1.2e-05,1.6e-05,9e-06,1.1e-05,1.4e-05,1.4e-05,...,8e-06,4e-06,1e-05,7.043322e-07,8.338378e-07,1.7e-05,1.1e-05,9.609782e-07,2e-06,8e-06
25%,0.000676,0.0001576283,0.0003496307,0.00053,0.000725,0.000895,0.000784,0.00066,0.000735,0.000709,...,0.000511,0.001009,0.001551,0.0001066078,0.0001297309,0.000746,0.000869,0.0001777238,0.000238,0.0021
50%,0.001925,0.003148445,0.01251657,0.001672,0.002282,0.002371,0.002912,0.001932,0.002485,0.002662,...,0.001839,0.00464,0.004111,0.0003651623,0.0004947606,0.002716,0.004304,0.0006518963,0.000692,0.012179
75%,0.004605,0.04255594,0.4426472,0.003887,0.005472,0.004774,0.008472,0.004678,0.006094,0.007277,...,0.005003,0.024609,0.011901,0.001548819,0.002119011,0.007313,0.03548,0.002560041,0.002106,0.115212
max,7.686192,8.452051,5.781144,3.441146,1.699685,0.543085,4.348102,3.047204,2.444069,4.304848,...,1.442283,0.881668,4.044514,1.18188,2.759869,2.71009,1.531018,2.023802,1.004589,3.352646


In [8]:
# List the column labels of the table (taken from adata.uns["mod"]["factor_names"]).
df.columns

Index(['B', 'C1QC Mac', 'C3 iCAF', 'CD4 Tfh', 'CD4 Th', 'CD4 Th HSP',
       'CD4 Th17', 'CD4 Tn', 'CD8 Tem', 'CD8 Tem GZMB', 'CD8 Tem HSP',
       'CD8 Tex', 'Cholangiocyte', 'Colonocyte', 'ECM CAF', 'Enteroendocrine',
       'FCN1 Mono', 'Goblet', 'HSP Mono', 'Hepatocyte', 'Hypoxia', 'IL1B Mac',
       'Intermediate', 'Kupffer', 'Lymphatic endo', 'MAIT', 'Mast',
       'Myofibroblast', 'NK1', 'NK2', 'NKT', 'NLRP3 Mac', 'Neutrophil',
       'PLTP LYVE1 Mac', 'Pericyte', 'Plasma', 'SEC', 'SMC', 'SPP1 Mac',
       'Stalk-like endo', 'Stem', 'Stem (NOTUM high)', 'TA1', 'TA2',
       'Tip-like endo', 'Treg', 'Tuft', 'UPR', 'cDC1', 'cDC2', 'gdT', 'ipEMT',
       'migDC', 'pDC', 'pEMT'],
      dtype='object')

In [9]:
# Append the per-spot 'segmentation_labels' and 'Sample' columns from adata.obs,
# matching rows on the index.
# Any existing copies of these columns are dropped first, so re-running this
# cell does not create suffixed duplicates ('segmentation_labels_x' / '_y')
# that would break later lookups of 'segmentation_labels'.
obs_columns = ['segmentation_labels', 'Sample']
df = df.drop(columns=obs_columns, errors='ignore').merge(
    adata.obs[obs_columns],
    how='left',
    left_index=True,
    right_index=True,
)

In [10]:
# Display the table to check the appended 'segmentation_labels' and 'Sample' columns.
df

Unnamed: 0_level_0,B,C1QC Mac,C3 iCAF,CD4 Tfh,CD4 Th,CD4 Th HSP,CD4 Th17,CD4 Tn,CD8 Tem,CD8 Tem GZMB,...,UPR,cDC1,cDC2,gdT,ipEMT,migDC,pDC,pEMT,segmentation_labels,Sample
spot_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CRC08_LM_VISIUM_AAACAAGTATCTCCCA-1,0.000598,0.000064,0.000022,0.000365,0.000811,0.001681,0.001619,0.000898,0.000907,0.000488,...,0.012999,0.000056,0.000060,0.000401,0.001272,0.000069,0.000206,0.001876,4,CRC08_LM_VISIUM
CRC08_LM_VISIUM_AAACCGGAAATGTTAA-1,0.001441,0.000154,0.000230,0.000748,0.001416,0.001042,0.001674,0.001143,0.001382,0.001960,...,0.007303,0.000086,0.000112,0.001037,0.308789,0.000355,0.000295,0.334837,7,CRC08_LM_VISIUM
CRC08_LM_VISIUM_AAACCGTTCGTCCAGG-1,0.000523,0.000012,0.000092,0.000928,0.000643,0.000923,0.000954,0.000413,0.000868,0.000966,...,0.004257,0.000120,0.000030,0.000913,0.004188,0.000205,0.004973,0.000836,8,CRC08_LM_VISIUM
CRC08_LM_VISIUM_AAACCTAAGCAGCCGG-1,0.000121,0.000042,0.000030,0.000207,0.000356,0.000848,0.000197,0.000366,0.000357,0.000496,...,0.000472,0.000011,0.000011,0.000375,0.001547,0.000056,0.000033,2.112080,7,CRC08_LM_VISIUM
CRC08_LM_VISIUM_AAACGAGACGGTTGAT-1,0.001991,0.000158,0.000912,0.002296,0.002224,0.002558,0.003562,0.004195,0.002283,0.002148,...,0.007449,0.000241,0.000344,0.002107,0.134352,0.000367,0.000415,0.192628,4,CRC08_LM_VISIUM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ST_LM_4_TTGTTGTGTGTCAAGA-1,0.000969,0.000169,0.000133,0.000589,0.000664,0.001596,0.000748,0.000670,0.000911,0.000945,...,0.010535,0.000135,0.000107,0.000973,0.055422,0.000195,0.000137,0.009039,7,ST_LM_4
ST_LM_4_TTGTTTCACATCCAGG-1,0.000747,0.011145,0.314606,0.000970,0.001147,0.001217,0.001650,0.001089,0.001207,0.001609,...,0.001103,0.000098,0.000293,0.001586,0.001324,0.000196,0.000413,0.002929,9,ST_LM_4
ST_LM_4_TTGTTTCATTAGTCTA-1,0.000319,0.002629,0.286931,0.000438,0.000617,0.000612,0.001037,0.000467,0.000579,0.000703,...,0.000128,0.000104,0.000180,0.000824,0.000079,0.000165,0.000308,0.000506,9,ST_LM_4
ST_LM_4_TTGTTTCCATACAACT-1,0.009815,1.258921,0.823213,0.004059,0.007085,0.007374,0.018880,0.008179,0.008932,0.015840,...,0.001374,0.000510,0.003421,0.013324,0.001871,0.004349,0.001008,0.006868,3,ST_LM_4


In [11]:
# Relabel three columns: 'ipEMT' -> 'iREC', 'pEMT' -> 'REC',
# 'Stem (NOTUM high)' -> 'Stem NOTUM'.
# The result is rebound to `df` rather than using inplace=True, so the frame
# object shown by earlier cells is not mutated behind their back; labels that
# are absent (e.g. on a re-run) are simply left untouched by rename().
df = df.rename(
    columns={
        'ipEMT': 'iREC',
        'pEMT': 'REC',
        'Stem (NOTUM high)': 'Stem NOTUM',
    }
)

In [12]:
# Write the table to the results directory as tab-separated text, keeping both
# the header row and the index column.
abundance_table_path = DIR2SAVE / 'cell2loc_cell_abundance_microenvs.csv'
df.to_csv(abundance_table_path, sep='\t', index=True, header=True)

In [13]:
# Summary statistics computed separately for each 'segmentation_labels' group.
df.groupby('segmentation_labels').describe()

Unnamed: 0_level_0,B,B,B,B,B,B,B,B,C1QC Mac,C1QC Mac,...,pDC,pDC,REC,REC,REC,REC,REC,REC,REC,REC
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
segmentation_labels,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,833.0,0.008091,0.02002,8.3e-05,0.002403,0.004096,0.008603,0.506914,833.0,0.475022,...,0.004259,0.236379,833.0,0.061656,0.110863,0.000165,0.007061,0.016912,0.057865,0.974356
1,2058.0,0.006981,0.015213,0.000439,0.002044,0.003474,0.006609,0.378085,2058.0,0.096295,...,0.002771,0.487305,2058.0,0.133774,0.124704,0.000508,0.021728,0.101298,0.217772,0.695471
2,873.0,0.443673,0.98896,0.000126,0.00601,0.028584,0.379248,7.686192,873.0,0.051729,...,0.031799,1.004589,873.0,0.024758,0.091027,9e-06,0.000822,0.003423,0.015339,1.559269
3,1036.0,0.035082,0.074655,0.000345,0.006335,0.014165,0.031972,0.856583,1036.0,0.073356,...,0.014143,0.824654,1036.0,0.076422,0.217594,0.000243,0.007327,0.01768,0.063526,3.297682
4,1972.0,0.004459,0.039248,1.2e-05,0.000134,0.000446,0.001593,0.989591,1972.0,0.008488,...,0.000568,0.161499,1972.0,0.035519,0.096443,1.7e-05,0.000189,0.001053,0.009718,0.949675
5,1572.0,0.003584,0.016574,3.6e-05,0.000826,0.001597,0.00309,0.456078,1572.0,0.02808,...,0.001486,0.089705,1572.0,0.187593,0.261972,3.2e-05,0.005542,0.065715,0.28176,1.644374
6,1074.0,0.006345,0.015234,0.000105,0.002223,0.003504,0.005655,0.308764,1074.0,0.036596,...,0.002537,0.258154,1074.0,0.064323,0.084415,0.000142,0.009627,0.027474,0.089091,0.670084
7,1565.0,0.001543,0.010045,1.3e-05,0.000151,0.000379,0.000923,0.3033,1565.0,0.0006,...,0.000462,0.067921,1565.0,0.60605,0.544906,0.000107,0.187801,0.468396,0.889557,3.352646
8,2459.0,0.005093,0.055419,5e-06,0.000257,0.000726,0.001803,1.869001,2459.0,0.006883,...,0.00082,0.251879,2459.0,0.032717,0.082194,8e-06,0.000242,0.001255,0.01035,0.687672
9,2666.0,0.002477,0.003673,0.000208,0.001062,0.001736,0.002849,0.107023,2666.0,0.066369,...,0.001314,0.097746,2666.0,0.020699,0.043748,0.000181,0.002362,0.005554,0.015156,0.427088
