In [40]:
import numpy as np
import pandas as pd
import geopandas as gpd

import anndata as ad
import scanpy as sc

import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import rcParams
import matplotlib.colors as clr

import scvi
import squidpy as sq
import cellcharter as cc
import gc

In [41]:
sc.settings.n_jobs = -1

In [42]:
import torch
torch.set_float32_matmul_precision('high')
torch.cuda.is_available()

True

In [43]:
from lightning.pytorch import seed_everything
seed_everything(12345)

Seed set to 12345


12345

In [44]:
import shapely as sp 
import geopandas as gpd
import libpysal as lps 
import esda
import alphashape as ash

In [45]:
def get_boundary(coords):
    x = coords.get_coordinates().to_numpy()
    poly = lps.cg.alpha_shape_auto(x, step=1)
    return poly

In [46]:
adata = sc.read_h5ad("../../data/merfish/BAYSOR/seurat_objects/ns-atlas.merfish_baysor.scanvi_integrated.cellcharter.anndata.annotated.h5ad")

In [47]:
adata.obs.columns

Index(['EntityID', 'fov', 'volume', 'center_x', 'center_y', 'min_x', 'min_y',
       'max_x', 'max_y', 'anisotropy',
       ...
       'donor_age', 'race_ethnicity', 'DV200', 'RIN', 'preservation_type',
       'collection_type', 'component_id', 'sample_compartment',
       'tissue_compartment', 'compartment.short'],
      dtype='object', length=108)

In [48]:
del adata.obs['tissue_compartment']
del adata.obs['sample_compartment']
del adata.obs['compartment.short']


In [49]:
tissue_compartments = pd.read_csv("../../data/merfish/BAYSOR/metadata/tissue_compartment_annotations.csv", index_col=0)

  tissue_compartments = pd.read_csv("../../data/merfish/BAYSOR/metadata/tissue_compartment_annotations.csv", index_col=0)


In [50]:
tissue_compartments.index = tissue_compartments['cell_barcode'].values.astype(str).tolist()

In [51]:
tissue_compartments.head()

Unnamed: 0,cell_barcode,cell_id,sample_barcode,sample_compartment,tissue_compartment,compartment.short
1577244300000000000,1577244300000000000,D151_SKIN_NS_S04_R01_C1577244300000000000,D151_SKIN_NS_S04_R01,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000001,1577244300000000001,D151_SKIN_NS_S04_R01_C1577244300000000001,D151_SKIN_NS_S04_R01,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000002,1577244300000000002,D151_SKIN_NS_S04_R01_C1577244300000000002,D151_SKIN_NS_S04_R01,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000003,1577244300000000003,D151_SKIN_NS_S04_R01_C1577244300000000003,D151_SKIN_NS_S04_R01,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000006,1577244300000000006,D151_SKIN_NS_S04_R01_C1577244300000000006,D151_SKIN_NS_S04_R01,D151_SKIN_NS_S04_R01_DER,dermis,DER


In [52]:
tissue_compartments = tissue_compartments.loc[adata.obs_names.values.astype(str).tolist(), ['sample_compartment', 'tissue_compartment', "compartment.short"]].copy()

In [53]:
adata.obs.index = adata.obs_names.values.astype(str).tolist()

In [54]:
adata.obs = pd.concat([adata.obs, tissue_compartments], axis=1).copy()

In [55]:
adata.write_h5ad("../../data/merfish/BAYSOR/seurat_objects/ns-atlas.merfish_baysor.scanvi_integrated.cellcharter.anndata.annotated.h5ad", compression='gzip')

In [56]:
adata = adata[adata.obs['compartment.short'] != 'OUT'].copy()

In [57]:
adata.obs.head(10)

Unnamed: 0,EntityID,fov,volume,center_x,center_y,min_x,min_y,max_x,max_y,anisotropy,...,donor_age,race_ethnicity,DV200,RIN,preservation_type,collection_type,component_id,sample_compartment,tissue_compartment,compartment.short
1577244300000000000,1577244300000000000,,413.582367,10833.988067,5659.833278,10830.405562,5656.014926,10838.326891,5662.925291,1.535899,...,36.0,,0.52,,FFPE,autopsy,D151_SKIN_NS_S04_R01_N6_C0,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000001,1577244300000000001,,954.476268,11088.754365,5967.087526,11081.775322,5960.465239,11095.173641,5971.33598,1.820862,...,36.0,,0.52,,FFPE,autopsy,D151_SKIN_NS_S04_R01_N6_C0,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000002,1577244300000000002,,2314.160266,9533.604246,6785.310602,9526.056136,6778.549632,9543.619093,6794.195938,1.315076,...,36.0,,0.52,,FFPE,autopsy,D151_SKIN_NS_S04_R01_N0_C619,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000003,1577244300000000003,,1377.588139,10330.515975,5700.319847,10320.137869,5693.969074,10343.894569,5705.211938,3.583034,...,36.0,,0.52,,FFPE,autopsy,D151_SKIN_NS_S04_R01_N1_Cnan,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000006,1577244300000000006,,364.332251,11125.854334,6012.214286,11122.113357,6009.566081,11131.581724,6014.614107,1.857338,...,36.0,,0.52,,FFPE,autopsy,D151_SKIN_NS_S04_R01_N6_C0,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000008,1577244300000000008,,1137.968435,10640.326353,7268.846994,10634.916613,7262.627689,10647.572479,7274.467481,1.099862,...,36.0,,0.52,,FFPE,autopsy,D151_SKIN_NS_S04_R01_N2_C2046,D151_SKIN_NS_S04_R01_EPI,epidermis,EPI
1577244300000000013,1577244300000000013,,471.085336,11066.605192,5821.27631,11061.106763,5816.906358,11072.093556,5825.109851,1.461268,...,36.0,,0.52,,FFPE,autopsy,D151_SKIN_NS_S04_R01_N6_C0,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000014,1577244300000000014,,965.433572,10854.263308,5504.434883,10848.463487,5499.716736,10859.96204,5509.46475,1.07656,...,36.0,,0.52,,FFPE,autopsy,D151_SKIN_NS_S04_R01_N6_C0,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000015,1577244300000000015,,514.916523,9348.450583,5482.567898,9344.775658,5477.369742,9351.470938,5487.585487,1.46811,...,36.0,,0.52,,FFPE,autopsy,D151_SKIN_NS_S04_R01_N6_C1,D151_SKIN_NS_S04_R01_DER,dermis,DER
1577244300000000017,1577244300000000017,,1179.214414,8472.04245,5949.88855,8467.57348,5941.958979,8477.315849,5958.83752,1.936929,...,36.0,,0.52,,FFPE,autopsy,D151_SKIN_NS_S04_R01_N2_C2046,D151_SKIN_NS_S04_R01_EPI,epidermis,EPI


In [58]:
sample_barcodes = adata.obs['sample_barcode'].values.unique().tolist()

In [59]:
sample_id = "D077_SKIN_NS_S04_R01"

In [60]:
compartment_info = adata.obs.loc[adata.obs['sample_barcode'] == sample_id, ['sample_barcode', 'anatomic_site', 'sample_compartment', 'tissue_compartment','total_area_mm2', 'compartment.short', 'center_x', 'center_y']].copy()


In [61]:
compartment_info.head()

Unnamed: 0,sample_barcode,anatomic_site,sample_compartment,tissue_compartment,total_area_mm2,compartment.short,center_x,center_y
1577590000000000000,D077_SKIN_NS_S04_R01,back,D077_SKIN_NS_S04_R01_DER,dermis,3.554131,DER,14816.99627,8320.576036
1577590000000000001,D077_SKIN_NS_S04_R01,back,D077_SKIN_NS_S04_R01_DER,dermis,3.554131,DER,14873.50362,8526.353086
1577590000000000003,D077_SKIN_NS_S04_R01,back,D077_SKIN_NS_S04_R01_DER,dermis,3.554131,DER,15241.864881,8735.600601
1577590000000000004,D077_SKIN_NS_S04_R01,back,D077_SKIN_NS_S04_R01_DER,dermis,3.554131,DER,14902.575772,8326.801012
1577590000000000005,D077_SKIN_NS_S04_R01,back,D077_SKIN_NS_S04_R01_DER,dermis,3.554131,DER,14178.356648,7250.963825


In [62]:
sample_barcodes = adata.obs['sample_barcode'].values.unique().tolist()
compartment_list = []
for sample_id in sample_barcodes:
    compartment_info = adata.obs.loc[adata.obs['sample_barcode'] == sample_id, ['sample_barcode', 'anatomic_site', 'sample_compartment', 'tissue_compartment','total_area_mm2', 'compartment.short', 'center_x', 'center_y']].copy()
    compartment_info['cell_centroids'] = gpd.points_from_xy(compartment_info['center_x'], compartment_info['center_y'])
    compartment_info = gpd.GeoDataFrame(compartment_info, geometry='cell_centroids')

    tissue_areas = compartment_info.groupby(['sample_barcode'])['cell_centroids'].agg(lambda x: get_boundary(x)).reset_index()
    tissue_areas = tissue_areas[~tissue_areas['cell_centroids'].is_empty].copy()

    compartment_areas = compartment_info.groupby(['sample_compartment', 'compartment.short'])['cell_centroids'].agg(lambda x: get_boundary(x)).reset_index().dropna()
    compartment_areas['geometry'] = compartment_areas['cell_centroids'].make_valid()
    compartment_areas['compartment_area_mm2'] = ((compartment_areas['geometry'].area)/1e6)
    compartment_areas.drop(columns=['cell_centroids'], inplace=True)
    compartment_areas['sample_barcode'] = sample_id

    compartment_areas['total_area_mm2'] = np.repeat(np.array((tissue_areas['cell_centroids'].area)/1e6)[0], len(compartment_areas))
    compartment_areas['compartment_proportion'] = (compartment_areas['compartment_area_mm2']/compartment_areas['compartment_area_mm2'].sum())
    compartment_areas['compartment_area_adjusted'] = np.array(compartment_areas['compartment_proportion']) * np.array((tissue_areas['cell_centroids'].area)/1e6)
    compartment_list.append(compartment_areas)

    del compartment_areas
    del tissue_areas

  tissue_areas = compartment_info.groupby(['sample_barcode'])['cell_centroids'].agg(lambda x: get_boundary(x)).reset_index()
  compartment_areas = compartment_info.groupby(['sample_compartment', 'compartment.short'])['cell_centroids'].agg(lambda x: get_boundary(x)).reset_index().dropna()
  tissue_areas = compartment_info.groupby(['sample_barcode'])['cell_centroids'].agg(lambda x: get_boundary(x)).reset_index()
  compartment_areas = compartment_info.groupby(['sample_compartment', 'compartment.short'])['cell_centroids'].agg(lambda x: get_boundary(x)).reset_index().dropna()
  tissue_areas = compartment_info.groupby(['sample_barcode'])['cell_centroids'].agg(lambda x: get_boundary(x)).reset_index()
  compartment_areas = compartment_info.groupby(['sample_compartment', 'compartment.short'])['cell_centroids'].agg(lambda x: get_boundary(x)).reset_index().dropna()
  tissue_areas = compartment_info.groupby(['sample_barcode'])['cell_centroids'].agg(lambda x: get_boundary(x)).reset_index()
  compar

In [63]:
compartment_areas = pd.concat(compartment_list, axis=0)

In [64]:
compartment_areas.head()

Unnamed: 0,sample_compartment,compartment.short,geometry,compartment_area_mm2,sample_barcode,total_area_mm2,compartment_proportion,compartment_area_adjusted
1020,D151_SKIN_NS_S04_R01_DER,DER,"POLYGON ((9585.983 6881.832, 9635.663 6916.875...",4.620182,D151_SKIN_NS_S04_R01,5.955045,0.819878,4.882408
1025,D151_SKIN_NS_S04_R01_EPI,EPI,"POLYGON ((10627.184 7267.071, 10640.326 7268.8...",0.208866,D151_SKIN_NS_S04_R01,5.955045,0.037064,0.22072
1031,D151_SKIN_NS_S04_R01_SUB,SUB,"POLYGON ((9801.201 5189.63, 10085.047 5424.71,...",0.806162,D151_SKIN_NS_S04_R01,5.955045,0.143058,0.851917
1044,D151_SKIN_NS_S06_R01_DER,DER,"POLYGON ((3474.236 6893.318, 3494.278 6884.882...",5.251671,D151_SKIN_NS_S06_R01,7.17919,0.750619,5.388834
1049,D151_SKIN_NS_S06_R01_EPI,EPI,"POLYGON ((3025.679 7043.073, 3057.933 7032.784...",0.170103,D151_SKIN_NS_S06_R01,7.17919,0.024313,0.174546


In [65]:
compartment_areas.to_csv("../../data/merfish/BAYSOR/metadata/tissue_compartment_areas.csv", index=False)
compartment_areas.to_parquet("../../data/merfish/BAYSOR/metadata/tissue_compartment_areas.parquet", index=False)

In [66]:
len(np.unique(compartment_areas['sample_barcode'].values.tolist()))

114