In [10]:
from pathlib import Path
import scanpy as sc
import random
import comseg
import comseg.dataset
from comseg import dictionary

import pandas as pd
import numpy as np
import time
import datetime

In [1]:
import sys
# Add the shared scripts directory to the import search path. The path is
# relative, so it is resolved against the notebook's working directory.
sys.path.append('../../../scripts/')

# Must come after the sys.path change above.
# NOTE(review): presumably `paths.py` lives in ../../../scripts/ — confirm.
from paths import get_data_paths

In [2]:
# Read the path mapping file and keep its 'mouse_gut' entry.
# NOTE(review): later cells use `data_path / "..."`, so this is presumably a
# pathlib.Path — confirm against get_data_paths.
data_paths = get_data_paths('../../../data_mapping.yml')
data_path = data_paths['mouse_gut']

In [6]:
# Build a per-run identifier from the current time (no zero padding) plus a
# random integer in [0, 5000].
e = datetime.datetime.now()
date_str = "_".join([
    str(e.month),
    f"d{e.day}",
    f"h{e.hour}",
    f"min{e.minute}",
    f"s{e.second}",
    f"r{random.randint(0, 5000)}",
])

In [7]:
# Cell-size settings passed to ComSeg in later cells.
# NOTE(review): units are presumably micrometres — confirm.
mean_cell_diameter = 10
max_cell_radius = 8

# ComSeg input folder; the prior segmentation mask is read from the same folder.
path_dataset_folder = data_path / "comseg_format"
path_to_mask_prior = path_dataset_folder

# Per-run output folder (named after date_str), created up front so that
# later cells can write into it.
results_subdir = "results/" + date_str + "/"
path_save = str(path_dataset_folder / results_subdir)
Path(path_save).mkdir(parents=True, exist_ok=True)

In [None]:
#### write the arguments to a file
# with open(Path(path_save) / "script_parameter.txt", "w") as f:
#     for k, v in locals().items():
#         f.write(f"{k} : {v}\n")

## Main run

In [11]:
# Per-axis scale factors handed to ComSeg.
# NOTE(review): presumably the pixel/voxel size in micrometres — confirm.
dict_scale = dict(x=0.17, y=0.17, z=1.5)

# Build the dataset object from the input folder and the prior-mask settings.
dataset_kwargs = dict(
    path_dataset_folder=path_dataset_folder,
    path_to_mask_prior=path_to_mask_prior,
    dict_scale=dict_scale,
    mask_file_extension=".tif",
    mean_cell_diameter=mean_cell_diameter,
    prior_name='in_nucleus',
)
dataset = comseg.dataset.ComSegDataset(**dataset_kwargs)

# Add the prior derived from the mask to the dataset.
# NOTE(review): overwrite=True presumably replaces a previously stored prior — confirm.
dataset.add_prior_from_mask(overwrite=True)

add data


add prior to data
prior added to data and saved in csv file
dict_centroid added for data 


In [13]:
# The parameters are named once so that the call and the output file name
# below cannot drift apart. The previous file name was hard-coded as
# "..._n40_50000.npy" although sampling_size was 10000.
edge_n_neighbors = 40
edge_sampling_size = 10000

# Compute the co-expression based edge weights on all images
# (images_subset=None), using a sample of the data.
# NOTE: the original inline comment on this call reads "in micrometer".
dataset.compute_edge_weight(
    images_subset=None,
    n_neighbors=edge_n_neighbors,
    sampling=True,
    sampling_size=edge_sampling_size,
)

# Persist the co-expression result next to the input data.
# NOTE(review): if dict_co_expression is a plain dict, np.save pickles it, so
# reading it back needs np.load(..., allow_pickle=True).item() — confirm.
co_expression_file = f"dict_co_expression_n{edge_n_neighbors}_{edge_sampling_size}.npy"
np.save(path_to_mask_prior / co_expression_file, dataset.dict_co_expression)

  0%|          | 0/1 [00:00<?, ?it/s]

image name :  data


100%|██████████| 1/1 [00:44<00:00, 44.93s/it]
100%|██████████| 241/241 [00:18<00:00, 13.29it/s]


In [None]:
# Wrap the dataset in a ComSegDict that uses the "with_prior" community detection.
comsegdict_settings = {
    "dataset": dataset,
    "mean_cell_diameter": mean_cell_diameter,
    "community_detection": "with_prior",
}
Comsegdict = dictionary.ComSegDict(**comsegdict_settings)

# Compute the community vectors that the clustering below works on.
Comsegdict.compute_community_vector()

# Cluster the communities. Leiden is the selected method.
# NOTE(review): n_clusters_kmeans is still passed; presumably it is ignored
# when clustering_method="leiden" — confirm.
clustering_settings = {
    "size_commu_min": 3,
    "norm_vector": True,
    # clustering parameters
    "n_pcs": 3,
    "n_comps": 3,
    "clustering_method": "leiden",
    "n_neighbors": 20,
    "resolution": 1,
    "n_clusters_kmeans": 4,
    "palette": None,
    "nb_min_cluster": 0,
    "min_merge_correlation": 0.8,
}
Comsegdict.compute_insitu_clustering(**clustering_settings)

In [16]:
# One random hex colour ("#rrggbb") per label "-1" .. "499".
palette = {
    str(label): f"#{random.randint(0, 0xFFFFFF):06x}"
    for label in range(-1, 500)
}
# AnnData object produced by the in-situ clustering step.
adata = Comsegdict.in_situ_clustering.anndata_cluster
# Store the merged Leiden labels as integers.
merged_labels = adata.obs["leiden_merged"]
adata.obs["leiden_merged"] = merged_labels.astype(int)
# Optional UMAP of the clusters (disabled):
# sc.tl.umap(adata)
# sc.pl.umap(adata, color=["leiden_merged"], palette=palette, legend_loc='on data')

In [18]:
# Attach the "leiden_merged" cluster labels to the graph; the node attribute
# of the same name is read below.
Comsegdict.add_cluster_id_to_graph(clustering_method="leiden_merged")

### Export one CSV row per graph node (spot) with its cluster label.
# Only the first image in the ComSegDict is exported.
img_name = list(Comsegdict.keys())[0]

# Resolve the node view once instead of on every attribute access.
graph_nodes = Comsegdict[img_name].G.nodes
node_attrs = [graph_nodes[node] for node in graph_nodes]

# Output column name -> node attribute name (insertion order = column order).
dictio = {
    'gene': [attrs["gene"] for attrs in node_attrs],
    'x': [attrs["x"] for attrs in node_attrs],
    'y': [attrs["y"] for attrs in node_attrs],
    'z': [attrs["z"] for attrs in node_attrs],
    "leiden": [attrs["leiden_merged"] for attrs in node_attrs],
}
df = pd.DataFrame(dictio)

df.to_csv(Path(path_save) / "leiden0.csv")

# Classify the cell centroids from the "leiden_merged" labels of nearby nodes.
# NOTE(review): with path_cell_centroid=None the centroids presumably come
# from the dataset itself — confirm against the ComSeg documentation.
centroid_settings = {
    "path_cell_centroid": None,
    "n_neighbors": 15,
    "dict_in_pixel": True,
    "max_dist_centroid": None,
    "key_pred": "leiden_merged",
    "distance": "ngb_distance_weights",
    "file_extension": ".tiff.npy",
}
Comsegdict.classify_centroid(**centroid_settings)

100%|██████████| 819665/819665 [00:01<00:00, 745604.34it/s]
100%|██████████| 1/1 [00:25<00:00, 25.78s/it]


In [19]:
# Associate RNA nodes with landmarks, limited by max_cell_radius.
# This step writes the "cell_index_pred" node attribute, which is not read
# before this call and is read by the export that follows.
landmark_settings = {
    "key_pred": "leiden_merged",
    "distance": 'distance',
    "max_cell_radius": max_cell_radius,
}
Comsegdict.associate_rna2landmark(**landmark_settings)

  0%|          | 0/1 [00:00<?, ?it/s]

data


100%|██████████| 480609/480609 [00:01<00:00, 368900.97it/s]
100%|██████████| 1/1 [08:09<00:00, 489.69s/it]


## Prepare output

In [20]:
# Export one CSV row per graph node (spot) of the first image, with its
# cluster label and predicted cell index.
img_name = list(Comsegdict.keys())[0]
graph_nodes = Comsegdict[img_name].G.nodes
node_attrs = [graph_nodes[node] for node in graph_nodes]

# Insertion order of this dict is the column order of the CSV.
dictio = {
    'gene': [attrs["gene"] for attrs in node_attrs],
    'x': [attrs["x"] for attrs in node_attrs],
    'y': [attrs["y"] for attrs in node_attrs],
    'z': [attrs["z"] for attrs in node_attrs],
    "leiden": [attrs["leiden_merged"] for attrs in node_attrs],
    "cell": [attrs["cell_index_pred"] for attrs in node_attrs],
}
df = pd.DataFrame(dictio)
df.to_csv(Path(path_save) / "cell0_r10_rmax8_small_p.csv")

# Re-fetch the clustering AnnData and make sure the merged Leiden labels are integers.
adata = Comsegdict.in_situ_clustering.anndata_cluster
merged_labels = adata.obs["leiden_merged"]
adata.obs["leiden_merged"] = merged_labels.astype(int)
# Optional UMAP of the clusters (disabled):
#sc.tl.umap(adata)
#fig_ledien = sc.pl.umap(adata, color=["leiden_merged"], palette=palette, legend_loc='on data')
### visualize the point cloud with napari

## Polygons?

Even alpha 1.0 doesn't help

In [None]:
# Build the final AnnData and the per-image JSON dictionary from the ComSeg result.
comseg_result = Comsegdict.anndata_from_comseg_result(
    alpha=1.0,
    allow_disconnected_polygon=True,
)
final_anndata, dict_json_img = comseg_result
# filename = Path(path_save) / "result.h5ad"

In [29]:
# Display the summary of the clustering AnnData (bare last expression).
adata

AnnData object with n_obs × n_vars = 8290 × 240
    obs: 'img_name', 'index_commu', 'nb_rna', 'leiden', 'leiden_merged'
    var: 'features'
    uns: 'pca', 'neighbors', 'leiden'
    obsm: 'X_pca'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'

In [44]:
# Fraction of spots whose predicted cell index equals 0.
# NOTE(review): 0 presumably means "not assigned to any cell" — confirm.
spots = final_anndata.uns['df_spots']['data']
spots["cell_index_pred"].eq(0).mean()

np.float64(0.16608492493884697)

In [47]:
# Save the final AnnData into the per-run output folder.
result_path = Path(path_save) / "result.h5ad"
final_anndata.write_h5ad(result_path)

In [52]:
# Export the per-spot table of the 'data' image as the segmentation result.
# Work on a copy so the table stored in final_anndata.uns is left untouched.
df = final_anndata.uns['df_spots']['data'].copy()
# Drop the 'Unnamed: 0' column when present. errors='ignore' keeps the cell
# from raising KeyError when the column is absent.
# NOTE(review): the column is presumably a leftover index from a CSV round trip — confirm.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df.to_csv(Path(path_save) / "segmentation.csv")