In [1]:
%load_ext autoreload
%autoreload 2

from pathlib import Path
import scanpy as sc
import skimage.io as sio
import random
import comseg
import comseg.dataset
from comseg import dictionary

import pandas as pd
import numpy as np
import time
import datetime
import os

In [1]:
import sys
# Extend the import search path with the relative scripts directory so the two
# project-local imports below can be resolved.
# NOTE(review): presumably `paths` and `seg_utils` live in ../../../scripts/ — the
# relative path only works when the notebook is run from its own folder; confirm.
sys.path.append('../../../scripts/')

from paths import get_data_paths
from seg_utils import create_xenium_dapi_mask

In [3]:
# Build a run identifier from the current wall-clock time plus a random suffix;
# it is used further down to name the results folder of this run.
e = datetime.datetime.now()
random_suffix = random.randint(0, 5000)
date_str = "_".join([
    str(e.month),
    f"d{e.day}",
    f"h{e.hour}",
    f"min{e.minute}",
    f"s{e.second}",
    f"r{random_suffix}",
])

## Prepare data

In [4]:
# Resolve the dataset root from the project path helper, then make sure the
# ComSeg input directory below it exists.
DATA_FOLDER = get_data_paths()['human_ovarian_cancer']
INPUT_FOLDER = '{}/seg_method_results/comseg/input/'.format(DATA_FOLDER)
os.makedirs(INPUT_FOLDER, exist_ok=True)

In [5]:
# Load the transcript table and keep only the four columns written to the
# ComSeg input CSV, renamed to x / y / z / gene.
column_names = {
    'x_location': 'x',
    'y_location': 'y',
    'z_location': 'z',
    'feature_name': 'gene',
}
df_spatial = (
    pd.read_parquet(f'{DATA_FOLDER}/subset/transcripts.parquet')
    .rename(columns=column_names)
    .loc[:, ['x', 'y', 'z', 'gene']]
    .copy()
)

# Divide the coordinates by 0.2125.
# NOTE(review): presumably converts microns to pixels (0.2125 µm per pixel) — confirm.
coordinate_columns = ['x', 'y', 'z']
df_spatial[coordinate_columns] = df_spatial[coordinate_columns] / 0.2125

# The index is written as well (no index=False), exactly as before.
df_spatial.to_csv(f'{INPUT_FOLDER}/data.csv')
df_spatial.shape[0]

291586

In [None]:
# Build the DAPI mask with the project helper; scale and target shape are
# passed through as fixed values.
dapi_mask = create_xenium_dapi_mask(
    DATA_FOLDER,
    scale=0.2125,
    shape=(37631, 54089),
)

In [12]:
# Write the mask into the ComSeg input folder as data.tif (same folder as data.csv).
mask_path = f'{INPUT_FOLDER}/data.tif'
sio.imsave(mask_path, dapi_mask)

  sio.imsave(f'{INPUT_FOLDER}/data.tif', dapi_mask)


## Run ComSeg

In [6]:
# Cell-size settings passed to ComSeg in the cells below.
max_cell_radius = 8
mean_cell_diameter = 7

# The transcript CSV and the mask prior are read from the same input folder.
path_dataset_folder = path_to_mask_prior = INPUT_FOLDER

# Results go to <input folder>/results/<run id>, created on demand.
results_dir = Path(path_dataset_folder) / "results" / date_str
results_dir.mkdir(parents=True, exist_ok=True)
path_save = str(results_dir)

## Main run

In [7]:
# Per-axis scale handed to ComSeg; the same value is used for x, y and z.
dict_scale = dict.fromkeys(("x", "y", "z"), 0.2125)

# Create the dataset object from the input folder and its ".tif" mask prior.
dataset = comseg.dataset.ComSegDataset(
    path_dataset_folder=path_dataset_folder,
    path_to_mask_prior=path_to_mask_prior,
    dict_scale=dict_scale,
    mask_file_extension=".tif",
    mean_cell_diameter=mean_cell_diameter,
    prior_name='in_nucleus',
)

# Add the mask-derived prior to the transcripts (called with overwrite=True).
dataset.add_prior_from_mask(overwrite=True)

prior added to data and saved in csv file
dict_centroid added for data 


add prior to data
prior added to data and saved in csv file
dict_centroid added for data 


In [8]:
# Parameters of the co-expression estimate. They are also embedded in the name
# of the saved file, so both are derived from these two variables and cannot
# drift apart when one of them is tuned.
n_neighbors_coexpression = 40
coexpression_sampling_size = 10000

# Long-running step (see the progress bars in the cell output).
dataset.compute_edge_weight(
    images_subset=None,
    n_neighbors=n_neighbors_coexpression,
    sampling=True,
    sampling_size=coexpression_sampling_size,
)
# NOTE(review): never read in the visible cells; kept only so that any cell
# relying on this name still runs. Candidate for removal.
corr_matrix = []

# Persist the co-expression dictionary next to the mask prior.
co_expression_path = Path(path_to_mask_prior) / (
    f'dict_co_expression_n{n_neighbors_coexpression}_{coexpression_sampling_size}.npy'
)
np.save(co_expression_path, dataset.dict_co_expression)

Processing images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:15<00:00, 15.19s/it]
  corr = scipy.stats.pearsonr(exp_gene_source, exp_gene_target)[0]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5250/5250 [1:44:28<00:00,  1.19s/it]


Code fails on this cell:

In [None]:
# Build the ComSeg dictionary on top of the dataset, with community detection
# set to "with_prior".
Comsegdict = dictionary.ComSegDict(
    dataset=dataset,
    mean_cell_diameter=mean_cell_diameter,
    community_detection="with_prior",
)
Comsegdict.compute_community_vector()

# In-situ clustering settings, gathered in one place before the call.
insitu_clustering_params = dict(
    size_commu_min=3,
    norm_vector=True,
    # clustering parameters
    n_pcs=3,
    n_comps=3,
    clustering_method="leiden",
    n_neighbors=20,
    resolution=1,
    n_clusters_kmeans=4,
    palette=None,
    nb_min_cluster=0,
    min_merge_correlation=0.8,
)
Comsegdict.compute_insitu_clustering(**insitu_clustering_params)

In [None]:
# One random hex colour per cluster label, for labels "-1" through "499".
palette = {
    str(label): f"#{random.randint(0, 0xFFFFFF):06x}"
    for label in range(-1, 500)
}

# Cast the merged Leiden labels stored on the clustering AnnData to integers.
adata = Comsegdict.in_situ_clustering.anndata_cluster
merged_labels = adata.obs["leiden_merged"]
adata.obs["leiden_merged"] = merged_labels.astype(int)
# sc.tl.umap(adata)
# sc.pl.umap(adata, color=["leiden_merged"], palette=palette, legend_loc='on data')

In [None]:
Comsegdict.add_cluster_id_to_graph(clustering_method="leiden_merged")

### get a csv spot/cluster

# Only the first image of the dictionary is exported.
img_name = list(Comsegdict.keys())[0]
node_view = Comsegdict[img_name].G.nodes

# Collect each node's attribute mapping once, then read the columns from it.
node_attrs = [node_view[node] for node in node_view]

gene_list = [attrs["gene"] for attrs in node_attrs]
x_list = [attrs["x"] for attrs in node_attrs]
y_list = [attrs["y"] for attrs in node_attrs]
z_list = [attrs["z"] for attrs in node_attrs]
leiden = [attrs["leiden_merged"] for attrs in node_attrs]
cell_id = []  # stays empty, as in the original cell

dictio = {
    'gene': gene_list,
    'x': x_list,
    'y': y_list,
    'z': z_list,
    "leiden": leiden,
}
df = pd.DataFrame(dictio)

# Save the per-spot cluster table into this run's results folder.
df.to_csv(Path(path_save) / "leiden0.csv")

Comsegdict.classify_centroid(
    path_cell_centroid=None,
    n_neighbors=15,
    dict_in_pixel=True,
    max_dist_centroid=None,
    key_pred="leiden_merged",
    distance="ngb_distance_weights",
    file_extension=".tiff.npy",
)

In [19]:
# Run ComSeg's RNA-to-landmark association with the merged Leiden labels as
# prediction key and the configured maximum cell radius.
association_params = dict(
    key_pred="leiden_merged",
    distance='distance',
    max_cell_radius=max_cell_radius,
)
Comsegdict.associate_rna2landmark(**association_params)

  0%|          | 0/1 [00:00<?, ?it/s]

data


100%|██████████| 480609/480609 [00:01<00:00, 368900.97it/s]
100%|██████████| 1/1 [08:09<00:00, 489.69s/it]
