In [5]:
%load_ext autoreload
%autoreload 2

import warnings
from itertools import chain
from typing import List, Optional, Tuple

import cv2
import geopandas
import matplotlib
import matplotlib.colors as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import scanpy as sc
import seaborn as sns
import shapely
import squidpy as sq
import torch
from anndata import AnnData
from basicpy import BaSiC
from cellpose import models
from rasterio import features
from scipy import ndimage
import plotly.graph_objects as go

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
from napari_sparrow import functions as fc

In [7]:
# Input locations: the marker-gene table, and the two directories that each
# hold an "adata.h5ad" / "adata.geojson" pair (case and control data sets).
path_mg = "../data/resolve_liver/markerGeneListMartinNoLow.csv"
path_adata_case = "../data/resolve_liver/A1-1_baysor/"
path_adata_ctrl = "../data/resolve_liver/A1-1_cellpose/"

# clean = True selects the "clean_celltypes" annotation column,
# clean = False selects "maxScores".
clean = True
celltypes = "clean_celltypes" if clean else "maxScores"

# Plot threshold: celltype pairs with at most this many matched cells are
# set to zero before plotting.
filter_genes = 10

In [8]:
def adata_from_h5ad_geojson(path):
    """Load an annotated data object together with its polygon table.

    Parameters
    ----------
    path : str or os.PathLike
        Directory containing ``adata.h5ad`` and ``adata.geojson``. A trailing
        path separator is optional.

    Returns
    -------
    The object read from ``adata.h5ad``, with the table read from
    ``adata.geojson`` stored under ``.obsm['polygons']``. That table is
    indexed by its ``index`` column and gets a ``linewidth`` column set to 1.
    """
    import os

    # os.path.join yields the same result as plain concatenation when `path`
    # already ends with a separator, and also works when it does not.
    path_h5ad = os.path.join(path, "adata.h5ad")
    path_geojson = os.path.join(path, "adata.geojson")

    adata = sc.read(path_h5ad)
    # NOTE(review): `index_col` is not a documented geopandas.read_file
    # argument; it is forwarded to the I/O engine as-is -- confirm it is needed.
    geop = geopandas.read_file(path_geojson, index_col=0)
    geop.index = geop['index']
    adata.obsm['polygons'] = geop
    adata.obsm['polygons']['linewidth'] = 1

    return adata

# Load the case data set first, then the control data set, using the same loader.
adata_case = adata_from_h5ad_geojson(path=path_adata_case)
adata_ctrl = adata_from_h5ad_geojson(path=path_adata_ctrl)



FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = '../data.nosync/resolve_liver/A1-1_cellpose/adata.h5ad', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
def _score_and_clean(adata, colors, gene_indexes):
    """Run marker-gene scoring followed by the cluster clean-up on one data set.

    Calls ``fc.scoreGenes`` on ``adata`` with the marker-gene file ``path_mg``,
    then passes the keys of the returned marker-gene dict to
    ``fc.clustercleanliness``.

    Parameters
    ----------
    adata
        Data set to score (forwarded unchanged to both ``fc`` functions).
    colors : list of str
        Hex colour codes forwarded as ``colors``.
    gene_indexes : dict
        Mapping of group name -> list of indexes forwarded as ``gene_indexes``.

    Returns
    -------
    tuple
        ``(mg_dict, scoresper_cluster, adata_p, color_dict)``: the two values
        returned by ``fc.scoreGenes`` followed by the two values returned by
        ``fc.clustercleanliness``.
    """
    mg_dict, scoresper_cluster = fc.scoreGenes(
        adata,
        path_mg,
        repl_columns={"Tot_Score_": "", "uppfer": "upffer"},
        del_genes=["Hepatocytes", "LSEC45"],
    )
    # Copies are passed so that every call receives fresh containers, exactly
    # as the former per-call literals did.
    adata_p, color_dict = fc.clustercleanliness(
        adata,
        list(mg_dict.keys()),
        gene_indexes={name: list(indexes) for name, indexes in gene_indexes.items()},
        colors=list(colors),
    )
    return mg_dict, scoresper_cluster, adata_p, color_dict


if clean:
    colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF', '#800000', '#008000', '#000080', '#808000', '#800080', '#008080', '#808080', '#FFA500', '#FFC0CB']
    gene_indexes = {"Other_ImmuneCells": [1,2,8,14,15,16,17,18,19,21,22,26], "fibroblast": [4,5,23,25], "stellate": [28,29,30]}

    # Control first, then case: mg_dict, scoresper_cluster and color_dict end
    # up holding the results of the case run, as before.
    mg_dict, scoresper_cluster, adata_ctrl_p, color_dict = _score_and_clean(adata_ctrl, colors, gene_indexes)
    mg_dict, scoresper_cluster, adata_case_p, color_dict = _score_and_clean(adata_case, colors, gene_indexes)


Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.


Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.


Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.


Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.


Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.


Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.


Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.


Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.


Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.


Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.



In [None]:
# Show the celltype categories of both data sets (control first, then case).
ctrl_categories = adata_ctrl.obs[celltypes].cat.categories
case_categories = adata_case.obs[celltypes].cat.categories
print(ctrl_categories)
print(case_categories)

Index(['B cells', 'Cholangiocytes', 'HepatocytesCentral', 'HepatocytesPortal',
       'Kupffer cells', 'LECs', 'LSEC Central', 'LSEC Portal',
       'Mesothelial cells', 'VSMC', 'central_vein_EC45', 'portal_vein_EC45',
       'Other_ImmuneCells', 'fibroblast', 'stellate'],
      dtype='object')
Index(['B cells', 'Cholangiocytes', 'HepatocytesCentral', 'HepatocytesPortal',
       'Kupffer cells', 'LECs', 'LSEC Central', 'LSEC Portal',
       'Mesothelial cells', 'VSMC', 'central_vein_EC45', 'portal_vein_EC45',
       'Other_ImmuneCells', 'fibroblast', 'stellate'],
      dtype='object')


In [None]:
def match_membrane(adataNuc,adataCell,cutoff=0):
    """For every polygon in ``adataNuc``, find the best-overlapping polygon in ``adataCell``.

    Candidate pairs are all intersecting polygons. For each pair the overlap
    is the intersection area divided by the area of the ``adataNuc`` polygon;
    pairs with an overlap of at most ``cutoff`` are discarded, and for every
    ``adataNuc`` polygon only the pair with the largest overlap is kept.

    Several ``adataNuc`` polygons may be matched to the same ``adataCell``
    polygon (the original author notes these as binucleated cells).

    Parameters
    ----------
    adataNuc, adataCell
        Objects whose ``.obsm['polygons']`` provides a ``geometry`` column.
    cutoff : float, default 0
        Minimal overlap fraction (exclusive) for a pair to count as a match.
        0.5 is generally chosen to prevent two cells overlapping.

    Returns
    -------
    pandas.DataFrame
        One row per polygon of ``adataNuc`` (index named ``'index'``) with the
        columns ``index_right`` (index of the matched ``adataCell`` polygon)
        and ``matchedArea`` (overlap fraction). Polygons without a match hold
        NaN in both columns and should be dealt with by the caller.

    Notes
    -----
    NOTE(review): areas are computed in whatever CRS the polygons carry;
    geopandas warns when that CRS is geographic.
    """
    # Create geopandas objects with all info we need from the anndata objects
    geopD=geopandas.GeoDataFrame(adataNuc.obsm['polygons'].geometry,geometry=adataNuc.obsm['polygons'].geometry)
    geopP=geopandas.GeoDataFrame(adataCell.obsm['polygons'].geometry,geometry=adataCell.obsm['polygons'].geometry)

    # Join on intersection to see which polygons overlap; these are the only possible matches
    possibilities=geopandas.sjoin(geopD,geopP,predicate='intersects',how='left')
    # For now drop the polygons that do not overlap with anything. The explicit
    # copy makes the column assignment below write to an independent frame
    # instead of a slice (avoids the SettingWithCopyWarning).
    Vclean=possibilities.loc[possibilities.index_right.notna(),:].copy()

    # Calculate which fraction of each polygon's area overlaps with each candidate
    Vclean['matchedArea']=Vclean.intersection(geopP.loc[Vclean.index_right],align=False).area/Vclean.area
    # Filter out all candidates whose overlap does not exceed the cutoff
    Vclean=Vclean.loc[Vclean.matchedArea>cutoff,:]
    Vclean.index=Vclean.index.rename('index')

    # For every polygon, only keep the candidate with the largest overlap:
    # sort once by overlap, then keep the first row per index value.
    ranked=Vclean.sort_values('matchedArea', ascending=False)
    temp=ranked.loc[~ranked.index.duplicated(keep='first'),:]

    # Start from an empty frame holding every polygon of adataNuc, so that the
    # ones without a match are kept (as NaN) by the left merge.
    polyT_match=pd.DataFrame(data=None,index=geopD.index.rename('index'))
    polyT_match=polyT_match.merge(temp,how='left',on='index')

    return polyT_match.drop(columns=['geometry'])

In [None]:
# Match control polygons to case polygons (overlap above 0.5) and keep only
# the rows that actually found a partner.
match = match_membrane(adataNuc=adata_ctrl, adataCell=adata_case, cutoff=0.5).dropna()


Geometry is in a geographic CRS. Results from 'area' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,index_right,matchedArea
index,Unnamed: 1_level_1,Unnamed: 2_level_1
9,3755,0.846981
11,11353,0.747967
14,3126,1.000000
15,11603,0.968943
17,4191,0.869071
...,...,...
22364,9238,0.863140
22379,8730,1.000000
22396,3322,0.686875
22406,5606,0.546477


In [None]:
# Celltype categories of the control data set; their positions define the
# node numbering on both sides of the diagram.
celltype_index = adata_ctrl.obs[celltypes].cat.categories
total_celltypes = len(celltype_index)
total_cell_number = len(adata_ctrl.obs[celltypes])

# Node labels: first all "ctrl_" nodes, then all "case_" nodes. Both sides use
# the control categories, which is the index the link positions are based on.
label_list_ctrl = ["ctrl_" + name for name in list(celltype_index)]
label_list_case = ["case_" + name for name in list(celltype_index)]
label_list = label_list_ctrl + label_list_case
print(label_list)

# One link per (source, target) pair: every source node 0..T-1 is repeated T
# times, and paired in turn with every target node T..2T-1.
source_list = [source for source in range(total_celltypes) for repeat in range(total_celltypes)]
target_list = list(range(total_celltypes, total_celltypes * 2)) * total_celltypes

# Link values start at zero.
value_list = [0] * len(source_list)

['ctrl_B cells', 'ctrl_Cholangiocytes', 'ctrl_HepatocytesCentral', 'ctrl_HepatocytesPortal', 'ctrl_Kupffer cells', 'ctrl_LECs', 'ctrl_LSEC Central', 'ctrl_LSEC Portal', 'ctrl_Mesothelial cells', 'ctrl_VSMC', 'ctrl_central_vein_EC45', 'ctrl_portal_vein_EC45', 'ctrl_Other_ImmuneCells', 'ctrl_fibroblast', 'ctrl_stellate', 'case_B cells', 'case_Cholangiocytes', 'case_HepatocytesCentral', 'case_HepatocytesPortal', 'case_Kupffer cells', 'case_LECs', 'case_LSEC Central', 'case_LSEC Portal', 'case_Mesothelial cells', 'case_VSMC', 'case_central_vein_EC45', 'case_portal_vein_EC45', 'case_Other_ImmuneCells', 'case_fibroblast', 'case_stellate']


In [None]:
# Count, for every (control celltype, case celltype) pair, how many matched
# cells fall into it. The counts start from a fresh list, so re-running this
# cell does not add on top of counts from an earlier run.
link_counts = [0] * len(source_list)

# `match` is indexed by the control cell id; its "index_right" column holds the
# id of the matched case cell. The column is read by label rather than by
# position.
for ctrl_id, case_id in match["index_right"].items():
    ctrl = adata_ctrl.obs[celltypes][ctrl_id]
    case = adata_case.obs[celltypes][case_id]
    # Links are laid out row-major: source celltype * T + target celltype.
    value_position = celltype_index.get_loc(ctrl) * total_celltypes + celltype_index.get_loc(case)
    link_counts[value_position] += 1

# Zero out the pairs that do not exceed the threshold.
filtered_value_list = [count if count > filter_genes else 0 for count in link_counts]
value_list = filtered_value_list

In [None]:
# Colour codes stored for the chosen celltype column, e.g. "#FF0000".
color_col = celltypes + "_colors"
color_codes = adata_ctrl.uns[color_col]

# Convert every "#RRGGBB" code into an "rgba(r, g, b, 0.8)" string: the three
# channels are the hex pairs starting at positions 1, 3 and 5.
rgba_colors = []
for hex_code in color_codes:
    red, green, blue = [int(hex_code[start:start + 2], 16) for start in (1, 3, 5)]
    rgba_colors.append(f'rgba({red}, {green}, {blue}, 0.8)')

print(rgba_colors)

# Repeat the palette once per celltype, giving total_celltypes**2 entries.
colors_list = rgba_colors * total_celltypes
print(colors_list)

['rgba(255, 0, 0, 0.8)', 'rgba(0, 255, 0, 0.8)', 'rgba(0, 0, 255, 0.8)', 'rgba(255, 255, 0, 0.8)', 'rgba(255, 0, 255, 0.8)', 'rgba(0, 255, 255, 0.8)', 'rgba(128, 0, 0, 0.8)', 'rgba(0, 128, 0, 0.8)', 'rgba(0, 0, 128, 0.8)', 'rgba(128, 128, 0, 0.8)', 'rgba(128, 0, 128, 0.8)', 'rgba(0, 128, 128, 0.8)', 'rgba(128, 128, 128, 0.8)', 'rgba(255, 165, 0, 0.8)', 'rgba(255, 192, 203, 0.8)']
['rgba(255, 0, 0, 0.8)', 'rgba(0, 255, 0, 0.8)', 'rgba(0, 0, 255, 0.8)', 'rgba(255, 255, 0, 0.8)', 'rgba(255, 0, 255, 0.8)', 'rgba(0, 255, 255, 0.8)', 'rgba(128, 0, 0, 0.8)', 'rgba(0, 128, 0, 0.8)', 'rgba(0, 0, 128, 0.8)', 'rgba(128, 128, 0, 0.8)', 'rgba(128, 0, 128, 0.8)', 'rgba(0, 128, 128, 0.8)', 'rgba(128, 128, 128, 0.8)', 'rgba(255, 165, 0, 0.8)', 'rgba(255, 192, 203, 0.8)', 'rgba(255, 0, 0, 0.8)', 'rgba(0, 255, 0, 0.8)', 'rgba(0, 0, 255, 0.8)', 'rgba(255, 255, 0, 0.8)', 'rgba(255, 0, 255, 0.8)', 'rgba(0, 255, 255, 0.8)', 'rgba(128, 0, 0, 0.8)', 'rgba(0, 128, 0, 0.8)', 'rgba(0, 0, 128, 0.8)', 'rgba(128, 1

In [None]:
# Node appearance: one node per entry of label_list.
sankey_node = dict(
    pad=50,
    thickness=20,
    line=dict(color="black", width=1),
    label=label_list,
    color=colors_list,
)

# Links: parallel lists of source node, target node, value and colour.
sankey_link = dict(
    source=source_list,
    target=target_list,
    value=value_list,
    color=colors_list,
)

sankey_trace = go.Sankey(
    valueformat=".0f",
    valuesuffix=" cells",
    node=sankey_node,
    link=sankey_link,
)

fig = go.Figure(data=[sankey_trace], layout=dict(height=1000, width=1000))
fig.show()