# Evaluating Cell Type Transitions

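This notebook evaluates the cell type transitions predicted by moscot and TOME against literature-curated transitions and germ-layer consistency.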

In [1]:
import numpy as np
import anndata
import pandas as pd
import scanpy as sc
import scipy
import networkx as nx

In [2]:
# Root directory of the mouse temporal data (machine-specific absolute path; adjust when running elsewhere)
Path="/home/mgander/moscot-framework_reproducibility/data/mouse_temporal"

# Time points in the order used to form consecutive (ts[i], ts[i+1]) pairs below
ts=[
    'E3.5', 'E4.5', 'E5.25', 'E5.5', 'E6.25',
    'E6.5', 'E6.75', 'E7.0', 'E7.25', 'E7.5', 'E7.75', 'E8.0', 'E8.25',
    'E8.5a', 'E8.5b', 'E9.5', 'E10.5', 'E11.5', 'E12.5', 'E13.5',
]

# Load and check curated transitions

In [3]:
######## Load the curated table of literature-supported transitions between cell types ##############
# The file is read as a semicolon-separated CSV; per the original note it can be found in the supplementary table.
# Columns used later in this notebook: Cell_type, Known_progenitors, Known_descendants, Germ_layer.

Lit=pd.read_csv(f'{Path}/Utils/Curated_transitions.csv', sep=';')
Lit

Unnamed: 0,Cell_type,Known_progenitors,Known_descendants,Germ_layer
0,Morula,/,"Inner cell mass, Trophectoderm",Other
1,Inner cell mass,Morula,"Epiblast, Hypoblast",Other
2,Trophectoderm,Morula,Extraembryonic ectoderm,Extraembryonic
3,Extraembryonic ectoderm,"Trophectoderm, Hypoblast",/,Extraembryonic
4,Hypoblast,Inner cell mass,"Parietal endoderm, Visceral endoderm, Extraemb...",Other
...,...,...,...,...
86,Hematoendothelial progenitors,"Nascent mesoderm, Extraembryonic mesoderm, Spl...","Endothelium, Blood progenitors",Mesoderm
87,White blood cells,Blood progenitors,/,Mesoderm
88,Megakaryocytes,Blood progenitors,/,Mesoderm
89,Primitive erythroid cells,Blood progenitors,/,Mesoderm


In [4]:
###################     Check for inconsistencies     ########################################################
# The curated table lists every transition twice: once as a descendant of the parent and once as a
# progenitor of the child. This cell verifies that both directions agree and counts mismatches in `k`.

# Map each cell type name to its row label in `Lit`, so relatives can be looked up by name
Dict={}
k=0
ct_l=Lit['Cell_type']
for i in range(0,len(Lit)):
    Dict[f'{ct_l[i]}']=i

# Check if all the descendants are also in progenitors of the respective cell type
for i in range(0, len(Lit)):
    ct=Lit['Cell_type'][i]
    for j in Lit['Known_descendants'][i].split(', '):
        # '/' is the placeholder for "no known descendants"
        if j=='/':
            continue
        # A name without its own row is itself an inconsistency; report it instead of failing with a KeyError
        if j not in Dict:
            print(f'{j} (listed as descendant of {ct}) is not a cell type in the table')
            k=k+1
            continue
        progenitors_of_j=Lit['Known_progenitors'][Dict[j]].split(', ')
        if ct not in progenitors_of_j:
            print(f'{ct} is not in progenitors of {j}')
            print(progenitors_of_j)
            k=k+1

# Check if all the progenitors are also in descendants of the respective cell type
for i in range(0, len(Lit)):
    ct=Lit['Cell_type'][i]
    for j in Lit['Known_progenitors'][i].split(', '):
        # '/' is the placeholder for "no known progenitors"
        if j=='/':
            continue
        if j not in Dict:
            print(f'{j} (listed as progenitor of {ct}) is not a cell type in the table')
            k=k+1
            continue
        if ct not in Lit['Known_descendants'][Dict[j]].split(', '):
            print(f'{ct} is not in descendants of {j}')
            k=k+1

if k!=0:
    print('Inconsistencies in literature tree!')
else:
    print('Everything is consistent')

Everything is consistent


In [5]:
# Directed graph of curated transitions: an edge (a, b) is added for every b listed in a's Known_descendants
G= nx.DiGraph()
G.add_nodes_from(Lit['Cell_type'])

for cell_type, descendants in zip(Lit['Cell_type'], Lit['Known_descendants']):
    for des in descendants.split(', '):
        # '/' is the table's placeholder for "no known descendants", not a cell type:
        # skip it so the graph does not gain a bogus '/' node
        if des!='/':
            G.add_edge(cell_type, des)

    # Add transition to self
    G.add_edge(cell_type, cell_type)

# Evaluate

In [6]:
def evaluate_using_curated_transitions(G, df, cutoff):
    """Score a cell-type transition matrix against the curated transition graph.

    Parameters
    ----------
    G
        Graph whose ``edges`` iterate as ``(parent, child)`` pairs of cell-type names.
    df
        Transition matrix as a DataFrame: the index holds parents, the columns hold children.
        Every label must contain a ':'; the second ':'-separated field is taken as the cell type.
    cutoff
        Only entries with weight strictly greater than ``cutoff`` are counted.

    Returns
    -------
    tuple
        ``(accuracy, all_transitions)``: the counted weight that lies on a curated edge (or on a
        self-transition) divided by the total counted weight, and that total. If nothing exceeds
        the cutoff the accuracy is 1, the same convention ``evaluate_using_germ_layers`` uses.
    """
    child_ct=[a.split(':')[1] for a in df.columns]
    parent_ct=[a.split(':')[1] for a in df.index]
    # A set makes each edge lookup constant-time instead of a scan over all edges
    known_edges=set(G.edges)

    known_transition=0
    unknown_transition=0

    M=df.values

    for i,p in enumerate(parent_ct):
        for j,c in enumerate(child_ct):
            edge_weight=M[i,j]

            if edge_weight>cutoff:

                if c==p or (p, c) in known_edges:
                    known_transition+=edge_weight
                else:
                    unknown_transition+=edge_weight
    all_transitions=unknown_transition+known_transition
    # Avoid dividing by zero when no transition passes the cutoff
    if all_transitions==0:
        accuracy=1
    else:
        accuracy=known_transition/all_transitions
    return(accuracy, all_transitions)

In [7]:
def evaluate_using_germ_layers(Lit, df, cutoff):
    """Score a cell-type transition matrix by germ-layer consistency.

    Parameters
    ----------
    Lit
        Table with ``Cell_type`` and ``Germ_layer`` columns.
    df
        Transition matrix as a DataFrame: the index holds parents, the columns hold children.
        Every label must contain a ':'; the second ':'-separated field is taken as the cell type.
    cutoff
        Only entries with weight strictly greater than ``cutoff`` are counted.

    Returns
    -------
    tuple
        ``(accuracy, all_transitions)``: the counted weight whose parent and child share a germ
        layer divided by the total counted weight, and that total. Accuracy is 1 if nothing is counted.
    """
    child_ct=[label.split(':')[1] for label in df.columns]
    parent_ct=[label.split(':')[1] for label in df.index]

    # Germ layer per cell type, taken from the curated table
    layer_of=dict(zip(Lit['Cell_type'], Lit['Germ_layer']))

    # Sub-clustered cell types are added by hand
    layer_of.update({
        'Osteoblast progenitors A': 'Mesoderm',
        'Osteoblast progenitors B': 'Mesoderm',
        'Paraxial mesoderm A': 'Mesoderm',
        'Paraxial mesoderm B': 'Mesoderm',
        'Paraxial mesoderm C': 'Mesoderm',
        'Amniochorionic mesoderm A': 'Mesoderm',
        'Amniochorionic mesoderm B': 'Mesoderm',
    })

    osteoblast_subclusters=('Osteoblast progenitors A', 'Osteoblast progenitors B')

    known_transition=0
    unknown_transition=0
    weights=df.values

    for row, parent in enumerate(parent_ct):
        for col, child in enumerate(child_ct):
            weight=weights[row, col]
            if not weight>cutoff:
                continue

            parent_layer=layer_of[parent]
            child_layer=layer_of[child]

            # Cell types without an assigned germ layer are left out of the score
            if parent_layer=='Other' or child_layer=='Other':
                continue

            # Special case: Neural crest (neuroectoderm) is known to become osteoblasts (mesoderm) of the head
            neural_crest_to_osteoblast=parent=='Neural crest' and child in osteoblast_subclusters

            if neural_crest_to_osteoblast or parent_layer==child_layer:
                known_transition+=weight
            else:
                unknown_transition+=weight

    all_transitions=unknown_transition+known_transition
    accuracy=1 if all_transitions==0 else known_transition/all_transitions
    return(accuracy, all_transitions)

In [29]:
# Edge-weight cutoff: the evaluation functions only count transitions whose weight is strictly greater than this value
cutoff=0.05

# Pre-gastrulation

In [30]:
# Evaluate the consecutive time-point pairs ts[0]->ts[1] ... ts[4]->ts[5] (E3.5->E4.5 up to E6.25->E6.5)
frames=[]
for ts0, ts1 in zip(ts[0:5], ts[1:6]):
    # Cell-type transition matrices for this pair: moscot (pickle) and TOME (csv, transposed after loading)
    MT=pd.read_pickle(f'{Path}/moscot_maps_cell_type_transition_analysis/{ts0}_{ts1}_cell_type_transitions.pkl')
    TT=pd.read_csv(f'{Path}/TOME_maps_cell_type_transition_analysis/TOME_map_{ts0}_{ts1}.csv', index_col='Unnamed: 0').T

    # TOME columns do not always sum exactly to 1 (only very close), so both maps are
    # column-normalized to keep the comparison fair
    MT=MT/MT.sum(0)
    TT=TT/TT.sum(0)

    # Each evaluation returns (accuracy, total counted weight)
    moscot_curated=evaluate_using_curated_transitions(G, MT, cutoff)
    tome_curated=evaluate_using_curated_transitions(G, TT, cutoff)
    moscot_germ=evaluate_using_germ_layers(Lit, MT, cutoff)
    tome_germ=evaluate_using_germ_layers(Lit, TT, cutoff)

    row={
        'moscot_curated_acc': moscot_curated[0],
        'moscot_curated_total_weight': moscot_curated[1],
        'tome_curated_acc': tome_curated[0],
        'tome_curated_total_weight': tome_curated[1],
        'moscot_germ_acc': moscot_germ[0],
        'moscot_germ_total_weight': moscot_germ[1],
        'tome_germ_acc': tome_germ[0],
        'tome_germ_total_weight': tome_germ[1],
    }
    frames.append(pd.DataFrame([row]))

# One row per time-point pair, tagged with the developmental stage
df=pd.concat(frames, ignore_index=True)
df['Developmental_stage']='Pre-gastrulation'

In [31]:
# Save the pre-gastrulation summary. Note: to_pickle writes a pickle file even though the name ends in '.npy'.
df.to_pickle(f'{Path}/Validation_output/Cell_type_transiton_validation_pre_gastrulation.npy')

# Gastrulation

In [32]:
# Evaluate the consecutive time-point pairs ts[5]->ts[6] ... ts[12]->ts[13] (E6.5->E6.75 up to E8.25->E8.5a)
frames=[]
for ts0, ts1 in zip(ts[5:13], ts[6:14]):
    # Cell-type transition matrices for this pair: moscot (pickle) and TOME (csv, transposed after loading)
    MT=pd.read_pickle(f'{Path}/moscot_maps_cell_type_transition_analysis/{ts0}_{ts1}_cell_type_transitions.pkl')
    TT=pd.read_csv(f'{Path}/TOME_maps_cell_type_transition_analysis/TOME_map_{ts0}_{ts1}.csv', index_col='Unnamed: 0').T

    # TOME columns do not always sum exactly to 1 (only very close), so both maps are
    # column-normalized to keep the comparison fair
    MT=MT/MT.sum(0)
    TT=TT/TT.sum(0)

    # Each evaluation returns (accuracy, total counted weight)
    moscot_curated=evaluate_using_curated_transitions(G, MT, cutoff)
    tome_curated=evaluate_using_curated_transitions(G, TT, cutoff)
    moscot_germ=evaluate_using_germ_layers(Lit, MT, cutoff)
    tome_germ=evaluate_using_germ_layers(Lit, TT, cutoff)

    row={
        'moscot_curated_acc': moscot_curated[0],
        'moscot_curated_total_weight': moscot_curated[1],
        'tome_curated_acc': tome_curated[0],
        'tome_curated_total_weight': tome_curated[1],
        'moscot_germ_acc': moscot_germ[0],
        'moscot_germ_total_weight': moscot_germ[1],
        'tome_germ_acc': tome_germ[0],
        'tome_germ_total_weight': tome_germ[1],
    }
    frames.append(pd.DataFrame([row]))

# One row per time-point pair, tagged with the developmental stage
df=pd.concat(frames, ignore_index=True)
df['Developmental_stage']='Gastrulation'

In [33]:
# Save the gastrulation summary. Note: to_pickle writes a pickle file even though the name ends in '.npy'.
df.to_pickle(f'{Path}/Validation_output/Cell_type_transiton_validation_gastrulation.npy')

# Organogenesis

In [34]:
# Evaluate the consecutive time-point pairs ts[13]->ts[14] ... ts[18]->ts[19] (E8.5a->E8.5b up to E12.5->E13.5)
frames=[]
for ts0, ts1 in zip(ts[13:19], ts[14:20]):
    # Cell-type transition matrices for this pair: moscot (pickle) and TOME (csv, transposed after loading)
    MT=pd.read_pickle(f'{Path}/moscot_maps_cell_type_transition_analysis/{ts0}_{ts1}_cell_type_transitions.pkl')
    TT=pd.read_csv(f'{Path}/TOME_maps_cell_type_transition_analysis/TOME_map_{ts0}_{ts1}.csv', index_col='Unnamed: 0').T

    # TOME columns do not always sum exactly to 1 (only very close), so both maps are
    # column-normalized to keep the comparison fair
    MT=MT/MT.sum(0)
    TT=TT/TT.sum(0)

    # Each evaluation returns (accuracy, total counted weight)
    moscot_curated=evaluate_using_curated_transitions(G, MT, cutoff)
    tome_curated=evaluate_using_curated_transitions(G, TT, cutoff)
    moscot_germ=evaluate_using_germ_layers(Lit, MT, cutoff)
    tome_germ=evaluate_using_germ_layers(Lit, TT, cutoff)

    row={
        'moscot_curated_acc': moscot_curated[0],
        'moscot_curated_total_weight': moscot_curated[1],
        'tome_curated_acc': tome_curated[0],
        'tome_curated_total_weight': tome_curated[1],
        'moscot_germ_acc': moscot_germ[0],
        'moscot_germ_total_weight': moscot_germ[1],
        'tome_germ_acc': tome_germ[0],
        'tome_germ_total_weight': tome_germ[1],
    }
    frames.append(pd.DataFrame([row]))

# One row per time-point pair, tagged with the developmental stage
df=pd.concat(frames, ignore_index=True)
df['Developmental_stage']='Organogenesis'

In [35]:
# Save the organogenesis summary. Note: to_pickle writes a pickle file even though the name ends in '.npy'.
df.to_pickle(f'{Path}/Validation_output/Cell_type_transiton_validation_organogenesis.npy')