In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from scipy.stats import f_oneway
from scipy.stats import kruskal
from mpl_toolkits.axes_grid1 import make_axes_locatable
import math
from matplotlib.colors import TwoSlopeNorm
import scipy
from matplotlib.cm import ScalarMappable
import matplotlib as mpl
from matplotlib.patches import Rectangle
mpl.rcParams['figure.dpi'] = 300


# Cell-type label lists. In each list the first 13 entries are the labels that
# type_to_ct_HSC / type_to_ct_HSPC (below) group under 'Non-hematopoietic'; the rest
# are hematopoietic labels. The three lists differ only in their progenitor labels.
# NOTE(review): none of the non_barcodes_* lists is referenced in the visible cells;
# the "non_barcodes" naming presumably marks labels that are not barcoded
# populations -- confirm against the cells that consume them.

# Variant that spells the multipotent-progenitor label 'MPP-'.
non_barcodes_HSC = ["Chondrocytes","EC-Arteriar","EC-Arteriolar","EC-Sinusoidal",
                        "Fibroblasts","MSPC-Adipo","MSPC-Osteo","Myofibroblasts","Osteo",
                        "Osteoblasts","Pericytes","Schwann-cells","Smooth-muscle", 
                       'CLP', 'lin-', 'MEP', 'CMP', 'GMP', 'MPP-', 'HSC', 'B cell', 'Dendritic cells',
                        'Eo/Baso prog.', 'Ery prog.', 'Ery/Mk prog.', 'Erythroblasts', 'Gran/Mono prog.',
                        'LMPPs', 'Mk prog.', 'Mono prog.', 'Monocytes', 'NK cells', 'Neutro prog.', 'Neutrophils',
                        'T cells', 'large pre-B.', 'pro-B', 'small pre-B.']


# Variant without 'CLP' / 'lin-' and with two MPP labels ('MPP.Flk2n', 'MPP.Flk2p').
non_barcodes_naive = ["Chondrocytes","EC-Arteriar","EC-Arteriolar","EC-Sinusoidal",
                        "Fibroblasts","MSPC-Adipo","MSPC-Osteo","Myofibroblasts","Osteo",
                        "Osteoblasts","Pericytes","Schwann-cells","Smooth-muscle", 
                       'MEP', 'CMP', 'GMP', 'MPP.Flk2n', 'MPP.Flk2p', 'HSC','B cell', 'Dendritic cells',
                        'Eo/Baso prog.', 'Ery prog.', 'Ery/Mk prog.', 'Erythroblasts', 'Gran/Mono prog.',
                        'LMPPs', 'Mk prog.', 'Mono prog.', 'Monocytes', 'NK cells', 'Neutro prog.', 'Neutrophils',
                        'T cells', 'large pre-B.', 'pro-B', 'small pre-B.']

# Same as non_barcodes_HSC except that the label is spelled 'MPP' instead of 'MPP-'.
non_barcodes_HSPC = ["Chondrocytes","EC-Arteriar","EC-Arteriolar","EC-Sinusoidal",
                        "Fibroblasts","MSPC-Adipo","MSPC-Osteo","Myofibroblasts","Osteo",
                        "Osteoblasts","Pericytes","Schwann-cells","Smooth-muscle", 
                       'CLP', 'lin-', 'MEP', 'CMP', 'GMP', 'MPP', 'HSC', 'B cell', 'Dendritic cells',
                        'Eo/Baso prog.', 'Ery prog.', 'Ery/Mk prog.', 'Erythroblasts', 'Gran/Mono prog.',
                        'LMPPs', 'Mk prog.', 'Mono prog.', 'Monocytes', 'NK cells', 'Neutro prog.', 'Neutrophils',
                        'T cells', 'large pre-B.', 'pro-B', 'small pre-B.']

# Group name -> list of cell-type labels, in three groups: 'Non-hematopoietic', 'HPC'
# and 'Blood+Immune'.
# NOTE(review): this dict is not referenced in the visible cells, and its 'HPC' group
# lists 'MPP' although non_barcodes_HSC uses 'MPP-' -- confirm which spelling is intended.
type_to_ct_HSC = {'Non-hematopoietic':["Chondrocytes","EC-Arteriar","EC-Arteriolar","EC-Sinusoidal",
                        "Fibroblasts","MSPC-Adipo","MSPC-Osteo","Myofibroblasts","Osteo",
                        "Osteoblasts","Pericytes","Schwann-cells","Smooth-muscle"],
              'HPC' : ['CLP', 'lin-', 'MEP', 'CMP', 'GMP', 'MPP', 'HSC','Eo/Baso prog.', 'Ery prog.', 'Ery/Mk prog.',
                       'Gran/Mono prog.','LMPPs', 'Mk prog.', 'Mono prog.','Neutro prog.'],
              'Blood+Immune' : ['B cell', 'Dendritic cells','Erythroblasts','Monocytes', 'NK cells','Neutrophils','T cells', 'large pre-B.', 'pro-B', 'small pre-B.']}

# Same contents as type_to_ct_HSC; this is the dict that ct_to_type is built from.
type_to_ct_HSPC = {'Non-hematopoietic':["Chondrocytes","EC-Arteriar","EC-Arteriolar","EC-Sinusoidal",
                        "Fibroblasts","MSPC-Adipo","MSPC-Osteo","Myofibroblasts","Osteo",
                        "Osteoblasts","Pericytes","Schwann-cells","Smooth-muscle"],
              'HPC' : ['CLP', 'lin-', 'MEP', 'CMP', 'GMP', 'MPP', 'HSC','Eo/Baso prog.', 'Ery prog.', 'Ery/Mk prog.',
                       'Gran/Mono prog.','LMPPs', 'Mk prog.', 'Mono prog.','Neutro prog.'],
              'Blood+Immune' : ['B cell', 'Dendritic cells','Erythroblasts','Monocytes', 'NK cells','Neutrophils','T cells', 'large pre-B.', 'pro-B', 'small pre-B.']}


# Result directories, index-aligned with hspc_n below ('general_niche/' pairs with 'hsc').
# pathway_vector builds the same directory names from the population key rather than
# reading this list.
hspc_dirs = ["general_niche/",
'general_niche_mpp/',
'general_niche_cmp/',
'general_niche_clp/',
'general_niche_gmp/',
'general_niche_mep/']

# Lower-case population keys, in the same order as hspc_dirs.
hspc_n = ["hsc",
'mpp',
'cmp',
'clp',
'gmp',
'mep']

# Reverse lookup: cell-type label -> name of the type_to_ct_HSPC group that lists it.
# A label listed in more than one group keeps the group that comes last.
ct_to_type = {}
for group_name, member_labels in type_to_ct_HSPC.items():
    ct_to_type.update(dict.fromkeys(member_labels, group_name))
    
def process_csv(filename):
    """Load a CSV and use its 'Unnamed: 0' column as an unnamed row index.

    Parameters
    ----------
    filename : str or file-like
        Anything accepted by ``pandas.read_csv``. The first row is the header.

    Returns
    -------
    pandas.DataFrame
        The table indexed by the former 'Unnamed: 0' column, with the index
        name cleared.

    Raises
    ------
    KeyError
        If the file has no column named 'Unnamed: 0'.
    """
    table = pd.read_csv(filename, header=0).set_index('Unnamed: 0')
    # Drop the leftover 'Unnamed: 0' label from the index.
    table.index.name = None
    return table

def get_pathways_by_signaling_type(signaling_type):
    """List the distinct pathway names annotated with ``signaling_type``.

    Reads "../interaction_genes.csv" on every call and keeps the rows whose
    'annotation' column equals ``signaling_type``.

    Parameters
    ----------
    signaling_type : str
        Annotation value to match, e.g. "Secreted Signaling".

    Returns
    -------
    list
        Unique 'pathway_name' values of the matching rows, in set order
        (so the ordering is not meaningful).
    """
    annotations = pd.read_csv("../interaction_genes.csv")[['pathway_name', 'annotation']]
    is_requested_type = annotations['annotation'] == signaling_type
    matching_names = annotations.loc[is_requested_type, 'pathway_name']

    # Deduplicate through a set, then hand back a plain list.
    unique_names = set(list(matching_names))
    return list(unique_names)

In [None]:
# Cell-type frequency table: drop rows that are entirely empty and keep only the
# 'Cell Type' and 'Frequency' columns.
bm_freq = (
    pd.read_csv('bone_marrow_cell_frequencies.csv')
    .dropna(how='all')
    [['Cell Type', 'Frequency']]
)
# Nested lookup of the form {cell type: {'Frequency': value}}.
# NOTE(review): the functions in later cells read `bm_freq_newFigure1`, which is not
# defined in the visible cells -- confirm where it comes from.
bm_freq_dict = bm_freq.set_index('Cell Type').T.to_dict()

In [None]:
# Distance table, version 5. Inputs tried previously:
# 'bone_marrow_distancing.csv' and 'bone_marrow_distancing_ratio_v2.csv'.
# The second CSV column is used as the row index, fully empty rows are dropped, and
# the result is a nested dict {row label: {column name: value}}.
# NOTE(review): the functions in later cells read `bm_distance_newFigure1`, which is
# not defined in the visible cells -- confirm where it comes from.
bm_distance = (
    pd.read_csv('bone_marrow_distancing_ratio_v5.csv', index_col = 1)
    .dropna(how='all')
    .T
    .to_dict()
)

In [None]:
# Enrichment table as a nested dict {row label: {column name: value}}, where the row
# labels come from the first CSV column.
# NOTE(review): the functions in later cells read `iNiche_data_newFigure1`, which is
# not defined in the visible cells -- confirm where it comes from.
iNiche_data = (
    pd.read_csv("HSPC_enrichment_iniche_dict.csv", index_col = 0)
    .T
    .to_dict()
)

In [None]:
def pathway_vector(hspc, pathway, direction, nichetypes = None, facs=False, spatial=False):
    """Return one summed interaction strength per niche cell type for one population.

    For every niche cell type the interaction table is fetched with
    ``get_specific_interaction(parent_path, hspc, cell_type, direction)`` and the
    ``pathway`` column is summed. A table without that column contributes 0.

    Parameters
    ----------
    hspc : str
        Population key. 'hsc' reads from 'general_niche/', any other key from
        'general_niche_<hspc>/'. Also used as the inner key of the spatial
        weight dictionaries.
    pathway : str
        Column name looked up in each interaction table.
    direction
        Passed through unchanged to ``get_specific_interaction``.
    nichetypes : list of str, optional
        Niche cell types to evaluate; a falsy value selects the built-in list.
    facs : bool
        If true, scale each sum by ``bm_freq_newFigure1[cell_type] * 0.01``.
        Takes precedence over ``spatial``.
    spatial : bool
        If true (and ``facs`` is false), scale by the frequency as above and by
        ``iNiche_data_newFigure1[cell_type][hspc]`` for "Cell-Cell Contact"
        pathways or ``bm_distance_newFigure1[cell_type][hspc]`` for
        "Secreted Signaling" pathways.

    Returns
    -------
    list or None
        One value per niche cell type, in input order. ``None`` when ``spatial``
        scaling is needed for a pathway that is neither "Cell-Cell Contact" nor
        "Secreted Signaling".
    """
    # 'hsc' results live in the un-suffixed directory.
    if hspc != 'hsc':
        parent_path = 'general_niche' + "_" + hspc + "/"
    else:
        parent_path = 'general_niche' + '/'

    file_names = [
                  "Chondrocytes", "ECArteriar", "ECArteriolar", "ECSinusoidal","Fibroblasts",
                  "MSPCAdipo", "MSPCOsteo", "Myofibroblasts", "Osteo", "Osteoblasts",
                  "Pericytes", "Schwanncells", "Smoothmuscle",
                  "CLP","lin","MEP","CMP","GMP","MPP","HSC",
                  "B cell","Dendritic cells",
                  "EoBaso prog","Ery prog","EryMk prog","Erythroblasts","GranMono prog",
                  "LMPPs","Mk prog","Mono prog","Monocytes","NK cells","Neutro prog",
                  "Neutrophils","T cells","large preB","proB","small preB"
                 ]
    if nichetypes:
        file_names = nichetypes

    # The pathway's signaling category does not depend on the niche cell type, so it is
    # resolved at most once (each lookup re-reads the annotation CSV). It stays lazy so
    # that nothing is read unless a table actually contains the pathway.
    spatial_weights = None
    spatial_weights_resolved = False

    heat_vector = []
    for cell_type in file_names:
        t = get_specific_interaction(parent_path, hspc, cell_type, direction)
        if pathway not in t.columns:
            heat_vector.append(0)
            continue

        if facs:
            heat_vector.append(t[pathway].sum()*bm_freq_newFigure1[cell_type]*0.01)
        elif spatial:
            if not spatial_weights_resolved:
                spatial_weights_resolved = True
                if pathway in get_pathways_by_signaling_type("Cell-Cell Contact"):
                    spatial_weights = iNiche_data_newFigure1
                elif pathway in get_pathways_by_signaling_type("Secreted Signaling"):
                    spatial_weights = bm_distance_newFigure1
            if spatial_weights is None:
                # No spatial weighting is defined for this pathway category.
                return None
            heat_vector.append(t[pathway].sum()*bm_freq_newFigure1[cell_type]*0.01
                               *spatial_weights[cell_type][hspc])
        else:
            heat_vector.append(t[pathway].sum())
    return heat_vector

In [None]:
def hspc_heatmap(pathway, direction, facs = False, spatial = False,
                 nichetypes = None, hspcs = ['hsc', 'mpp', 'clp', 'cmp', 'gmp', 'mep']):
    """Draw a niche-cell-type x population heatmap for one pathway and return its data.

    One row per population is collected with ``pathway_vector``. 'hsc' and 'mpp' are
    then merged into a 'kls' column as ``0.1*hsc + 0.9*mpp``. When 'HSC' is among the
    niche cell types, the 'HSC' and 'MPP' rows are merged the same way into a 'KLS'
    row. The matrix is finally divided by its global maximum and plotted.

    Parameters
    ----------
    pathway : str
        Pathway name. "ECM-Receptor" pathways are rejected.
    direction : str
        'r' titles the plot "Receiving"; any other value titles it "Sending".
        Passed through to ``pathway_vector``.
    facs, spatial : bool
        Weighting switches forwarded to ``pathway_vector``.
    nichetypes : list of str, optional
        Niche cell types (heatmap rows); a falsy value selects the built-in list.
    hspcs : list of str
        Population keys to evaluate. Must contain 'hsc', 'mpp', 'gmp', 'mep',
        'cmp' and 'clp', because those columns are selected by name. The default
        list is never mutated.

    Returns
    -------
    pandas.DataFrame or None
        The normalised matrix (rows: niche cell types, columns:
        'kls', 'gmp', 'mep', 'cmp', 'clp'). ``None`` for an ECM-Receptor pathway, or
        when ``pathway_vector`` cannot weight the pathway spatially.
    """
    if pathway in get_pathways_by_signaling_type("ECM-Receptor"):
        print(pathway, "is an ECM-Receptor Pathway, sorry :(")
        return None

    file_names = [
                  "Chondrocytes", "ECArteriar", "ECArteriolar", "ECSinusoidal","Fibroblasts",
                  "MSPCAdipo", "MSPCOsteo", "Myofibroblasts", "Osteo", "Osteoblasts",
                  "Pericytes", "Schwanncells", "Smoothmuscle",
                  "CLP","lin","MEP","CMP","GMP","MPP","HSC",
                  "B cell","Dendritic cells",
                  "EoBaso prog","Ery prog","EryMk prog","Erythroblasts","GranMono prog",
                  "LMPPs","Mk prog","Mono prog","Monocytes","NK cells","Neutro prog",
                  "Neutrophils","T cells","large preB","proB","small preB"
                 ]
    if nichetypes:
        file_names = nichetypes

    heat_mat = [pathway_vector(hspc, pathway, direction, file_names,
                               facs = facs, spatial = spatial)
                for hspc in hspcs]

    # pathway_vector returns None when spatial weighting is requested for a pathway that
    # is neither Cell-Cell Contact nor Secreted Signaling; building a DataFrame from such
    # rows would fail with an unrelated shape error.
    if any(row is None for row in heat_mat):
        print(pathway, "has no spatial weighting available, sorry :(")
        return None

    df = pd.DataFrame(heat_mat,columns = file_names)
    df.index = hspcs
    df = df.T

    # Merge the two stem/multipotent populations into one column.
    df['kls'] = df['hsc']*0.1 + df['mpp']*0.9
    df = df[['kls', 'gmp', 'mep', 'cmp','clp']]

    if 'HSC' in file_names:
        # Same merge on the niche side: the HSC and MPP rows become one KLS row.
        df = df.T
        df['HSC'] = df['HSC']*0.1 + df['MPP']*0.9
        df = df.rename(columns = {"HSC":"KLS"}).drop(columns = ['MPP'])
        df = df.T

    print(df.max())

    # Scale to the global maximum. An all-zero matrix is left as zeros: dividing by 0
    # would turn every cell into NaN.
    peak = df.max().max()
    if peak != 0:
        df = df/peak

    xticklabels = ['KLS', 'GMP', 'MEP', 'CMP', "CLP"]
    cg = sns.heatmap(df, linewidth = 0.3,
               cmap = 'BuGn', square = True, xticklabels=xticklabels)
    # Black frame around the whole heatmap.
    cg.add_patch(Rectangle((0,0), len(df.columns), len(df.index),
                       fill = False, edgecolor = 'black', lw = 1))

    which_title = ' Sending '
    if direction == 'r':
        which_title = ' Receiving '

    plt.title("" + which_title + pathway)

    fig = plt.gcf()
    fig.set_size_inches(3,2.5)
    plt.tight_layout()

    plt.show()

    return df

In [None]:
def hspc_population_raw(pathway, direction, spatial = False,
                 nichetypes = None, hspcs = ['hsc', 'mpp', 'clp', 'cmp', 'gmp', 'mep']):
    """Return the un-normalised niche-cell-type x population matrix for one pathway.

    The values come from ``pathway_vector_population``. With ``spatial`` set, each
    niche cell type is scaled by ``bm_freq_newFigure1[cell_type] * 0.01`` and each
    cell is additionally scaled by ``iNiche_data_newFigure1`` ("Cell-Cell Contact"
    pathways) or ``bm_distance_newFigure1`` ("Secreted Signaling" pathways). Both are
    indexed as ``[cell_type][population]`` with a lower-case population key, and 'KLS'
    is looked up as 'hsc'. Pathways of any other category get the frequency scaling
    only.

    Parameters
    ----------
    pathway : str
        Pathway name. "ECM-Receptor" pathways are rejected.
    direction : str
        Passed through to ``pathway_vector_population``.
    spatial : bool
        Enable the frequency and spatial weighting described above.
    nichetypes : list of str, optional
        Niche cell types (rows of the result); a falsy value selects the built-in list.
    hspcs : list of str
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame or None
        Rows are niche cell types, columns are 'KLS', 'CMP', 'CLP', 'GMP', 'MEP'.
        ``None`` for an ECM-Receptor pathway.
    """
    if pathway in get_pathways_by_signaling_type("ECM-Receptor"):
        print(pathway, "is an ECM-Receptor Pathway, sorry :(")
        return None

    file_names = [
                  "Chondrocytes", "ECArteriar", "ECArteriolar", "ECSinusoidal","Fibroblasts",
                  "MSPCAdipo", "MSPCOsteo", "Myofibroblasts", "Osteo", "Osteoblasts",
                  "Pericytes", "Schwanncells", "Smoothmuscle",
                  "CLP","lin","MEP","CMP","GMP","MPP","HSC",
                  "B cell","Dendritic cells",
                  "EoBaso prog","Ery prog","EryMk prog","Erythroblasts","GranMono prog",
                  "LMPPs","Mk prog","Mono prog","Monocytes","NK cells","Neutro prog",
                  "Neutrophils","T cells","large preB","proB","small preB"
                 ]
    if nichetypes:
        file_names = nichetypes

    populations = ['KLS', 'CMP', 'CLP', 'GMP','MEP']

    df = pathway_vector_population(pathway, direction, file_names,
                                      spatial = spatial)
    df.index = populations

    if spatial:
        # Weight every niche cell type by its frequency (given in percent).
        for colname in df.columns:
            df[colname] *= bm_freq_newFigure1[colname]*0.01

    df = df.T

    if spatial:
        spatial_weights = None
        if pathway in get_pathways_by_signaling_type("Cell-Cell Contact"):
            spatial_weights = iNiche_data_newFigure1
        elif pathway in get_pathways_by_signaling_type("Secreted Signaling"):
            spatial_weights = bm_distance_newFigure1

        if spatial_weights is not None:
            for hspc in populations:
                # The weight tables have no 'kls' entry; KLS uses the 'hsc' values.
                weight_key = 'hsc' if hspc == 'KLS' else hspc.lower()
                factors = [spatial_weights[nonHSPC][weight_key] for nonHSPC in df.index]
                # Assign the whole column back. The former chained form
                # df[hspc][nonHSPC] *= w writes to a temporary under pandas
                # Copy-on-Write and leaves df unchanged.
                df[hspc] = df[hspc] * factors

    return df[populations]

In [None]:
def hspc_heatmap_population_raw(pathway, direction, spatial = False,
                 nichetypes = None, hspcs = ['hsc', 'mpp', 'clp', 'cmp', 'gmp', 'mep']):
    """Return the un-normalised niche-cell-type x population matrix for one pathway.

    Despite the name, nothing is plotted; only the data frame is computed. The values
    come from ``pathway_vector_population``. With ``spatial`` set, each niche cell
    type is scaled by ``bm_freq_newFigure1[cell_type] * 0.01`` and each cell is
    additionally scaled by ``iNiche_data_newFigure1`` ("Cell-Cell Contact" pathways)
    or ``bm_distance_newFigure1`` ("Secreted Signaling" pathways). Both are indexed
    as ``[cell_type][population]`` with a lower-case population key, and 'KLS' is
    looked up as 'hsc'. Pathways of any other category get the frequency scaling only.

    Parameters
    ----------
    pathway : str
        Pathway name. "ECM-Receptor" pathways are rejected.
    direction : str
        Passed through to ``pathway_vector_population``.
    spatial : bool
        Enable the frequency and spatial weighting described above.
    nichetypes : list of str, optional
        Niche cell types (rows of the result); a falsy value selects the built-in list.
    hspcs : list of str
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame or None
        Rows are niche cell types, columns are 'KLS', 'CMP', 'CLP', 'GMP', 'MEP'.
        ``None`` for an ECM-Receptor pathway.
    """
    if pathway in get_pathways_by_signaling_type("ECM-Receptor"):
        print(pathway, "is an ECM-Receptor Pathway, sorry :(")
        return None

    file_names = [
                  "Chondrocytes", "ECArteriar", "ECArteriolar", "ECSinusoidal","Fibroblasts",
                  "MSPCAdipo", "MSPCOsteo", "Myofibroblasts", "Osteo", "Osteoblasts",
                  "Pericytes", "Schwanncells", "Smoothmuscle",
                  "CLP","lin","MEP","CMP","GMP","MPP","HSC",
                  "B cell","Dendritic cells",
                  "EoBaso prog","Ery prog","EryMk prog","Erythroblasts","GranMono prog",
                  "LMPPs","Mk prog","Mono prog","Monocytes","NK cells","Neutro prog",
                  "Neutrophils","T cells","large preB","proB","small preB"
                 ]
    if nichetypes:
        file_names = nichetypes

    population_labels = ['KLS', 'CMP','CLP', "GMP",'MEP']

    df = pathway_vector_population(pathway, direction, file_names,
                                      spatial = spatial)
    df.index = population_labels

    if spatial:
        # Weight every niche cell type by its frequency (given in percent).
        for colname in df.columns:
            df[colname] *= bm_freq_newFigure1[colname]*0.01

    df = df.T

    if spatial:
        weight_table = None
        if pathway in get_pathways_by_signaling_type("Cell-Cell Contact"):
            weight_table = iNiche_data_newFigure1
        elif pathway in get_pathways_by_signaling_type("Secreted Signaling"):
            weight_table = bm_distance_newFigure1

        if weight_table is not None:
            for hspc in population_labels:
                # The weight tables have no 'kls' entry; KLS uses the 'hsc' values.
                lookup_key = 'hsc' if hspc == 'KLS' else hspc.lower()
                factors = [weight_table[nonHSPC][lookup_key] for nonHSPC in df.index]
                # Whole-column assignment replaces the chained
                # df[hspc][nonHSPC] *= w, which does not modify df under pandas
                # Copy-on-Write.
                df[hspc] = df[hspc] * factors

    return df[['KLS', 'CMP', 'CLP', 'GMP','MEP']]

In [None]:
def hspc_heatmap_population(pathway, direction, spatial = False,
                 nichetypes = None, hspcs = ['hsc', 'mpp', 'clp', 'cmp', 'gmp', 'mep']):
    """Draw a niche-cell-type x population heatmap for one pathway and return its data.

    The values come from ``pathway_vector_population``. With ``spatial`` set, each
    niche cell type is scaled by ``bm_freq_newFigure1[cell_type] * 0.01`` and each
    cell is additionally scaled by ``iNiche_data_newFigure1`` ("Cell-Cell Contact"
    pathways) or ``bm_distance_newFigure1`` ("Secreted Signaling" pathways). Both are
    indexed as ``[cell_type][population]`` with a lower-case population key, and 'KLS'
    is looked up as 'hsc'.

    The matrix is then normalised: three pathway/direction/spatial combinations use a
    fixed divisor, and everything else is divided by its own maximum. The heatmap is
    drawn with ``vmax = 1``.

    Parameters
    ----------
    pathway : str
        Pathway name. "ECM-Receptor" pathways are rejected.
    direction : str
        'r' titles the plot "Receiving"; any other value titles it "Sending".
        Passed through to ``pathway_vector_population``.
    spatial : bool
        Enable the frequency and spatial weighting described above.
    nichetypes : list of str, optional
        Niche cell types (heatmap rows); a falsy value selects the built-in list.
    hspcs : list of str
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        The normalised matrix (rows: niche cell types, columns:
        'KLS', 'CMP', 'CLP', 'GMP', 'MEP'). An empty frame is returned for an
        ECM-Receptor pathway, or when the population table has fewer than 4 rows.
    """
    if pathway in get_pathways_by_signaling_type("ECM-Receptor"):
        print(pathway, "is an ECM-Receptor Pathway, sorry :(")
        return pd.DataFrame()

    file_names = [
                  "Chondrocytes", "ECArteriar", "ECArteriolar", "ECSinusoidal","Fibroblasts",
                  "MSPCAdipo", "MSPCOsteo", "Myofibroblasts", "Osteo", "Osteoblasts",
                  "Pericytes", "Schwanncells", "Smoothmuscle",
                  "CLP","lin","MEP","CMP","GMP","MPP","HSC",
                  "B cell","Dendritic cells",
                  "EoBaso prog","Ery prog","EryMk prog","Erythroblasts","GranMono prog",
                  "LMPPs","Mk prog","Mono prog","Monocytes","NK cells","Neutro prog",
                  "Neutrophils","T cells","large preB","proB","small preB"
                 ]
    if nichetypes:
        file_names = nichetypes

    populations = ['KLS', 'CMP','CLP', "GMP",'MEP']

    df = pathway_vector_population(pathway, direction, file_names,
                                      spatial = spatial)

    if len(df) < 4:
        print("Pathway not here, sorry :(")
        return pd.DataFrame()

    df.index = populations

    if spatial:
        # Weight every niche cell type by its frequency (given in percent).
        for colname in df.columns:
            df[colname] *= bm_freq_newFigure1[colname]*0.01

    df = df.T

    if spatial:
        spatial_weights = None
        if pathway in get_pathways_by_signaling_type("Cell-Cell Contact"):
            spatial_weights = iNiche_data_newFigure1
        elif pathway in get_pathways_by_signaling_type("Secreted Signaling"):
            spatial_weights = bm_distance_newFigure1

        if spatial_weights is not None:
            for hspc in populations:
                # The weight tables have no 'kls' entry; KLS uses the 'hsc' values.
                weight_key = 'hsc' if hspc == 'KLS' else hspc.lower()
                factors = [spatial_weights[nonHSPC][weight_key] for nonHSPC in df.index]
                # Whole-column assignment replaces the chained
                # df[hspc][nonHSPC] *= w, which does not modify df under pandas
                # Copy-on-Write.
                df[hspc] = df[hspc] * factors

    df = df[['KLS', 'CMP', 'CLP', 'GMP','MEP']]

    # Fixed divisors for three specific panels; every other panel is scaled to its own
    # maximum. This must be one if/elif chain: with separate `if` statements the final
    # `else` also ran for both GALECTIN cases and re-normalised them to their own maximum.
    # NOTE(review): the origin of these constants is not visible here -- presumably
    # maxima taken from companion panels so they share one colour scale; confirm.
    if pathway == 'GALECTIN' and direction == 'r' and spatial == False:
        df = df/0.124907230104345
    elif pathway == 'GALECTIN' and direction == 's' and spatial == True:
        df = df/0.007984458688668919
    elif pathway == 'PARs' and direction == 's' and spatial == True:
        df = df/0.002407949325914829
    else:
        df = df/(df.max().max())

    xticklabels = ['KLS', 'CMP','CLP', "GMP",'MEP']
    cg = sns.heatmap(df, linewidth = 0.3,
               cmap = 'BuGn', square = True, xticklabels=xticklabels, vmax = 1)
    # Black frame around the whole heatmap.
    cg.add_patch(Rectangle((0,0), len(df.columns), len(df.index),
                       fill = False, edgecolor = 'black', lw = 1))

    which_title = ' Sending '
    if direction == 'r':
        which_title = ' Receiving '

    plt.title("" + which_title + pathway)

    fig = plt.gcf()
    fig.set_size_inches(3,2.5)
    plt.tight_layout()

    plt.show()

    return df

In [None]:
def pathway_vector_population(pathway, direction, nichetypes = None, spatial=False):
    """Collect one pathway's values for the five populations against each niche cell type.

    For every niche cell type the table
    'metacell_population/all_pathways/singleCell_to_<cell type>.csv' (``direction == 's'``)
    or 'metacell_population/all_pathways/<cell type>_to_singleCell.csv' (any other
    direction) is loaded with ``process_csv``. The rows 'KLS', 'CMP', 'CLP', 'GMP',
    'MEP' are selected and the ``pathway`` column is copied into the result.

    Parameters
    ----------
    pathway : str
        Column name to extract from each table.
    direction : str
        's' selects the "singleCell_to_*" files, anything else the "*_to_singleCell" files.
    nichetypes : list of str, optional
        Niche cell types (result columns); a falsy value selects the built-in list.
    spatial : bool
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        Rows 'KLS', 'CMP', 'CLP', 'GMP', 'MEP', one column per niche cell type. Cell
        types whose table lacks ``pathway`` are filled with 0. If no table contains
        ``pathway`` the frame has those columns but zero rows, which callers use to
        detect an absent pathway.

    Raises
    ------
    KeyError
        If a table lacks one of the five population rows.
    """
    file_names = [
                  "Chondrocytes", "ECArteriar", "ECArteriolar", "ECSinusoidal","Fibroblasts",
                  "MSPCAdipo", "MSPCOsteo", "Myofibroblasts", "Osteo", "Osteoblasts",
                  "Pericytes", "Schwanncells", "Smoothmuscle",
                  "CLP","lin","MEP","CMP","GMP","MPP","HSC",
                  "B cell","Dendritic cells",
                  "EoBaso prog","Ery prog","EryMk prog","Erythroblasts","GranMono prog",
                  "LMPPs","Mk prog","Mono prog","Monocytes","NK cells","Neutro prog",
                  "Neutrophils","T cells","large preB","proB","small preB"
                 ]
    if nichetypes:
        file_names = nichetypes

    parent_path = 'metacell_population/'
    population_rows = ['KLS', 'CMP','CLP', "GMP",'MEP']

    # First pass: read every table and remember its pathway column (None when absent).
    columns_by_cell_type = {}
    for cell_type in file_names:
        if direction == 's':
            filename = 'singleCell_to_' + cell_type
        else:
            filename = cell_type + "_to_singleCell"

        interaction_df = process_csv(parent_path + 'all_pathways/' + filename + '.csv')
        t = interaction_df.loc[population_rows]
        columns_by_cell_type[cell_type] = t[pathway] if pathway in t.columns else None

    # Give the frame its row index before any column is assigned. A scalar 0 assigned
    # to a row-less frame creates an empty column, and pandas then fills it with NaN
    # (not 0) once the first Series column establishes the index.
    found_columns = [col for col in columns_by_cell_type.values() if col is not None]
    if found_columns:
        total_df = pd.DataFrame(index=found_columns[0].index.copy())
    else:
        # Pathway absent everywhere: keep the zero-row result callers rely on.
        total_df = pd.DataFrame()

    for cell_type, column in columns_by_cell_type.items():
        total_df[cell_type] = 0 if column is None else column
    return total_df

In [None]:
# Pathway and direction to plot ('s' = sending, 'r' = receiving).
pathwayName = 'MIF'
pathwayDirection = 's' # sending
# Use the two variables defined above. The call previously read pathwayList[n] and
# directionList[n], which no visible cell defines, so it depended on leftover kernel state.
# NOTE(review): 'Dendritic Cells' is capitalised differently from the 'Dendritic cells'
# entry in the built-in cell-type lists -- confirm that
# 'metacell_population/all_pathways/singleCell_to_Dendritic Cells.csv' exists under that name.
# NOTE(review): hspc_heatmap_population does not use `hspcs`; it is kept only to leave
# the call otherwise unchanged.
pandas_df = hspc_heatmap_population(pathwayName, pathwayDirection, spatial= False,
             nichetypes = ['ECArteriar', 'ECSinusoidal', 'MSPCAdipo', 'MSPCOsteo',
                          'Erythroblasts', 'Monocytes','T cells', 'B cell', 'Dendritic Cells',
                          'KLS', 'CMP', 'CLP', 'GMP','MEP'],
             hspcs = ['KLS', 'CMP', 'CLP', 'GMP','MEP'])