In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from scipy.stats import f_oneway
from scipy.stats import kruskal
from mpl_toolkits.axes_grid1 import make_axes_locatable
import math
from matplotlib.colors import TwoSlopeNorm
import scipy
from matplotlib.cm import ScalarMappable
import matplotlib as mpl
from matplotlib.patches import Rectangle
mpl.rcParams['figure.dpi'] = 300


# The three "non-barcode" label lists share the same leading block of 13
# labels and the same trailing block of 18 labels; they differ only in the
# progenitor labels in between.
_leading_labels = [
    "Chondrocytes", "EC-Arteriar", "EC-Arteriolar", "EC-Sinusoidal",
    "Fibroblasts", "MSPC-Adipo", "MSPC-Osteo", "Myofibroblasts", "Osteo",
    "Osteoblasts", "Pericytes", "Schwann-cells", "Smooth-muscle",
]
_trailing_labels = [
    'B cell', 'Dendritic cells',
    'Eo/Baso prog.', 'Ery prog.', 'Ery/Mk prog.', 'Erythroblasts', 'Gran/Mono prog.',
    'LMPPs', 'Mk prog.', 'Mono prog.', 'Monocytes', 'NK cells', 'Neutro prog.', 'Neutrophils',
    'T cells', 'large pre-B.', 'pro-B', 'small pre-B.',
]

# Note the 'MPP-' label here versus plain 'MPP' in non_barcodes_HSPC.
non_barcodes_HSC = (
    _leading_labels
    + ['CLP', 'lin-', 'MEP', 'CMP', 'GMP', 'MPP-', 'HSC']
    + _trailing_labels
)

# No 'CLP' / 'lin-' here, and MPP is split into two Flk2 labels.
non_barcodes_naive = (
    _leading_labels
    + ['MEP', 'CMP', 'GMP', 'MPP.Flk2n', 'MPP.Flk2p', 'HSC']
    + _trailing_labels
)

non_barcodes_HSPC = (
    _leading_labels
    + ['CLP', 'lin-', 'MEP', 'CMP', 'GMP', 'MPP', 'HSC']
    + _trailing_labels
)

# Each `+` above produced a fresh list, so the helpers can be dropped without
# affecting the three results (and without leaving extra names in the kernel).
del _leading_labels, _trailing_labels

# Coarse category -> member cell-type labels.
type_to_ct_HSC = {
    'Non-hematopoietic': [
        "Chondrocytes", "EC-Arteriar", "EC-Arteriolar", "EC-Sinusoidal",
        "Fibroblasts", "MSPC-Adipo", "MSPC-Osteo", "Myofibroblasts", "Osteo",
        "Osteoblasts", "Pericytes", "Schwann-cells", "Smooth-muscle",
    ],
    'HPC': [
        'CLP', 'lin-', 'MEP', 'CMP', 'GMP', 'MPP', 'HSC',
        'Eo/Baso prog.', 'Ery prog.', 'Ery/Mk prog.',
        'Gran/Mono prog.', 'LMPPs', 'Mk prog.', 'Mono prog.', 'Neutro prog.',
    ],
    'Blood+Immune': [
        'B cell', 'Dendritic cells', 'Erythroblasts', 'Monocytes', 'NK cells',
        'Neutrophils', 'T cells', 'large pre-B.', 'pro-B', 'small pre-B.',
    ],
}

# The HSPC grouping has exactly the same content as the HSC grouping; it is
# built as a separate dict with its own list objects so the two can still be
# edited independently.
type_to_ct_HSPC = {
    category: list(members) for category, members in type_to_ct_HSC.items()
}


# Short HSPC population names, in the order used for the directory list below.
hspc_n = ["hsc", 'mpp', 'cmp', 'clp', 'gmp', 'mep']

# One directory per population, in the same order as hspc_n. The first
# population ('hsc') uses the un-suffixed "general_niche/" directory; every
# other one uses 'general_niche_<name>/'.
hspc_dirs = ["general_niche/"] + [
    'general_niche_' + population + '/' for population in hspc_n[1:]
]

# Reverse lookup of type_to_ct_HSPC: cell-type label -> its coarse category.
# If a label were listed under several categories, the last category wins.
ct_to_type = {
    cell_type: category
    for category, members in type_to_ct_HSPC.items()
    for cell_type in members
}
    
def process_csv(filename):
    """Read a CSV and use its 'Unnamed: 0' column as an unnamed row index.

    Parameters
    ----------
    filename : path or buffer accepted by ``pandas.read_csv``
        CSV whose first line is the header and which contains a column that
        pandas names 'Unnamed: 0' (i.e. a column with a blank header cell).

    Returns
    -------
    pandas.DataFrame
        The table indexed by the former 'Unnamed: 0' column, with the index
        name cleared.

    Raises
    ------
    KeyError
        If the file has no 'Unnamed: 0' column.
    """
    table = pd.read_csv(filename, header=0).set_index('Unnamed: 0')
    # set_index keeps the column name as the index name; clear it.
    table.index.name = None
    return table

In [None]:
# Load the frequency table, drop rows that are entirely empty, and keep only
# the two columns used below.
bm_freq = (
    pd.read_csv('bone_marrow_cell_frequencies.csv')
    .dropna(how='all')
    .loc[:, ['Cell Type', 'Frequency']]
)

# Nested lookup of the form {cell type: {'Frequency': value}}.
bm_freq_dict = bm_freq.set_index('Cell Type').T.to_dict()

In [None]:
# Hard-coded frequency per cell type. The normalisation cells below multiply
# these by 0.01, so they are presumably percentages -- TODO confirm.
# 'KLS' is defined as the sum of the MPP and HSC frequencies.
_freq_mpp = 0.391143494
_freq_hsc = 0.00403241

bm_freq_newFigure1 = {
    'ECArteriar':      0.014298,
    'ECSinusoidal':    0.081722,
    'MSPCAdipo':       0.068125,
    'MSPCOsteo':       0.020325,
    'MPP':             _freq_mpp,
    'HSC':             _freq_hsc,
    'KLS':             _freq_mpp + _freq_hsc,
    'GMP':             0.384841643,
    'MEP':             0.148679469,
    'T cells':         2.335698011,
    'B cell':          8.081785369,
    'Monocytes':       3.068175516,
    'Erythroblasts':   28.036,
    'Dendritic Cells': 0.47,
    'CLP':             0.074846,
    'CMP':             0.779192,
}

# Temporary names only; keep the kernel namespace unchanged.
del _freq_mpp, _freq_hsc

In [None]:
def _iniche_row(hsc_mpp, cmp, gmp, mep, clp):
    """Build one table row; 'hsc' and 'mpp' carry the same value in every row."""
    return {'hsc': hsc_mpp, 'mpp': hsc_mpp, 'cmp': cmp, 'gmp': gmp, 'mep': mep, 'clp': clp}


# Rows that occur more than once in the table with identical numbers.
_msc_values = (4.991285361, 5.338127448, 2.139212335, 1.067561633, 1.093687049)
_kls_values = (38.99549542, 34.63658363, 14.28002342, 8.326578588, 7.335009684)

# Hard-coded iNiche values: outer key = neighbouring cell type, inner key =
# HSPC population. Positional order of the numbers is (hsc & mpp, cmp, gmp, mep, clp).
iNiche_data_newFigure1 = {
    'Neutrophils':     _iniche_row(0.847785269, 0.851977044, 0.922843894, 1.055440712, 1.059765593),
    'Erythroblasts':   _iniche_row(0.522363585, 0.547371915, 0.573904496, 0.574599577, 0.609131267),
    'B cell':          _iniche_row(1.007122188, 1.019512721, 0.979307362, 1.138657761, 1.166849789),
    'ECArteriar':      _iniche_row(2.742944392, 2.562830739, 1.622457807, 1.35759407, 1.343677325),
    'ECSinusoidal':    _iniche_row(4.518885832, 4.057942739, 2.697130431, 1.783971121, 1.631939807),
    # Both MSPC subtypes carry the same numbers.
    'MSPCOsteo':       _iniche_row(*_msc_values),
    'MSPCAdipo':       _iniche_row(*_msc_values),
    'T cells':         _iniche_row(1.054527042, 1.160141162, 1.011883485, 1.266271365, 1.195130659),
    'Dendritic Cells': _iniche_row(1.203685812, 1.223114577, 1.664641191, 1.595345161, 1.516036984),
    'GMP':             _iniche_row(3.49095253, 3.164607364, 13.48378516, 7.978703657, 7.115930728),
    'MEP':             _iniche_row(2.371877287, 2.398335442, 2.42675684, 4.520818923, 4.076626496),
    'Monocytes':       _iniche_row(1.337371201, 1.302715867, 1.497346427, 1.310967896, 1.258538445),
    # HSC, KLS and MPP carry the same numbers.
    'HSC':             _iniche_row(*_kls_values),
    'KLS':             _iniche_row(*_kls_values),
    'MPP':             _iniche_row(*_kls_values),
    'CMP':             _iniche_row(0, 1.879132218, 1.019524274, 1.909292921, 1.673122169),
    'CLP':             _iniche_row(0.334237859, 0.296876791, 0.810197607, 0.880810872, 2.240158903),
}

# Construction helpers only; every row above is its own dict object.
del _iniche_row, _msc_values, _kls_values

In [None]:
# For each HSPC population, the largest frequency-weighted iNiche value over
# the cell types listed in bm_freq_newFigure1; used as the normaliser in the
# next cell.
iNiche_norms = {
    hspc: max(
        iNiche_data_newFigure1[cell_type][hspc] * bm_freq_newFigure1[cell_type] * 0.01
        for cell_type in bm_freq_newFigure1
    )
    for hspc in ['hsc', 'mpp', 'cmp', 'gmp', 'mep', 'clp']
}
iNiche_norms

In [None]:
# Frequency-weighted iNiche value for every (cell type, HSPC) pair, divided by
# that HSPC's maximum so each HSPC's values peak at 1.
iNiche_normalized = {
    cell_type: {
        hspc: (iNiche_data_newFigure1[cell_type][hspc] * bm_freq_newFigure1[cell_type] * 0.01) / iNiche_norms[hspc]
        for hspc in ['hsc', 'mpp', 'cmp', 'gmp', 'mep', 'clp']
    }
    for cell_type in bm_freq_newFigure1
}
# Written with cell types as rows and HSPC populations as columns.
pd.DataFrame.from_dict(iNiche_normalized).T.to_csv("iNiche_interaction_freq_distance.csv")

In [None]:
def _distance_row(label, hsc_mpp, mep, gmp, cmp, clp):
    """Build one table row; 'hsc' and 'mpp' carry the same value in every row."""
    return {'Unnamed: 0': label, 'hsc': hsc_mpp, 'mpp': hsc_mpp,
            'mep': mep, 'gmp': gmp, 'cmp': cmp, 'clp': clp}


# Rows that occur more than once in the table with identical content.
_msc_entry = ('Mesenchymal Stromal Cells', 3.533582958, 0.439758214, 1.06106717, 1.535733388, 0.460013441)
_kls_entry = ('KLS', 15.00398084, 2.316785078, 2.211280903, 4.807712018, 0.948885723)

# Hard-coded distance-based values: outer key = neighbouring cell type, inner
# keys = a text label under 'Unnamed: 0' plus one number per HSPC population
# (CMP and CLP included). Positional order is (label, hsc & mpp, mep, gmp, cmp, clp).
bm_distance_newFigure1 = {
    'Neutrophils':     _distance_row('Neutrophils', 0.959343018, 1.147068901, 1.008924537, 1.082060369, 1.040395624),
    'Erythroblasts':   _distance_row('Erythroblasts', 0.613666622, 0.731968658, 0.782901896, 0.791655748, 0.965737809),
    'B cell':          _distance_row('B Cells', 1.171762527, 1.295414868, 1.009761199, 1.277461667, 1.182621341),
    'ECArteriar':      _distance_row('Arteries', 2.107968291, 1.123391084, 1.054343538, 1.846430642, 1.087211648),
    'ECSinusoidal':    _distance_row('Sinusoids', 2.907236459, 1.103029468, 1.411668877, 0.664450148, 0.681319612),
    # Both MSPC subtypes carry the same entry.
    'MSPCAdipo':       _distance_row(*_msc_entry),
    'MSPCOsteo':       _distance_row(*_msc_entry),
    'T cells':         _distance_row('CD8 T Cells', 1.210240463, 1.342643724, 1.194415431, 1.784286241, 0.942668732),
    'Dendritic Cells': _distance_row('cDC', 1.274946192, 1.424748033, 1.606147709, 1.611409132, 1.2395335),
    'GMP':             _distance_row('GMP', 2.001594293, 1.540883416, 7.821186215, 1.649500868, 1.299050685),
    'MEP':             _distance_row('MEP', 2.134689079, 3.169655764, 1.569971102, 1.580179834, 1.095457554),
    'Monocytes':       _distance_row('Monocytes/Macrophages', 1.53648722, 1.479888361, 1.336129615, 1.826301042, 1.00019184),
    # HSC, KLS and MPP carry the same entry.
    'HSC':             _distance_row(*_kls_entry),
    'KLS':             _distance_row(*_kls_entry),
    'MPP':             _distance_row(*_kls_entry),
    'CMP':             _distance_row('CMP', 0.786286956, 0.867460868, 0.74447571, 5.150128959, 0.311185918),
    'CLP':             _distance_row('CLP', 1.068708248, 1.044374915, 1.209534095, 0.497817807, 4.30074762),
}

# Construction helpers only; every row above is its own dict object.
del _distance_row, _msc_entry, _kls_entry

In [None]:
# For each HSPC population, the largest frequency-weighted distance value over
# the cell types listed in bm_freq_newFigure1; used as the normaliser in the
# next cell.
dist_norms = {
    hspc: max(
        bm_distance_newFigure1[cell_type][hspc] * bm_freq_newFigure1[cell_type] * 0.01
        for cell_type in bm_freq_newFigure1
    )
    for hspc in ['hsc', 'mpp', 'cmp', 'gmp', 'mep', 'clp']
}
dist_norms

In [None]:
# Frequency-weighted distance value for every (cell type, HSPC) pair, divided
# by that HSPC's maximum so each HSPC's values peak at 1.
distance_normalized = {
    cell_type: {
        hspc: (bm_distance_newFigure1[cell_type][hspc] * bm_freq_newFigure1[cell_type] * 0.01) / dist_norms[hspc]
        for hspc in ['hsc', 'mpp', 'cmp', 'gmp', 'mep', 'clp']
    }
    for cell_type in bm_freq_newFigure1
}
# Written with cell types as rows and HSPC populations as columns.
pd.DataFrame.from_dict(distance_normalized).T.to_csv("spatial_interaction_freq_distance.csv")

In [None]:
def pathway_vector_population(pathway, direction, nichetypes = None, spatial=False):
    """Collect one pathway's column from every niche cell type's interaction CSV.

    For each niche cell type, reads
    'metacell_population/all_pathways/singleCell_to_<type>.csv' (when
    ``direction == 's'``) or
    'metacell_population/all_pathways/<type>_to_singleCell.csv' (any other
    ``direction``) via ``process_csv``, keeps the rows labelled 'KLS', 'CMP',
    'CLP', 'GMP' and 'MEP', and takes the column named ``pathway``.

    Parameters
    ----------
    pathway : str
        Column name to extract from each CSV.
    direction : str
        's' selects the 'singleCell_to_<type>' files; anything else selects
        the '<type>_to_singleCell' files.
    nichetypes : list of str, optional
        Niche cell-type file stems to read. When None or empty, the built-in
        list of 38 file stems is used.
    spatial : bool, optional
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        One column per niche cell type and one row per HSPC label. Niche
        types whose CSV lacks ``pathway`` are filled with 0. If no CSV
        contains ``pathway`` the frame has the niche columns but zero rows,
        which callers use to detect a missing pathway.

    Raises
    ------
    KeyError
        If a CSV lacks one of the five HSPC row labels (from ``.loc``) or the
        'Unnamed: 0' column (from ``process_csv``).
    """
    hspc_rows = ['KLS', 'CMP', 'CLP', "GMP", 'MEP']

    file_names = [
                  "Chondrocytes", "ECArteriar", "ECArteriolar", "ECSinusoidal","Fibroblasts",
                  "MSPCAdipo", "MSPCOsteo", "Myofibroblasts", "Osteo", "Osteoblasts",
                  "Pericytes", "Schwanncells", "Smoothmuscle",
                  "CLP","lin","MEP","CMP","GMP","MPP","HSC",
                  "B cell","Dendritic cells",
                  "EoBaso prog","Ery prog","EryMk prog","Erythroblasts","GranMono prog",
                  "LMPPs","Mk prog","Mono prog","Monocytes","NK cells","Neutro prog",
                  "Neutrophils","T cells","large preB","proB","small preB"
                 ]

    if nichetypes:
        file_names = nichetypes

    parent_path = 'metacell_population/'

    # niche cell type -> pathway Series, or None when that CSV lacks the pathway.
    column_by_niche = {}
    for cell_type in file_names:
        if direction == 's':
            filename = 'singleCell_to_' + cell_type
        else:
            filename = cell_type + "_to_singleCell"

        interaction_df = process_csv(parent_path + 'all_pathways/' + filename + '.csv')
        hspc_slice = interaction_df.loc[hspc_rows]

        column_by_niche[cell_type] = hspc_slice[pathway] if pathway in hspc_slice.columns else None

    found = [values for values in column_by_niche.values() if values is not None]
    if not found:
        # Pathway absent everywhere: return a zero-row frame (columns only) so
        # callers can recognise the case by its length.
        return pd.DataFrame(columns=list(column_by_niche))

    # Build the frame in one step on a known row index so that every missing
    # niche type is filled with 0. Filling column by column would leave NaN
    # for niche types that precede the first one containing the pathway.
    total_df = pd.DataFrame(
        {cell_type: (0 if values is None else values)
         for cell_type, values in column_by_niche.items()},
        index=found[0].index,
    )
    return total_df

In [None]:
def hspc_heatmap_population(pathway, direction, spatial = False,
                 nichetypes = None, hspcs = ['hsc', 'mpp', 'clp', 'cmp', 'gmp', 'mep']):
    """Plot and return a niche-by-HSPC heatmap of one pathway's scaled signal.

    Steps: fetch the pathway values with ``pathway_vector_population``, divide
    by the overall maximum, multiply by the pathway's 'max(LR) scale factor'
    from 'pathway_scale_factors.csv', optionally apply spatial weights, then
    draw a seaborn heatmap and show it.

    Parameters
    ----------
    pathway : str
        Pathway name; used for the CSV column, the scale-factor lookup and
        the plot title.
    direction : str
        'r' means receiving (title " Receiving ", scale-factor direction
        'receiving'); any other value is treated as sending.
    spatial : bool, optional
        When True, each cell is additionally multiplied by a weight from the
        notebook-level ``iNiche_normalized`` table (pathway listed under
        "Cell-Cell Contact") or ``distance_normalized`` table (pathway listed
        under "Secreted Signaling"). Pathways in neither list are left
        unweighted. This relies on ``get_pathways_by_signaling_type``, which
        is not defined in the part of the notebook visible here and must
        exist in the kernel.
    nichetypes : list of str, optional
        Niche cell types to include; defaults to the built-in list of 38.
    hspcs : list of str, optional
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        Rows are niche cell types, columns are 'KLS', 'CMP', 'CLP', 'GMP',
        'MEP'. An empty DataFrame is returned (after printing a message) when
        ``pathway_vector_population`` yields fewer than 4 rows.

    Raises
    ------
    KeyError
        With ``spatial=True``, if a niche cell type has no entry in the
        selected weight table.
    """
    hspc_columns = ['KLS', 'CMP', 'CLP', "GMP", 'MEP']

    file_names = [
                  "Chondrocytes", "ECArteriar", "ECArteriolar", "ECSinusoidal","Fibroblasts",
                  "MSPCAdipo", "MSPCOsteo", "Myofibroblasts", "Osteo", "Osteoblasts",
                  "Pericytes", "Schwanncells", "Smoothmuscle",
                  "CLP","lin","MEP","CMP","GMP","MPP","HSC",
                  "B cell","Dendritic cells",
                  "EoBaso prog","Ery prog","EryMk prog","Erythroblasts","GranMono prog",
                  "LMPPs","Mk prog","Mono prog","Monocytes","NK cells","Neutro prog",
                  "Neutrophils","T cells","large preB","proB","small preB"
                 ]

    if nichetypes:
        file_names = nichetypes

    df = pathway_vector_population(pathway, direction, file_names,
                                      spatial = spatial)

    if len(df) < 4:
        print("Pathway not here, sorry :(")
        return pd.DataFrame()

    df.index = hspc_columns

    # Scale so the largest entry is 1. A maximum of exactly 0 is left
    # unscaled, because dividing would turn every zero into NaN.
    peak = df.max().max()
    if peak != 0:
        df = df/peak

    # From here on: rows = niche cell types, columns = HSPC populations.
    df = df.T

    try:
        pathway_scale_factors = pd.read_csv("pathway_scale_factors.csv",index_col = 0)
        pathway_scale_factors = pathway_scale_factors[pathway_scale_factors.Pathway == pathway]
        scale_factor_dir = 'sending'
        if direction == 'r':
            scale_factor_dir = 'receiving'
        pathway_scale_factors = pathway_scale_factors[pathway_scale_factors.Direction == scale_factor_dir]
        pathway_scale_factor = list(pathway_scale_factors['max(LR) scale factor'])[0]
    except Exception as err:
        # Best-effort lookup: keep the fallback of 0 (which blanks the
        # heatmap), but report why so an all-zero result is not a mystery.
        print("No scale factor for " + str(pathway) + " (" + scale_factor_label(direction)
              + "); using 0. Reason: " + repr(err))
        pathway_scale_factor = 0

    df = df*pathway_scale_factor

    if spatial:
        weight_table = None
        if (pathway in get_pathways_by_signaling_type("Cell-Cell Contact")):
            weight_table = iNiche_normalized
        elif (pathway in get_pathways_by_signaling_type("Secreted Signaling")):
            weight_table = distance_normalized

        if weight_table is not None:
            # The weight tables are keyed by lower-case HSPC name, and the
            # 'KLS' column uses the 'hsc' weights. Multiplying by a weight
            # frame avoids chained assignment (df[col][row] *= ...), which is
            # not guaranteed to write back into df.
            weights = pd.DataFrame(
                {hspc: [weight_table[nonHSPC]['hsc' if hspc == 'KLS' else hspc.lower()]
                        for nonHSPC in df.index]
                 for hspc in hspc_columns},
                index=df.index,
            )
            df = df * weights

    # Fix the column order; the label is 'MEP' (selecting 'MEPs' raised KeyError).
    df = df[hspc_columns]

    xticklabels = hspc_columns
    cg = sns.heatmap(df, linewidth = 0.3,
               cmap = 'BuGn', square = True, xticklabels=xticklabels)
    # Black frame around the whole heatmap.
    cg.add_patch(Rectangle((0,0), len(df.columns), len(df.index),
                       fill = False, edgecolor = 'black', lw = 1))

    which_title = ' Sending '
    if direction == 'r':
        which_title = ' Receiving '

    plt.title("" + which_title + pathway)

    fig = plt.gcf()
    fig.set_size_inches(3,2.5)
    plt.tight_layout()

    plt.show()

    return df


def scale_factor_label(direction):
    """Return the 'Direction' value used in pathway_scale_factors.csv for ``direction``."""
    return 'receiving' if direction == 'r' else 'sending'

In [None]:
def save_spatial_corrected_heatmap(pathway_list, direction_list, spatial_factor, save_ext):
    """Draw the Figure-1 heatmap for each (pathway, direction) pair and save it as CSV.

    Each pair is passed to ``hspc_heatmap_population`` with a fixed set of 14
    niche cell types. Every non-empty result is written to
    'Figure_1_Pretty/<pathway>_<direction><save_ext>.csv'; the directory is
    created if it does not exist yet.

    Parameters
    ----------
    pathway_list : sequence of str
        Pathway names.
    direction_list : sequence of str
        Direction code for each pathway, paired by position with
        ``pathway_list`` using ``zip`` (extra items in the longer sequence
        are ignored).
    spatial_factor : bool
        Forwarded as ``spatial`` to ``hspc_heatmap_population``.
    save_ext : str
        Suffix inserted before '.csv' in the output file name.

    Returns
    -------
    None
    """
    # Local import so this function does not depend on the notebook's import cell.
    import os

    out_dir = "Figure_1_Pretty/"

    for p,d in zip(pathway_list, direction_list):
        pandas_df = hspc_heatmap_population(p, d, spatial = spatial_factor,
             nichetypes = ['ECArteriar', 'ECSinusoidal', 'MSPCAdipo', 'MSPCOsteo',
                          'Erythroblasts', 'Monocytes','T cells', 'B cell', 'Dendritic Cells',
                          'KLS', 'CMP', 'CLP', 'GMP','MEP'],
             hspcs = ['KLS', 'CMP', 'CLP', 'GMP','MEP'])

        if len(pandas_df) > 0:
            # Create the output folder lazily so a fresh checkout does not
            # fail at the very last step, after the heatmap has been computed.
            os.makedirs(out_dir, exist_ok=True)
            pandas_df.to_csv(out_dir + p + "_" + d + save_ext + ".csv")

In [None]:
# Export the receiving-direction ('r') KIT heatmap with spatial weighting off.
save_spatial_corrected_heatmap(
    pathway_list=['KIT'],
    direction_list=['r'],
    spatial_factor=False,
    save_ext='_scale_factor_no_spatial',
)