In [None]:
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu
import statsmodels.api as sm
from statsmodels import stats
from os import path

In [None]:
# takes in interaction data and names of two groups to-be-compared
# returns df with cell type interaction compared, pval, pval_adj, and whether or not it is significant (based on pval_adj)
def mann_whitney(dat,grp1,grp2,group_col=None):
    """Run a per-cell-type Mann-Whitney U test between two groups and FDR-adjust the p-values.

    Parameters
    ----------
    dat : pandas.DataFrame
        Must contain 'celltype' and 'interaction_norm1' columns plus the grouping column.
    grp1, grp2 : label
        Values of the grouping column that identify the two groups to compare.
    group_col : str, optional
        Name of the column holding the group labels. When omitted, 'humanid' is used
        if the frame has it, otherwise 'pheno'.

    Returns
    -------
    pandas.DataFrame
        One row per tested cell type, sorted by ascending 'pval', with columns
        'celltype', 'pval', 'pval_adj' (FDR-corrected p-value) and 'sig_stat'
        (True where the corrected test rejects at alpha=0.05). Cell types whose
        'interaction_norm1' values have no spread, or that have no rows in one of
        the two groups, are left out. The frame is empty when nothing could be tested.

    Raises
    ------
    KeyError
        If the grouping column is not present in ``dat``.
    """
    if group_col is None:
        group_col = 'humanid' if 'humanid' in dat.columns else 'pheno'
    if group_col not in dat.columns:
        raise KeyError("grouping column {!r} not found in dat".format(group_col))

    mann_pvals = []
    for celltype in np.unique(dat['celltype']):
        dat_celltype = dat[dat['celltype']==celltype]
        # A constant column cannot be ranked meaningfully; NaN (no rows) also fails this check.
        if not np.std(dat_celltype['interaction_norm1']) > 0:
            continue
        # Use fresh local names: reassigning grp1/grp2 here would destroy the group
        # labels needed by the next iteration.
        vals1 = dat_celltype.loc[dat_celltype[group_col]==grp1,'interaction_norm1']
        vals2 = dat_celltype.loc[dat_celltype[group_col]==grp2,'interaction_norm1']
        if vals1.empty or vals2.empty:
            # The test is undefined when one of the groups has no observations.
            continue
        _, p = mannwhitneyu(vals1,vals2)
        mann_pvals.append([celltype,p])

    if not mann_pvals:
        return pd.DataFrame(columns=['celltype','pval','pval_adj','sig_stat'])

    dat_final = pd.DataFrame(mann_pvals,columns=['celltype','pval'])
    # fdrcorrection is told the p-values are pre-sorted, so sort ascending first;
    # its outputs then line up row-for-row with dat_final.
    dat_final = dat_final.sort_values(by=['pval'],ascending=True)
    rejected, pval_adj = stats.multitest.fdrcorrection(
        dat_final['pval'].to_numpy(), alpha=0.05, method='indep', is_sorted=True
    )
    dat_final['pval_adj'] = pval_adj
    dat_final['sig_stat'] = rejected
    return dat_final

### Statistics for human interactions

In [None]:
# Lookup from a 'pheno' label to the string stored in the 'humanid' column.
pheno_to_humanid = dict(Injured='19', Healthy='21')

In [None]:
section = 'medulla'
cell_type = 'CDPC'
# input_path is the path to the file with interactions of the specified cell type in the specified
# section with all other cell types, for all instances of the cell type across all arrays to be compared
input_path = 'human_{section}_{celltype}_interactions.csv'.format(section=section,celltype=cell_type)
dat = pd.read_csv(input_path,index_col=0)
dat = dat.drop(columns=['celltype_id'])
# Tag every row with its human id; a phenotype missing from pheno_to_humanid raises KeyError
# rather than slipping through as a missing value.
dat['humanid'] = [pheno_to_humanid[pheno] for pheno in dat['pheno']]
dat = dat[['puckid','humanid','celltype','interaction_norm1']]
# replace() matches whole cell values in every column; the keys are cell-type names,
# so presumably only 'celltype' is affected -- TODO confirm no id column holds these strings.
dat = dat.replace({'Endothelial':'EC','Mesangial':'MC','Ren1':'GC','Immune':'Other_Immune','CDPC':'CD-PC','CDIC':'CD-IC'})
if section =='medulla':
    # These cell types are excluded from the medulla comparison.
    dat = dat[~dat['celltype'].isin(['PCT','Podocyte','GC','MD','MC'])]
# drop=True discards the old row labels directly, so the renumbering works whatever
# name the CSV's index column had (reset_index() + drop('index') fails on a named index).
dat = dat.reset_index(drop=True)

In [None]:
# Compare Injured ('19') against Healthy ('21') rows and write the per-cell-type p-values to CSV.
out_path = 'human_{section}_{celltype}_mannu_pvals.csv'.format(celltype=cell_type, section=section)
result = mann_whitney(dat, grp1='19', grp2='21')
result.to_csv(out_path)

### Statistics for mouse interactions

In [None]:
section = 'cortex'
cell_type = 'TAL'
# input_path points at the file holding the interactions of the chosen cell type, in the chosen
# section, with every other cell type, for all instances of the cell type across the arrays compared
input_path = 'mouse_{section}_{celltype}_interactions.csv'.format(celltype=cell_type, section=section)
# Load, keep only the columns used downstream, and rename cell types to their abbreviated labels.
dat = (
    pd.read_csv(input_path)
    .drop(columns=['celltype_id'])
    .loc[:, ['puckid','pheno','celltype','interaction_norm1']]
    .replace({'Endothelial':'EC','Mesangial':'MC','Ren1':'GC','other_immune':'Other_Immune','CDPC':'CD-PC','CDIC':'CD-IC','PCT1':'PCT_1','PCT2':'PCT_2'})
)
if section == 'medulla':
    # These cell types are excluded from the medulla comparison.
    excluded = dat['celltype'].isin(['PCT_1','PCT_2','Podocyte','GC','MD','MC'])
    dat = dat.loc[~excluded]
# Renumber rows 0..n-1 without keeping the old labels as a column.
dat = dat.reset_index(drop=True)

In [None]:
# Compare the 'UMOD-KI' and 'UMOD-WT' groups and write the per-cell-type p-values to CSV.
out_path = 'mouse_{section}_{celltype}_mannu_pvals.csv'.format(celltype=cell_type, section=section)
result = mann_whitney(dat, grp1='UMOD-KI', grp2='UMOD-WT')
result.to_csv(out_path)