In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import argparse
from os import path
from sklearn.neighbors import radius_neighbors_graph
# Seed NumPy's global random number generator so that anything drawing from
# np.random gives the same values on every fresh run of the notebook.
np.random.seed(111)

In [None]:
# Analysis parameters read by the cells below.
specimen = 'mouse'   # 'mouse' or 'human'; later cells branch on this value
section = 'cortex'   # only beads whose 'section' column equals this are kept
cell_type = 'TAL'    # focal cell type whose neighbourhoods are profiled
# NOTE(review): `gene` is not read by the cells visible here (they use
# `gene_name`, set per specimen further down) -- confirm before removing.
gene = 'Trem2'

In [None]:
# Collect the ids of the arrays to process for the selected specimen.
if specimen == 'mouse':
    # to_process_all_UMODKI.txt and to_process_all_UMODWT.txt are headerless
    # files; their first column lists the unique ids of the UMOD-KI and
    # UMOD-WT arrays respectively.
    umodki_arrayids = pd.read_csv('to_process_all_UMODKI.txt', header=None)[0].tolist()
    umodwt_arrayids = pd.read_csv('to_process_all_UMODWT.txt', header=None)[0].tolist()
    arrayids = umodki_arrayids + umodwt_arrayids
elif specimen == 'human':
    arrayids = ['200115_17', '200115_18', '200121_01', '200121_03']

In [None]:
# Neighbourhood radius handed to radius_neighbors_graph; it is compared
# against distances between the beads' (x, y) coordinates.
radius = 25

# Cell-type labels that are tallied in each neighbourhood, per specimen.
if specimen == 'human':
    cell_names = [
        'PCT', 'Endothelial', 'Mesangial', 'Fibroblast', 'TAL', 'DCT', 'CDIC',
        'CDPC', 'Ren1', 'MD', 'Podocyte', 'Macrophage', 'Immune', 'vSMC',
    ]
elif specimen == 'mouse':
    cell_names = [
        'PCT1', 'PCT2', 'Endothelial', 'Mesangial', 'Fibroblast', 'TAL', 'DCT',
        'CDIC', 'CDPC', 'Ren1', 'MD', 'Podocyte', 'Macrophage', 'other_immune',
        'vSMC',
    ]

In [None]:
# Marker gene whose expressing macrophages are given their own label in the
# interaction analysis (Trem2 for mouse, Lyve1 for human).
gene_by_specimen = {'mouse': 'Trem2', 'human': 'Lyve1'}
if specimen in gene_by_specimen:
    gene_name = gene_by_specimen[specimen]
    # Add the label only once: re-running this cell must not append it again,
    # otherwise every neighbourhood would be tallied twice for this label.
    if gene_name not in cell_names:
        cell_names.append(gene_name)

In [None]:
# Neighbourhood composition around every `cell_type` bead, array by array.
#
# Result: `all_interactions`, one row per (focal bead, label in cell_names):
#   celltype_id        running number of the focal bead within its array
#   celltype           neighbour label being counted
#   interaction_norm1  number of beads with that label within `radius` of the
#                      focal bead, divided by
#                      sqrt(#beads with that label * #focal beads) in the array
#                      (0 when that product is 0)
#   arrayid            id of the array the focal bead belongs to
#
# Arrays with no bead in `section`, or without a non-empty
# <array_id>_macrophage_<gene>_info.csv file, contribute no rows.
# The frame's index is a plain running row number (the previous row-by-row
# concatenation labelled every row 0).
interaction_columns = ['celltype_id', 'celltype', 'interaction_norm1', 'arrayid']
interaction_frames = []

for array_id in arrayids:
    print(array_id)
    if specimen == 'human':
        pheno = 'human'
    elif specimen == 'mouse':
        if array_id in umodki_arrayids:
            pheno = 'UMOD-KI'
        if array_id in umodwt_arrayids:
            pheno = 'UMOD-WT'

    print(array_id, pheno)

    # input_path is path to file with beads x features for all curated cell type calls in an array
    # features = {'barcode','x','y','cell_type','section'}
    input_path = '{}_allcells.csv'.format(array_id)
    allcells_info = pd.read_csv(input_path, index_col=0)
    allcells_info = allcells_info[allcells_info['section'] == section]
    if allcells_info.empty:
        continue
    # Positional 0..n-1 index so that row labels line up with graph rows.
    allcells_info = allcells_info.reset_index(drop=True)

    # input_path is path to file with beads x features for all Lyve1 or Trem2-expressing macrophages in an array
    # features = {'barcode','x','y','section'}
    input_path = '{array_id}_macrophage_{gene}_info.csv'.format(array_id=array_id, gene=gene_name)
    if not path.exists(input_path):
        continue
    immune_info = pd.read_csv(input_path, index_col=0)
    if immune_info.empty:
        continue
    immune_barcodes = list(immune_info['barcode'])

    # Relabel the gene-expressing macrophages with the gene name.
    is_immune = allcells_info['barcode'].isin(immune_barcodes)
    allcells_info.loc[is_immune, 'cell_type'] = gene_name

    # Rows of the focal cell type (matched through their barcodes).
    celltype_barcodes = list(allcells_info.loc[allcells_info['cell_type'] == cell_type, 'barcode'])
    celltype_indices = np.flatnonzero(allcells_info['barcode'].isin(celltype_barcodes).to_numpy())
    if celltype_indices.size == 0:
        # No focal bead in this array: nothing to report.
        continue

    # Number of beads carrying each label in this array.
    label_totals = allcells_info['cell_type'].value_counts()
    celltype_counts = {name: int(label_totals.get(name, 0)) for name in cell_names}

    # compute nearest neighbor graph of all beads; it is kept sparse because
    # a dense copy needs n_beads x n_beads floats.
    allcells_coords = allcells_info[['x', 'y']].to_numpy()
    nn = radius_neighbors_graph(allcells_coords, radius, mode='connectivity', include_self=False)

    # Rows of the graph that belong to the focal cell type.
    celltype_nn = nn[celltype_indices]

    # One 0/1 column per label; in 'connectivity' mode every stored edge is 1,
    # so the product below counts, for each focal bead, its neighbours per label.
    label_matrix = np.column_stack(
        [(allcells_info['cell_type'] == name).to_numpy() for name in cell_names]
    ).astype(float)
    neighbor_counts = np.asarray(celltype_nn @ label_matrix)

    # Normalisation constant per label; it is 0 when the label is absent.
    focal_total = celltype_counts[cell_type]
    norms = [np.sqrt(celltype_counts[name] * focal_total) for name in cell_names]

    records = []
    for i in range(neighbor_counts.shape[0]):
        for j, name in enumerate(cell_names):
            if norms[j] > 0:
                num_cell_norm1 = float(neighbor_counts[i, j] / norms[j])
            else:
                # An absent label has no neighbours either; report 0 rather
                # than dividing 0 by 0.
                num_cell_norm1 = 0.0
            records.append({
                'celltype_id': i,
                'celltype': name,
                'interaction_norm1': num_cell_norm1,
                'arrayid': array_id,
            })

    # Build the per-array frame once instead of growing it row by row.
    interaction_frames.append(pd.DataFrame(records, columns=interaction_columns))

if interaction_frames:
    all_interactions = pd.concat(interaction_frames, ignore_index=True)
else:
    all_interactions = pd.DataFrame(columns=interaction_columns)

In [None]:
# Label every interaction row of the human arrays as Injured or Healthy.
if specimen == 'human':
    # array id -> condition of the sample
    human_puck_to_id = {
        '200115_17':'Injured',
        '200115_18':'Injured',
        '200121_01':'Healthy',
        '200121_03':'Healthy'
    }

    # The interaction table stores the array id in the 'arrayid' column.
    all_interactions['pheno'] = [human_puck_to_id[x] for x in all_interactions['arrayid']]

In [None]:
# Label every interaction row of the mouse arrays as UMOD-KI or UMOD-WT.
if specimen == 'mouse':
    # array id -> genotype; UMOD-KI is written last so it takes precedence
    # for an id that appears in both lists.
    pheno_by_array = {array_id: 'UMOD-WT' for array_id in umodwt_arrayids}
    pheno_by_array.update({array_id: 'UMOD-KI' for array_id in umodki_arrayids})

    # Look the genotype up row by row through the 'arrayid' column, so the
    # result does not depend on the order of the rows in all_interactions.
    phenos_all = [pheno_by_array[array_id] for array_id in all_interactions['arrayid']]
    all_interactions['pheno'] = phenos_all
    
    

In [None]:
# Save the combined interaction table; the file name encodes the specimen,
# the section and the focal cell type.
output_path = '{specimen}_{section}_{celltype}_interactions.csv'.format(
    specimen=specimen,
    section=section,
    celltype=cell_type,
)
all_interactions.to_csv(output_path)