In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import alphashape
from descartes import PolygonPatch
from shapely.geometry import Point
from os import path

In [None]:
def celltype_excluder(dat,alpha_shape):
    """Remove the rows of ``dat`` whose (x, y) position lies in ``alpha_shape``.

    Parameters
    ----------
    dat : pandas.DataFrame
        Data for the cell type to be curated; must contain 'x' and 'y' columns.
    alpha_shape : geometry object exposing ``intersects``
        Alphashape of the structure from which the cell type must be removed.

    Returns
    -------
    pandas.DataFrame
        The rows of ``dat`` that do not intersect ``alpha_shape``; all columns
        are kept, also when no row survives or ``dat`` is empty.
    """
    points = np.array(dat[['x','y']])
    # Keep the mask as a boolean ndarray instead of a Python list: an empty
    # *list* used as a DataFrame key is read as a column selection, which
    # would hand back a frame without any columns when ``dat`` has no rows.
    in_struct = np.fromiter(
        (alpha_shape.intersects(Point(coord)) for coord in points),
        dtype=bool,
    )
    return dat[~in_struct]

def celltype_includer(dat,alpha_shape):
    """Keep only the rows of ``dat`` whose (x, y) position lies in ``alpha_shape``.

    Parameters
    ----------
    dat : pandas.DataFrame
        Data for the cell type to be curated; must contain 'x' and 'y' columns.
    alpha_shape : geometry object exposing ``intersects``
        Alphashape of the structure in which the cell type must be maintained.

    Returns
    -------
    pandas.DataFrame
        The rows of ``dat`` that intersect ``alpha_shape``; all columns are
        kept, also when no row survives or ``dat`` is empty.
    """
    points = np.array(dat[['x','y']])
    # Keep the mask as a boolean ndarray instead of a Python list: an empty
    # *list* used as a DataFrame key is read as a column selection, which
    # would hand back a frame without any columns when ``dat`` has no rows.
    in_struct = np.fromiter(
        (alpha_shape.intersects(Point(coord)) for coord in points),
        dtype=bool,
    )
    return dat[in_struct]

In [None]:
def get_inlier_coords(array_id):
    """Return the coordinates of all spatial inlier beads of one array.

    Reads '<array_id>_coords.csv' relative to the working directory, taking
    the first CSV column as the index. The file is expected to hold
    beads x features with spatial outliers already removed
    (features = {'barcode','x','y'}).
    """
    coords_file = '{array_id}_coords.csv'.format(array_id=array_id)
    return pd.read_csv(coords_file, index_col=0)

def get_allcells_unfiltered(array_id):
    """Return the raw (not yet curated) cell type labels of all beads of one array.

    Reads '<array_id>_allcells_unfiltered_df.csv' relative to the working
    directory, taking the first CSV column as the index. The file is expected
    to hold beads x features for all beads
    (features = {'barcode','x','y','cell_type'}).
    """
    unfiltered_file = '{array_id}_allcells_unfiltered_df.csv'.format(array_id=array_id)
    return pd.read_csv(unfiltered_file, index_col=0)

def get_allcells_filtered(array_id):
    """Return the cell type labels of all curated beads of one array.

    Reads '<array_id>_allcells.csv' relative to the working directory, taking
    the first CSV column as the index. The file is expected to hold
    beads x features for all curated beads
    (features = {'barcode','x','y','cell_type'}).
    """
    curated_file = '{array_id}_allcells.csv'.format(array_id=array_id)
    return pd.read_csv(curated_file, index_col=0)

In [None]:
# Lookup table: array id -> genotype / sample label.
# (WT refers to BTBR wt/wt, DKD refers to BTBR ob/ob)
# The plotting cells below treat 'WT', 'DKD', 'UMOD-WT' and 'UMOD-KI' as mouse
# arrays and every other label as human.
d = dict([
    ('191204_15', 'WT'),
    ('191204_22', 'DKD'),
    ('191223_19', 'UMOD-WT'),
    ('200104_07', 'UMOD-KI'),
    ('200113_11', '21_cortex'),
    ('200121_01', '21_medulla'),
    ('200104_19', '12_cortex'),
    ('200104_21', '12_medulla'),
    ('200115_15', '19_cortex'),
    ('200115_18', '19_medulla'),
])

## Blank arrays

In [None]:
# One background-only figure per array: every spatial inlier bead in light grey,
# saved as '<geno>_<array_id>_blank.pdf'.
for array_id, geno in d.items():
    print(array_id)

    all_coords = get_inlier_coords(array_id)
    out_path = '{geno}_{array_id}_blank.pdf'.format(array_id=array_id,geno=geno)

    fig, ax = plt.subplots(figsize=(5,5))
    ax.scatter(all_coords['x'],all_coords['y'],s=3,c='lightgrey')
    # Same fixed window as every other figure in this notebook.
    ax.set_xlim(500,5800)
    ax.set_ylim(500,5800)
    ax.axis('off')
    fig.savefig(out_path,dpi=300,bbox_inches='tight')
    plt.show()
    plt.close('all')
        

## Individually-curated cell types

In [None]:
# For every array, draw each curated cell type (red) on top of all spatial inlier
# beads (light grey) and save one '<geno>_<array_id>_<cell_type>_curated_only.pdf'
# per cell type.
for array_id in list(d.keys()):
    # Look the genotype up for *this* array: it is part of every output file name
    # and must not be inherited from whatever an earlier cell left behind.
    geno = d[array_id]
    print(array_id)
    
    # input_path is path to file with beads x features for all beads assigned a curated cell type
    # features = {'barcode','x','y','cell_type'}
    # NOTE(review): get_allcells_filtered reads '<array_id>_allcells.csv' while this
    # cell reads '<array_id>_allcells_df.csv' -- confirm both files are intended.
    input_path = '{array_id}_allcells_df.csv'.format(array_id=array_id)
    dat = pd.read_csv(input_path,index_col=0)
    cell_types = np.unique(dat['cell_type'])
    
    all_coords = get_inlier_coords(array_id)
    
    for cell_type in cell_types:
        out_path = '{geno}_{array_id}_{cell_type}_curated_only.pdf'.format(geno=geno,array_id=array_id,cell_type=cell_type)
        celltype_dat = dat[dat['cell_type']==cell_type].copy()
        plt.figure(figsize=(5,5))
        plt.scatter(all_coords['x'],all_coords['y'],s=3,c='lightgrey')
        plt.scatter(celltype_dat['x'],celltype_dat['y'],s=10,c='r',ec='black',linewidth=0.3)
        plt.xlim(500,5800)
        plt.ylim(500,5800)
        plt.axis('off')
        plt.savefig(out_path,dpi=300,bbox_inches='tight')
        plt.show()
        # Close the figure so that figures do not pile up across iterations.
        plt.close('all')
        

## Curated vs. raw cell type plotting

In [None]:
# Build one alphashape around the spatial inlier coordinates of every array,
# keyed by array id. Later cells use these shapes to keep unfiltered cell type
# calls inside the array area.
alpha = 0.01
alphashape_d = {}
for array_id, geno in d.items():
    print(array_id,geno)

    inlier_xy = np.array(get_inlier_coords(array_id)[['x','y']])
    alphashape_d[array_id] = alphashape.alphashape(inlier_xy,alpha=alpha)

### CD-IC detection in CD-PC tubules

In [None]:
# CD-IC, one figure per array: raw calls (dim grey), curated CD-PC (yellow-green)
# and curated CD-IC (red) drawn over all spatial inlier beads (light grey).
# Raw calls are restricted to the array area and removed from the glomerulus,
# 'Ren1' and 'MD' alphashapes whenever those structures exist for the array.
cell_type = 'CD-IC'
alpha = 0.01
for array_id, geno in d.items():
    print(array_id,geno)

    all_coords = get_inlier_coords(array_id)
    allcells_unfiltered = get_allcells_unfiltered(array_id)
    allcells_filtered = get_allcells_filtered(array_id)

    # Alphashapes that raw calls must not fall in, collected in the order in
    # which they are applied: glomerulus, 'Ren1', 'MD'.
    excluded_shapes = []

    # glom_path is path to file with beads x features for all beads in glomerulus
    # features = {'barcode','x','y','cluster','cell_type'}
    glom_path = '{array_id}_glom_with_seurat_cells_info.csv'.format(array_id=array_id)
    if path.exists(glom_path):
        glom_df = pd.read_csv(glom_path,index_col=0)
        excluded_shapes.append(alphashape.alphashape(np.array(glom_df[['x','y']]),alpha=alpha))

    for structure_label in ('Ren1','MD'):
        structure_df = allcells_filtered[allcells_filtered['cell_type']==structure_label].copy()
        # A structure without curated beads contributes no exclusion shape.
        if not structure_df.empty:
            excluded_shapes.append(alphashape.alphashape(np.array(structure_df[['x','y']]),alpha=alpha))

    # Raw calls: select by label, take coordinates from the inlier table, keep
    # only beads inside the array outline.
    # NOTE(review): raw calls are selected via 'cell_type' here, while other cells of
    # this notebook select them via 'max_celltype' -- confirm the column name.
    is_raw_call = allcells_unfiltered['cell_type']==cell_type
    celltype_unfiltered = allcells_unfiltered[is_raw_call].copy()
    celltype_unfiltered = celltype_unfiltered[['barcode']].merge(all_coords[['barcode','x','y']],on='barcode')
    celltype_unfiltered = celltype_includer(celltype_unfiltered,alphashape_d[array_id])

    curated_cols = ['barcode','x','y']
    celltype_filtered = allcells_filtered.loc[allcells_filtered['cell_type']==cell_type,curated_cols].copy()
    cdpc_filtered = allcells_filtered.loc[allcells_filtered['cell_type']=='CD-PC',curated_cols].copy()

    for shape in excluded_shapes:
        celltype_unfiltered = celltype_excluder(celltype_unfiltered,shape)

    out_path = '{geno}_{array_id}_{cell_type}_raw_curated.pdf'.format(geno=geno,array_id=array_id,cell_type=cell_type)

    # Layers are drawn bottom to top in list order.
    layers = [
        (all_coords, dict(s=3,c='lightgrey')),
        (celltype_unfiltered, dict(s=10,c='dimgrey',ec='black',linewidth=0.3)),
        (cdpc_filtered, dict(s=10,c='yellowgreen',ec='black',linewidth=0.3)),
        (celltype_filtered, dict(s=10,c='r',ec='black',linewidth=0.3)),
    ]
    plt.figure(figsize=(5,5))
    for layer_df, marker_style in layers:
        plt.scatter(layer_df['x'],layer_df['y'],**marker_style)
    plt.xlim(500,5800)
    plt.ylim(500,5800)
    plt.axis('off')
    plt.savefig(out_path,dpi=300,bbox_inches='tight')
    plt.show()
    plt.close('all')

### DCT, CD-PC, PCT detection

In [None]:
# DCT, CD-PC and PCT, one figure per array and cell type: raw calls (dim grey)
# and curated calls (red) drawn over all spatial inlier beads (light grey).
# Raw calls are restricted to the array area and removed from the glomerulus,
# 'Ren1' and 'MD' alphashapes whenever those structures exist for the array.
for array_id in list(d.keys()):
    geno = d[array_id]
    print(array_id,geno)
    
    # The four mouse genotypes are listed explicitly; every other label in `d`
    # is treated as human. The PCT labels differ between the two.
    if geno in ['DKD','WT','UMOD-KI','UMOD-WT']:
        specimen = 'mouse'
        cell_types = ['DCT','CD-PC','PCT_1','PCT_2']
    else:
        specimen = 'human'
        cell_types = ['DCT','CD-PC','PCT']
        
    all_coords = get_inlier_coords(array_id)
    allcells_unfiltered = get_allcells_unfiltered(array_id)
    allcells_filtered = get_allcells_filtered(array_id)
    
    alpha_shape = alphashape_d[array_id]
    
    alpha = 0.01
    # Flags are reset for every array so that a shape computed for a previous
    # array is never applied to the current one.
    has_glom = False
    has_gc = False
    has_md = False
    
    # input_path is path to file with beads x features for all beads in glomerulus
    # features = {'barcode','x','y','cluster','cell_type'}
    input_path = '{array_id}_glom_with_seurat_cells_info.csv'.format(array_id=array_id)
    if path.exists(input_path):
        has_glom = True
        glom_df = pd.read_csv(input_path,index_col=0)
        # Only the coordinates are needed to build the glomerulus outline.
        points = np.array(glom_df[['x','y']])
        alpha_shape_glom = alphashape.alphashape(points,alpha=alpha)

    gc_df = allcells_filtered[allcells_filtered['cell_type']=='Ren1'].copy()
    if not gc_df.empty:
        has_gc = True
        points = np.array(gc_df[['x','y']])
        alpha_shape_gc = alphashape.alphashape(points,alpha=alpha)

    md_df = allcells_filtered[allcells_filtered['cell_type']=='MD'].copy()
    if not md_df.empty:
        has_md = True
        points = np.array(md_df[['x','y']])
        alpha_shape_md = alphashape.alphashape(points,alpha=alpha)
    
    for cell_type in cell_types:
        print(array_id,geno,cell_type)

        # Raw calls: select by label, take coordinates from the inlier table,
        # keep only beads inside the array outline.
        # NOTE(review): raw calls are selected via 'max_celltype' here, while the CD-IC
        # cell of this notebook selects them via 'cell_type' -- confirm the column name.
        celltype_unfiltered = allcells_unfiltered[allcells_unfiltered['max_celltype']==cell_type].copy()
        celltype_unfiltered = celltype_unfiltered[['barcode']].merge(all_coords[['barcode','x','y']],on='barcode')
        celltype_unfiltered = celltype_includer(celltype_unfiltered,alpha_shape)

        celltype_filtered = allcells_filtered[allcells_filtered['cell_type']==cell_type].copy()
        celltype_filtered = celltype_filtered[['barcode','x','y']].copy()
        
        if has_glom:
            celltype_unfiltered = celltype_excluder(celltype_unfiltered,alpha_shape_glom)
        if has_gc:
            celltype_unfiltered = celltype_excluder(celltype_unfiltered,alpha_shape_gc)
        if has_md:
            celltype_unfiltered = celltype_excluder(celltype_unfiltered,alpha_shape_md)
        
        out_path = '{geno}_{array_id}_{cell_type}_raw_curated.pdf'.format(geno=geno,array_id=array_id,cell_type=cell_type)
        plt.figure(figsize=(5,5))
        plt.scatter(all_coords['x'],all_coords['y'],s=3,c='lightgrey')
        plt.scatter(celltype_unfiltered['x'],celltype_unfiltered['y'],s=10,c='dimgrey',ec='black',linewidth=0.3)
        plt.scatter(celltype_filtered['x'],celltype_filtered['y'],s=10,c='r',ec='black',linewidth=0.3)
        plt.xlim(500,5800)
        plt.ylim(500,5800)
        plt.axis('off')
        plt.savefig(out_path,dpi=300,bbox_inches='tight')
        plt.show()
        plt.close('all')

### Podocyte detection

In [None]:
# Podocytes, one figure per array: raw NMF calls (dim grey) and curated calls
# (red) drawn over all spatial inlier beads (light grey).
for array_id in list(d.keys()):
    geno = d[array_id]
    print(array_id,geno)
    
    # The four mouse genotypes are listed explicitly; every other label in `d`
    # is treated as human.
    if geno in ['DKD','WT','UMOD-KI','UMOD-WT']:
        specimen = 'mouse'
    else:
        specimen = 'human'
    
    all_coords = get_inlier_coords(array_id)
    allcells_filtered = get_allcells_filtered(array_id)
    
    alpha_shape = alphashape_d[array_id]
    
    # input_path is path to file containing beads x features for all beads in array
    # (thresholded cell loading matrix output by NMFreg)
    # features = {'barcode','x','y','max_cell_type','cell_type_1',...,'cell_type_n','maxval','thresh_ct'}
    # (loading of each cell type across all beads, max loading per bead,
    # cell type associated with max loading)
    input_path = '{}_nmf_loadings.csv'.format(array_id)
    nmf_calls = pd.read_csv(input_path,index_col=0)
    # Numeric id used for podocytes: 10 for mouse arrays, 17 for human arrays.
    if specimen == 'mouse':
        celltype_unfiltered = nmf_calls[nmf_calls['max_cell_type']==10].copy()
    elif specimen == 'human':
        celltype_unfiltered = nmf_calls[nmf_calls['max_cell_type']==17].copy()
    # Take coordinates from the inlier table and keep only beads inside the array outline.
    celltype_unfiltered = celltype_unfiltered[['barcode']].merge(all_coords[['barcode','x','y']],on='barcode')
    celltype_unfiltered = celltype_includer(celltype_unfiltered,alpha_shape)

    # Curated podocytes come from the curated table loaded above for this array.
    celltype_filtered = allcells_filtered[allcells_filtered['cell_type']=='Podocyte'].copy()
    celltype_filtered = celltype_filtered[['barcode','x','y']].copy()

    out_path = '{geno}_{array_id}_Podocyte_raw_curated.pdf'.format(geno=geno,array_id=array_id)
    plt.figure(figsize=(5,5))
    plt.scatter(all_coords['x'],all_coords['y'],s=3,c='lightgrey')
    plt.scatter(celltype_unfiltered['x'],celltype_unfiltered['y'],s=10,c='dimgrey',ec='black',linewidth=0.3)
    plt.scatter(celltype_filtered['x'],celltype_filtered['y'],s=10,c='r',ec='black',linewidth=0.3)
    plt.xlim(500,5800)
    plt.ylim(500,5800)
    plt.axis('off')
    plt.savefig(out_path,dpi=300,bbox_inches='tight')
    plt.show()
    plt.close('all')

### Assigning non-podo cell types to gloms

In [None]:
# Glomerular cell type assignment, one figure per array that has a glomerulus
# file: EC (dodgerblue), Podocyte (midnightblue) and MC (cyan) beads drawn over
# all spatial inlier beads (light grey). Arrays without the file are skipped.
MOUSE_GENOTYPES = ['DKD','WT','UMOD-KI','UMOD-WT']
for array_id, geno in d.items():
    print(array_id,geno)

    # Every label in `d` that is not a mouse genotype is treated as human.
    specimen = 'mouse' if geno in MOUSE_GENOTYPES else 'human'

    all_coords = get_inlier_coords(array_id)

    # glom_path is path to file with beads x features for all beads in glomerulus
    # features = {'barcode','x','y','cluster','cell_type'}
    # NOTE(review): the code below reads a 'max_cell_type' column that is not in the
    # feature list above -- confirm the file schema.
    glom_path = '{array_id}_glom_with_seurat_cells_info.csv'.format(array_id=array_id)
    if not path.exists(glom_path):
        continue

    glom_info = pd.read_csv(glom_path,index_col=0)

    # Numeric cell type ids differ between the mouse and the human arrays.
    if specimen == 'mouse':
        id_to_label = {10:'Podocyte',12:'MC',1:'EC'}
    else:
        id_to_label = {17:'Podocyte',16:'MC',7:'EC'}
    # Plain dict lookup on purpose: an id outside the table raises KeyError.
    glom_info['max_cell_type'] = [id_to_label[cell_id] for cell_id in glom_info['max_cell_type']]

    out_path = '{geno}_{array_id}_assign_celltype_to_glom.pdf'.format(geno=geno,array_id=array_id)

    plt.figure(figsize=(5,5))
    plt.scatter(all_coords['x'],all_coords['y'],s=3,c='lightgrey')
    # Drawn in this order, so later labels sit on top of earlier ones.
    for label, colour in (('EC','dodgerblue'),('Podocyte','midnightblue'),('MC','cyan')):
        label_info = glom_info[glom_info['max_cell_type']==label].copy()
        plt.scatter(label_info['x'],label_info['y'],s=20,c=colour,ec='black',linewidth=0.6)
    plt.xlim(500,5800)
    plt.ylim(500,5800)
    plt.axis('off')
    plt.savefig(out_path,dpi=300,bbox_inches='tight')
    plt.show()
    plt.close('all')

### GC detection

In [None]:
# GC, one figure per array: beads with a non-zero renin count (dim grey), curated
# 'Ren1' beads (red) and glomerulus beads (dodgerblue) drawn over all spatial
# inlier beads (light grey).
for array_id in list(d.keys()):
    geno = d[array_id]
    print(array_id,geno)
    
    # The four mouse genotypes are listed explicitly; every other label in `d`
    # is treated as human.
    if geno in ['DKD','WT','UMOD-KI','UMOD-WT']:
        specimen = 'mouse'
    else:
        specimen = 'human'
    
    all_coords = get_inlier_coords(array_id)
    allcells_filtered = get_allcells_filtered(array_id)
    
    alpha_shape = alphashape_d[array_id]
    
    # input_path is path to gene expression counts matrix 
    # NOTE(review): a 'barcode' column is read from this table below although the
    # first CSV column becomes the index -- confirm the file carries both.
    input_path = '{array_id}_counts.csv'.format(array_id=array_id)
    counts = pd.read_csv(input_path,index_col=0)
    # The gene column is 'Ren1' for mouse arrays and 'REN' for human arrays.
    if specimen == 'mouse':
        celltype_unfiltered = counts[counts['Ren1']>0].copy()
    elif specimen == 'human':
        celltype_unfiltered = counts[counts['REN']>0].copy()
    # Take coordinates from the inlier table and keep only beads inside the array outline.
    celltype_unfiltered = celltype_unfiltered[['barcode']].merge(all_coords[['barcode','x','y']],on='barcode')
    celltype_unfiltered = celltype_includer(celltype_unfiltered,alpha_shape)   
    
    # Empty placeholder so that the glomerulus layer plots nothing when the
    # array has no glomerulus file.
    glom_df = pd.DataFrame(columns=['barcode','x','y'])
    # input_path is path to file with beads x features for all beads in glomerulus
    # features = {'barcode','x','y','cluster','cell_type'}
    input_path = '{array_id}_glom_with_seurat_cells_info.csv'.format(array_id=array_id)
    if path.exists(input_path):
        glom_df = pd.read_csv(input_path,index_col=0)
        glom_df = glom_df[['barcode','x','y']].copy()

    # Curated GC beads come from the curated table loaded above for this array.
    celltype_filtered = allcells_filtered[allcells_filtered['cell_type']=='Ren1'].copy()
    celltype_filtered = celltype_filtered[['barcode','x','y']].copy()

    out_path = '{geno}_{array_id}_GC_raw_curated.pdf'.format(geno=geno,array_id=array_id)
    plt.figure(figsize=(5,5))
    plt.scatter(all_coords['x'],all_coords['y'],s=3,c='lightgrey')
    plt.scatter(celltype_unfiltered['x'],celltype_unfiltered['y'],s=10,c='dimgrey',ec='black',linewidth=0.3)
    plt.scatter(celltype_filtered['x'],celltype_filtered['y'],s=10,c='r',ec='black',linewidth=0.3)
    plt.scatter(glom_df['x'],glom_df['y'],s=10,c='dodgerblue',ec='black',linewidth=0.3)
    plt.xlim(500,5800)
    plt.ylim(500,5800)
    plt.axis('off')
    plt.savefig(out_path,dpi=300,bbox_inches='tight')
    plt.show()
    plt.close('all')

### MD detection

In [None]:
# MD, one figure per array: raw 'TAL' calls (dim grey), curated 'MD' beads (red),
# curated 'Ren1' beads (orange) and glomerulus beads (dodgerblue) drawn over all
# spatial inlier beads (light grey).
for array_id in list(d.keys()):
    geno = d[array_id]
    print(array_id,geno)
    
    all_coords = get_inlier_coords(array_id)
    allcells_unfiltered = get_allcells_unfiltered(array_id)
    allcells_filtered = get_allcells_filtered(array_id)
    
    alpha_shape = alphashape_d[array_id]

    # Raw calls: select by label, take coordinates from the inlier table, keep
    # only beads inside the array outline.
    celltype_unfiltered = allcells_unfiltered[allcells_unfiltered['max_celltype']=='TAL'].copy()
    celltype_unfiltered = celltype_unfiltered[['barcode']].merge(all_coords[['barcode','x','y']],on='barcode')
    celltype_unfiltered = celltype_includer(celltype_unfiltered,alpha_shape)
    
    # Curated MD beads come from the curated table loaded above for this array.
    celltype_filtered2 = allcells_filtered[allcells_filtered['cell_type']=='MD'].copy()
    celltype_filtered2 = celltype_filtered2[['barcode','x','y']].copy()
    
    # Empty placeholder so that the glomerulus layer plots nothing when the
    # array has no glomerulus file.
    glom_df = pd.DataFrame(columns=['barcode','x','y'])
    # input_path is path to file with beads x features for all beads in glomerulus
    # features = {'barcode','x','y','cluster','cell_type'}
    input_path = '{array_id}_glom_with_seurat_cells_info.csv'.format(array_id=array_id)
    if path.exists(input_path):
        glom_df = pd.read_csv(input_path,index_col=0)
        glom_df = glom_df[['barcode','x','y']].copy()
    
    # Curated GC ('Ren1') beads, taken from the same curated table.
    gc_df = allcells_filtered[allcells_filtered['cell_type']=='Ren1'].copy()
    gc_df = gc_df[['barcode','x','y']].copy()
    
    out_path = '{geno}_{array_id}_MD_raw_curated.pdf'.format(geno=geno,array_id=array_id)
    plt.figure(figsize=(5,5))
    plt.scatter(all_coords['x'],all_coords['y'],s=3,c='lightgrey')
    plt.scatter(celltype_unfiltered['x'],celltype_unfiltered['y'],s=10,c='dimgrey',ec='black',linewidth=0.3)
    plt.scatter(celltype_filtered2['x'],celltype_filtered2['y'],s=10,c='r',ec='black',linewidth=0.3)
    plt.scatter(gc_df['x'],gc_df['y'],s=10,c='orange',ec='black',linewidth=0.3)
    plt.scatter(glom_df['x'],glom_df['y'],s=10,c='dodgerblue',ec='black',linewidth=0.3)
    plt.xlim(500,5800)
    plt.ylim(500,5800)
    plt.axis('off')
    plt.savefig(out_path,dpi=300,bbox_inches='tight')
    plt.show()
    plt.close('all')

In [None]:
def export_legend(legend, filename="legend.pdf"):
    """Save the area covered by ``legend`` to ``filename`` at 300 dpi.

    The owning figure is drawn, the legend's window extent is transformed with
    the inverse of the figure's ``dpi_scale_trans``, and the figure is saved
    with that box as ``bbox_inches``.
    """
    figure = legend.figure
    # Draw before measuring -- presumably so that the legend extent is up to date.
    figure.canvas.draw()
    extent = legend.get_window_extent()
    legend_bbox = extent.transformed(figure.dpi_scale_trans.inverted())
    figure.savefig(filename, dpi=300, bbox_inches=legend_bbox)

In [None]:
# Stand-alone legend (EC / MC / Podocyte), exported to 'legend1.pdf'. The colours
# match the ones used in the glomerular cell type assignment figures.
plt.rcParams.update({"font.family": "Arial", 'font.size': 20})

# Hide every spine and tick so that only the legend remains on the axes.
axes = plt.gca()
for side in ('right', 'top', 'bottom', 'left'):
    axes.spines[side].set_color('none')
plt.xticks([], [])
plt.yticks([], [])

colors = ['dodgerblue',"cyan",'midnightblue']
texts = ['EC','MC','Podocyte']
# One empty line per entry: it draws nothing but provides a legend handle.
patches = []
for color, text in zip(colors, texts):
    handle = plt.plot([],[], marker="o", ms=10, mec='black',ls='none', linewidth=0.6,color=color,
                      label="{:s}".format(text))[0]
    patches.append(handle)
legend2 = plt.legend(handles=patches, loc=3, framealpha=1,frameon=False)
export_legend(legend2,filename='legend1.pdf')

In [None]:
# Stand-alone legend (Raw TAL / Curated MD / Glomerulus / GC), exported to
# 'legend2.pdf'. The colours match the ones used in the MD detection figures.
plt.rcParams.update({"font.family": "Arial", 'font.size': 20})

# Hide every spine and tick so that only the legend remains on the axes.
axes = plt.gca()
for side in ('right', 'top', 'bottom', 'left'):
    axes.spines[side].set_color('none')
plt.xticks([], [])
plt.yticks([], [])

colors = ['dimgrey','r','dodgerblue','orange']
texts = ['Raw TAL','Curated MD','Glomerulus','GC']
# One empty line per entry: it draws nothing but provides a legend handle.
patches = []
for color, text in zip(colors, texts):
    handle = plt.plot([],[], marker="o", ms=10, mec='black',ls='none', linewidth=0.6,color=color,
                      label="{:s}".format(text))[0]
    patches.append(handle)
legend2 = plt.legend(handles=patches, loc=3, framealpha=1,frameon=False)
export_legend(legend2,filename='legend2.pdf')

In [None]:
# Stand-alone legend (Ren1/REN+ / Curated GC / Glomerulus), exported to
# 'legend3.pdf'. The colours match the ones used in the GC detection figures.
plt.rcParams.update({"font.family": "Arial", 'font.size': 20})

# Hide every spine and tick so that only the legend remains on the axes.
axes = plt.gca()
for side in ('right', 'top', 'bottom', 'left'):
    axes.spines[side].set_color('none')
plt.xticks([], [])
plt.yticks([], [])

colors = ['dimgrey','r','dodgerblue']
texts = ['Ren1/REN+','Curated GC','Glomerulus']
# One empty line per entry: it draws nothing but provides a legend handle.
patches = []
for color, text in zip(colors, texts):
    handle = plt.plot([],[], marker="o", ms=10, mec='black',ls='none', linewidth=0.6,color=color,
                      label="{:s}".format(text))[0]
    patches.append(handle)
legend2 = plt.legend(handles=patches, loc=3, framealpha=1,frameon=False)
export_legend(legend2,filename='legend3.pdf')