In [10]:
import matplotlib.pyplot as plt
import alphashape
from descartes import PolygonPatch
from shapely.geometry import Point
from os import path
from KNN_filtration import *

### Function definitions

In [37]:
def _in_structure_mask(dat, alpha_shape):
    """Flag which rows of `dat` fall on or inside `alpha_shape`.

    Parameters
    ----------
    dat : pandas.DataFrame
        Bead table; must contain the columns 'x' and 'y'.
    alpha_shape : geometry object exposing ``intersects(Point)``
        Shape against which every (x, y) pair is tested.

    Returns
    -------
    numpy.ndarray
        1-D boolean array, one entry per row of `dat`, True where the bead's
        point intersects `alpha_shape`.
    """
    points = dat[['x','y']].to_numpy()
    # A typed boolean array (rather than a plain Python list) keeps pandas in
    # row-filtering mode even when `dat` has no rows; an empty list would be
    # interpreted as "select zero columns" and drop every column.
    return np.fromiter(
        (alpha_shape.intersects(Point(coord)) for coord in points),
        dtype=bool,
        count=len(points),
    )


def celltype_excluder(dat,alpha_shape):
    """Remove the beads of a cell type that lie within a structure.

    Parameters
    ----------
    dat : pandas.DataFrame
        Data for the cell type being curated; must contain 'x' and 'y'.
    alpha_shape : geometry object exposing ``intersects(Point)``
        Alpha shape of the structure from which the cell type is removed.

    Returns
    -------
    pandas.DataFrame
        The rows of `dat` whose point does NOT intersect `alpha_shape`.
        All columns are preserved, including for an empty input.
    """
    return dat[~_in_structure_mask(dat, alpha_shape)]


def celltype_includer(dat,alpha_shape):
    """Keep only the beads of a cell type that lie within a structure.

    Parameters
    ----------
    dat : pandas.DataFrame
        Data for the cell type being curated; must contain 'x' and 'y'.
    alpha_shape : geometry object exposing ``intersects(Point)``
        Alpha shape of the structure in which the cell type is maintained.

    Returns
    -------
    pandas.DataFrame
        The rows of `dat` whose point intersects `alpha_shape`.
        All columns are preserved, including for an empty input.
    """
    return dat[_in_structure_mask(dat, alpha_shape)]

### Initialization

In [38]:
# Name of the tissue section being processed; not read by any of the cells visible in this notebook.
section = 'cortex'

In [40]:
# input_path is path to file with all beads x features in an array,
# features = {'barcode','x','y'}
# (index_col=0: the first CSV column is used as the row index)
input_path = 'coords.csv'
coords = pd.read_csv(input_path,index_col=0)
all_coords = np.array(coords)

# input_path is path to file with beads x features for all curated CD-PC data in section of interest
# features = {'barcode','x','y'}
# This assignment used to be commented out, which made CDPC_dat a second copy
# of coords.csv (every bead) instead of the curated CD-PC beads.
input_path = 'CDPC_dat.csv'
CDPC_dat = pd.read_csv(input_path,index_col=0)

# input_path is path to file with beads x features for all CD-IC beads
# features = {'barcode','x','y'}
input_path = 'CDIC_dat.csv'
CDIC_dat = pd.read_csv(input_path,index_col=0)

### Remove CD-IC from relevant structures

In [41]:
# Flags recording which optional structure files were found and loaded.
has_glom = False
has_gc = False
has_MD = False
# Alpha parameter passed to alphashape for every structure outline below.
alpha=0.01

# input_path is path to data matrix with barcodes x features for all curated cell types in glomeruli
# features = {'barcode','x','y'}
input_path = 'glom_dat.csv'
if path.exists(input_path):
    has_glom = True
    glom_dat = pd.read_csv(input_path,index_col=0)
    points = np.array(glom_dat[['x','y']])
    glom_alpha_shape = alphashape.alphashape(points,alpha=alpha)

# input_path is path to data matrix with barcodes x features for all curated granular cell beads
# features = {'barcode','x','y'}
input_path = 'gc_dat.csv'
if path.exists(input_path):
    has_gc = True
    gc_dat = pd.read_csv(input_path,index_col=0)
    points = np.array(gc_dat[['x','y']])
    gc_alpha_shape = alphashape.alphashape(points,alpha=alpha)

# input_path is path to data matrix with barcodes x features for all curated MD beads
# features = {'barcode','x','y'}
input_path = 'MD_dat.csv'
if path.exists(input_path):
    has_MD = True
    md_dat = pd.read_csv(input_path,index_col=0)
    points = np.array(md_dat[['x','y']])
    md_alpha_shape = alphashape.alphashape(points,alpha=alpha)

# Drop CD-IC beads from every structure that was loaded. Each structure is
# checked on its own flag: the granular-cell and MD exclusions used to be
# nested under `if has_glom`, so they were skipped whenever glom_dat.csv was
# missing even though their own files had been read.
if has_glom:
    CDIC_dat = celltype_excluder(CDIC_dat,glom_alpha_shape)
if has_gc:
    CDIC_dat = celltype_excluder(CDIC_dat,gc_alpha_shape)
if has_MD:
    CDIC_dat = celltype_excluder(CDIC_dat,md_alpha_shape)

### Phase 1 filtration of CD-IC beads

#### Concatenate curated CD-PC beads with unfiltered CD-IC beads and run KNN-filtration

In [42]:
# Tag every bead with its cell type (the scalar is broadcast to all rows),
# then stack the CD-IC rows on top of the CD-PC rows.
CDIC_dat['cell_type'] = 'CD-IC'
CDPC_dat['cell_type'] = 'CD-PC'
CD_dat = pd.concat([CDIC_dat, CDPC_dat])

In [43]:
# Parameters forwarded to get_markers_in_struct (imported from KNN_filtration).
# k = number of nearest neighbors to consider per marker bead
# threshold = number of nearest neighbors to maintain
# NOTE(review): the exact meaning of both values is defined in KNN_filtration,
# which is not shown here — confirm there before tuning.
k = 150
threshold = 5

In [44]:
# Run the KNN filtration on the combined CD beads against all bead coordinates,
# then wrap the returned rows in a DataFrame whose first three positional
# columns are labelled barcode / x / y.
CD_eroded = pd.DataFrame(
    get_markers_in_struct(
        all_coords,
        CD_dat[['barcode', 'x', 'y']].to_numpy(),
        k,
        threshold,
    )
).rename(columns={0: 'barcode', 1: 'x', 2: 'y'})

In [45]:
# Keep only the surviving barcodes and pull their full rows (x, y, cell_type, ...)
# back in from CD_dat.
CD_eroded = pd.merge(CD_eroded[['barcode']], CD_dat, on='barcode')

# Outline the surviving beads with an alpha shape and keep the beads that
# intersect it. Note: `coords` and `alpha` are rebound here.
alpha = 0.012
coords = CD_eroded[['x', 'y']].to_numpy()
CD_alpha_shape = alphashape.alphashape(coords, alpha=alpha)
CD_eroded = celltype_includer(CD_eroded, CD_alpha_shape)

In [46]:
# Phase 1 result: only the CD-IC rows of the filtered CD beads.
CDIC_eroded = CD_eroded.loc[CD_eroded['cell_type'] == 'CD-IC']

In [None]:
# Scatter the CD-PC beads (blue) and the phase 1 CD-IC beads (red) on one figure.
plt.figure(figsize=(10, 10))
for frame, shade in ((CDPC_dat, 'b'), (CDIC_eroded, 'r')):
    plt.scatter(frame['x'], frame['y'], s=3, c=shade)
plt.xlim(0, 6000)
plt.ylim(0, 6000)

# Legend entries are built from empty plots so the markers match the scatter colours.
colors = ['b', 'r']
texts = ['CD-PC', 'CD-IC filter 1']
patches = []
for shade, text in zip(colors, texts):
    handle = plt.plot([], [], marker="o", ms=3, ls="", linewidth=0.6,
                      color=shade, label="{:s}".format(text))[0]
    patches.append(handle)
plt.legend(handles=patches, bbox_to_anchor=(1, 0.5),
           loc='center left', ncol=1, fontsize='medium')

plt.show()

### Phase 2 filtration of CD-IC beads

#### Maintain CD-IC beads near or within CD-PC tubules

In [48]:
# Build the alpha shape enclosing the CD-PC beads; `coords` and `alpha` are
# rebound for this step.
alpha = 0.015
coords = CDPC_dat[['x', 'y']].to_numpy()
CDPC_alpha_shape = alphashape.alphashape(coords, alpha=alpha)

In [49]:
# (x, y) of every phase 1 CD-IC bead, as a list of two-element lists.
CDIC_coords = list(map(list, CDIC_eroded[['x', 'y']].to_numpy()))

# Distance from each bead to the CD-PC alpha shape.
d = np.array([CDPC_alpha_shape.distance(Point(coord)) for coord in CDIC_coords])

# reset_index() renumbers the rows 0..n-1 so they line up with `d`; the previous
# index is kept as a new column. Beads at a distance below 100 are retained.
CDIC_eroded = CDIC_eroded.reset_index()
CDIC_eroded = CDIC_eroded[d < 100]

In [None]:
# Scatter the CD-PC beads (blue) and the phase 2 CD-IC beads (red) on one figure.
plt.figure(figsize=(10, 10))
for frame, shade in ((CDPC_dat, 'b'), (CDIC_eroded, 'r')):
    plt.scatter(frame['x'], frame['y'], s=3, c=shade)
plt.xlim(0, 6000)
plt.ylim(0, 6000)
# Legend entries are built from empty plots so the markers match the scatter colours.
colors = ['b', 'r']
texts = ['CD-PC', 'CD-IC filter 2']
patches = []
for shade, text in zip(colors, texts):
    handle = plt.plot([], [], marker="o", ms=3, ls="", linewidth=0.6,
                      color=shade, label="{:s}".format(text))[0]
    patches.append(handle)
plt.legend(handles=patches, bbox_to_anchor=(1, 0.5),
           loc='center left', ncol=1, fontsize='medium')
plt.show()

In [None]:
# out_path is path to output file
out_path = 'CDIC_filtered.csv'
# Write the phase 2 result. The previous code referenced `CDIC_dat_filt`, a
# name that is never defined in this notebook, so the cell failed with a
# NameError on a fresh kernel; CDIC_eroded holds the final filtered CD-IC beads.
CDIC_eroded.to_csv(out_path)