In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from shapely.geometry import Point
import matplotlib.pyplot as plt
import os

### Function definitions

In [None]:
def single_dist(coord1,coord2):
    """Return the distance between two coordinates.

    Both coordinates are wrapped in shapely ``Point`` objects and the
    result of ``Point.distance`` is returned unchanged.
    """
    start = Point(coord1)
    end = Point(coord2)
    return start.distance(end)

def compute_cluster_radii(cluster_centers, eroded):
    """Return one radius per cluster center.

    The radius of cluster ``j`` is the largest distance between
    ``cluster_centers[j]`` and any row of ``eroded`` whose ``'cluster'``
    value equals ``j``.

    Parameters
    ----------
    cluster_centers : sequence of (x, y) pairs
        Cluster centers; position ``j`` in the sequence is the center of the
        cluster labelled ``j``.
    eroded : pandas.DataFrame
        Must contain the columns ``'x'``, ``'y'`` and ``'cluster'``.

    Returns
    -------
    list of float
        ``radii[j]`` is the radius of cluster ``j``; the list always has
        ``len(cluster_centers)`` entries. A cluster with no member rows gets
        a radius of ``0.0``.
    """
    labels = np.asarray(eroded['cluster'])
    coords = np.asarray(eroded[['x', 'y']], dtype=float)

    radii = []
    # Walk the centers (not the labels that happen to be present) so that
    # radii[j] always belongs to cluster_centers[j], even when some cluster
    # has no points assigned to it.
    for label, center in enumerate(cluster_centers):
        members = coords[labels == label]
        if len(members) == 0:
            # Nothing lies strictly inside a circle of radius 0, so an empty
            # cluster never captures any other cell.
            radii.append(0.0)
            continue
        offsets = members - np.asarray(center, dtype=float)
        distances = np.sqrt((offsets ** 2).sum(axis=1))
        radii.append(float(distances.max()))

    return radii

def is_in_circle(coord,center,r):
    """Return True when ``coord`` lies strictly inside the circle.

    The circle is given by its ``center`` and radius ``r``; a point exactly
    on the boundary (distance equal to ``r``) is not counted as inside.
    """
    distance_to_center = single_dist(coord, center)
    return distance_to_center < r

def assign_other_celltypes(cluster_centers, eroded, other, cell_type):
    """Assign beads of one cell type to the cluster circles that contain them.

    A radius is computed for every cluster from ``eroded`` (via
    ``compute_cluster_radii``). Each row of ``other`` whose ``'cell_type'``
    equals ``cell_type`` is then tested against the clusters in index order
    and assigned to the first cluster whose circle strictly contains its
    (x, y) position. Rows that fall inside no circle are left out.

    Parameters
    ----------
    cluster_centers : sequence of (x, y) pairs
        Centers of the clusters.
    eroded : pandas.DataFrame
        Points with their ``'cluster'`` assignment, used to derive the radii.
    other : pandas.DataFrame
        Candidate beads with columns ``'barcode'``, ``'x'``, ``'y'`` and
        ``'cell_type'``.
    cell_type : value compared against ``other['cell_type']``
        Only rows of this cell type are considered.

    Returns
    -------
    list of [barcode, cluster_index]
        One two-element list per assigned bead.
    """
    radii = compute_cluster_radii(cluster_centers, eroded)

    selected = other[other['cell_type'] == cell_type].copy()
    positions = np.array(selected[['x', 'y']])
    barcodes = [row[0] for row in np.array(selected[['barcode']])]

    cluster_indices = range(len(cluster_centers))
    assignments = []
    for barcode, position in zip(barcodes, positions):
        # first matching cluster wins; later clusters are not tested
        match = next(
            (j for j in cluster_indices
             if is_in_circle(position, cluster_centers[j], radii[j])),
            None,
        )
        if match is not None:
            assignments.append([barcode, match])
    return assignments

### File input

In [None]:
# Load the curated podocyte beads (a beads x features matrix).
# Expected feature columns: {'barcode','x','y'}; the first CSV column is
# read as the row index.
input_path = 'podo_dat.csv'
podo_dat = pd.read_csv(filepath_or_buffer=input_path, index_col=0)

In [None]:
# Load the beads of all other cell types that may be added to glomeruli
# (mesangial and endothelial cells); a beads x features matrix.
# Expected feature columns: {'barcode','x','y','cell_type'}
input_path = 'other_cells_dat.csv'
other_cells_dat = pd.read_csv(input_path,index_col=0)
# Discard the index that was stored in the file and renumber rows 0..n-1.
# drop=True avoids creating a temporary column whose name depends on the
# header of the CSV's first column (it is only called 'index' when that
# header is empty).
other_cells_dat = other_cells_dat.reset_index(drop=True)

### Analysis

In [None]:
### visualize podocytes to determine number of glomeruli (n_clusters param)
# Scatter every podocyte bead at its (x, y) position; the visible groups are
# counted by eye to choose n_clusters for the KMeans step.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(podo_dat['x'], podo_dat['y'], s=3, c='b')
ax.set_xlim(0, 6000)
ax.set_ylim(0, 6000)
plt.show()

In [None]:
### find cluster centers of curated podocyte markers
# n_clusters is a placeholder: set it to the number of podocyte clusters
# seen in the array before running this cell.
n_clusters = 0
coords = np.array(podo_dat[['x', 'y']])

# fixed random_state so repeated runs give the same clustering
kmeans = KMeans(n_clusters=n_clusters, random_state=0)
kmeans.fit(coords)

podo_cluster_centers = kmeans.cluster_centers_
labels = kmeans.labels_
# store each podocyte's cluster label alongside its coordinates
podo_dat['cluster'] = labels

In [None]:
### find mesangial and endothelial cells in glomerular areas
def _cells_in_glomeruli(cell_type):
    """Return the rows of other_cells_dat of `cell_type` that fall inside a
    podocyte cluster circle, with their assigned 'cluster' column added."""
    # The KMeans centers are stored in `podo_cluster_centers`.
    hits = assign_other_celltypes(podo_cluster_centers, podo_dat, other_cells_dat, cell_type)
    # Name the columns up front so an empty result still has a 'barcode'
    # column to merge on.
    hits = pd.DataFrame(hits, columns=['barcode', 'cluster'])
    return other_cells_dat.merge(hits, on='barcode')

mesangial_in_glom = _cells_in_glomeruli('Mesangial')
endothelial_in_glom = _cells_in_glomeruli('Endothelial')

# label the podocytes and bring their columns into the same order as the
# other cell tables
podo_dat['cell_type'] = 'Podocyte'
podo_dat = podo_dat[['barcode','x','y','cell_type','cluster']]

# stack podocytes, mesangial and endothelial cells and renumber rows 0..n-1
final = pd.concat([podo_dat,mesangial_in_glom,endothelial_in_glom])
final = final.reset_index(drop=True)

In [None]:
# Write the combined table of glomerular cells to out_path as CSV
# (the row index is written as the first column).
out_path = 'all_glomerular_cells.csv'
final.to_csv(path_or_buf=out_path)