In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from shapely.geometry import Point
from sklearn.cluster import KMeans

### Function definitions

In [None]:
# weights gc cluster centers by REN/Ren1 gene expression
def weight_centers(gc_dat, weight_col='Ren1'):
    """Compute an expression-weighted center for every cluster in ``gc_dat``.

    For each distinct value of ``gc_dat['cluster']`` (visited in the sorted
    order returned by ``np.unique``) the center is the mean of the ``x``/``y``
    coordinates of that cluster's rows, weighted by ``weight_col``.

    Parameters
    ----------
    gc_dat : pandas.DataFrame
        Must contain the columns ``'x'``, ``'y'``, ``'cluster'`` and
        ``weight_col``.
    weight_col : str, default 'Ren1'
        Name of the column holding the per-row weights.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_clusters, 2)`` with one ``(x, y)`` row per
        cluster id; shape ``(0, 2)`` when ``gc_dat`` has no rows.

    Notes
    -----
    When the weights of a cluster sum to exactly zero the weighted mean is
    undefined (0/0), so the unweighted centroid of the cluster is used
    instead of returning NaN coordinates.
    """
    centers = []

    for cluster_id in np.unique(gc_dat['cluster']):
        members = gc_dat[gc_dat['cluster'] == cluster_id]
        xy = members[['x', 'y']].to_numpy(dtype=float)
        weights = members[weight_col].to_numpy(dtype=float)
        total_weight = weights.sum()

        if total_weight == 0:
            # No expression anywhere in this cluster: fall back to the
            # plain geometric centroid rather than dividing by zero.
            centers.append(xy.mean(axis=0))
        else:
            # weights @ xy == (sum(w * x), sum(w * y))
            centers.append(weights @ xy / total_weight)

    # reshape keeps a well-formed (0, 2) result when there are no clusters
    return np.array(centers, dtype=float).reshape(-1, 2)

# compute euclidean distance between two coordinates
def single_dist(coord1,coord2):
    """Return the distance between ``coord1`` and ``coord2``.

    Each coordinate is wrapped in a shapely ``Point`` and the value of
    ``Point.distance`` between the two points is returned.
    """
    start = Point(coord1)
    end = Point(coord2)
    return start.distance(end)

# finds min distance between each gc cluster and all glomeruli
def compute_gc_glom_dists(gc_cluster_centers,glom_cluster_centers):
    """Return, for every gc cluster center, the distance to its nearest glomerulus.

    Parameters
    ----------
    gc_cluster_centers : array-like, shape (n_gc, 2)
        ``(x, y)`` coordinates of the granular-cell cluster centers.
    glom_cluster_centers : array-like, shape (n_glom, 2)
        ``(x, y)`` coordinates of the glomerulus centers.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n_gc``; entry ``i`` is the smallest Euclidean
        distance from gc center ``i`` to any glomerulus center. An empty
        array is returned when there are no gc centers.

    Raises
    ------
    ValueError
        If there is at least one gc center but no glomerulus center, since
        a minimum over zero distances is undefined.
    """
    gc_xy = np.asarray(gc_cluster_centers, dtype=float)
    if gc_xy.size == 0:
        return np.array([])

    glom_xy = np.asarray(glom_cluster_centers, dtype=float)
    if glom_xy.size == 0:
        raise ValueError(
            'glom_cluster_centers is empty; at least one glomerulus center is required'
        )

    # Broadcast to an (n_gc, n_glom, 2) array of coordinate differences,
    # reduce it to an (n_gc, n_glom) distance matrix, then take row minima.
    offsets = gc_xy[:, np.newaxis, :] - glom_xy[np.newaxis, :, :]
    pairwise = np.sqrt((offsets ** 2).sum(axis=2))
    return pairwise.min(axis=1)

### File input

In [None]:
### Read in input data
# input_path is path to file with beads x features for all curated beads in glomeruli
# features = {'barcode','x','y'}
input_path = 'glom_dat.csv'
glom_dat = pd.read_csv(input_path,index_col=0)
# fail fast here instead of with a KeyError in a later plotting/clustering cell
assert {'x', 'y'}.issubset(glom_dat.columns), \
    "glom_dat.csv must contain the columns 'x' and 'y'"

# input_path is path to file with beads x features for all curated beads in granular cells
# features = {'barcode','x','y','Ren1'} ('Ren1' column has expression of REN/Ren1)
input_path = 'gc_dat.csv'
gc_dat = pd.read_csv(input_path,index_col=0)
# 'Ren1' is needed later by weight_centers to weight the cluster centers
assert {'x', 'y', 'Ren1'}.issubset(gc_dat.columns), \
    "gc_dat.csv must contain the columns 'x', 'y' and 'Ren1'"

### Run KMeans for glom/gc structure centers

In [None]:
### visualize glomerular cell type locations to determine number of glomeruli (n_clusters param)
# one blue dot per glomerular bead; count the visible groups to choose n_clusters
fig, ax = plt.subplots(figsize=(10,10))
ax.scatter(glom_dat['x'], glom_dat['y'], s=3, c='b')
ax.set_xlim(0, 6000)
ax.set_ylim(0, 6000)
ax.set_title('glomeruli')
plt.show()

In [None]:
# set n_clusters to number of glomeruli in array
n_clusters=0
if n_clusters < 1:
    # the 0 above is a placeholder; KMeans cannot run until it is replaced
    raise ValueError('n_clusters must be set to the number of glomeruli (>= 1) before running KMeans')
coords = np.array(glom_dat[['x','y']])
# n_jobs is not passed: the argument was removed from KMeans in scikit-learn 1.0.
# n_init=10 pins the historical default so results do not depend on the
# scikit-learn version (newer releases default to n_init='auto').
kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init=10).fit(coords)
glom_cluster_centers = kmeans.cluster_centers_

In [None]:
### visualize granular cell locations to determine number of gc clusters (n_clusters param)
# one red dot per granular-cell bead; count the visible groups to choose n_clusters
fig, ax = plt.subplots(figsize=(10,10))
ax.scatter(gc_dat['x'], gc_dat['y'], s=3, c='r')
ax.set_xlim(0, 6000)
ax.set_ylim(0, 6000)
ax.set_title('granular cells')
plt.show()

In [None]:
# set n_clusters to number of granular cell clusters in array
n_clusters=0
if n_clusters < 1:
    # the 0 above is a placeholder; KMeans cannot run until it is replaced
    raise ValueError('n_clusters must be set to the number of granular cell clusters (>= 1) before running KMeans')
coords = np.array(gc_dat[['x','y']])
# n_jobs is not passed: the argument was removed from KMeans in scikit-learn 1.0.
# n_init=10 pins the historical default so results do not depend on the
# scikit-learn version (newer releases default to n_init='auto').
kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init=10).fit(coords)
# label every bead with its cluster, then replace the plain KMeans centers
# with centers weighted by Ren1 expression
gc_dat['cluster'] = kmeans.labels_
gc_cluster_centers = weight_centers(gc_dat)

### Find minimum distance between each gc cluster and glom

In [None]:
# out_path is path to output file
out_path = 'glom_gc_distances.csv'
# one row per gc cluster: distance from its center to the nearest glomerulus center
dists = pd.DataFrame(
    compute_gc_glom_dists(gc_cluster_centers, glom_cluster_centers)
)
dists.to_csv(out_path)