In [1]:
import time
import multiprocessing
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

def process_cell_counts(i, cell_positions, cell_labels, dist_bin_px):
    '''
    process_cell_counts() returns, for the cell stored at row i, how many
    cells of each label column fall inside each distance bin.

    Parameters:
        i (int): row of cell_positions holding the reference cell
        cell_positions (np.array): one row of coordinates per cell
        cell_labels (np.array): one row per cell and one column per label;
            cast to int and summed, so one-hot rows yield per-label counts
        dist_bin_px (np.array): outer edge of each distance bin; 0 is
            prepended here as the inner edge of the first bin

    Returns:
        tuple: (i, counts) where i is passed through unchanged and counts
        has one row per distance bin and one column per label column
    '''
    # squared distance from the reference cell to every cell (itself included)
    offsets = cell_positions[i][np.newaxis, :] - cell_positions
    sq_dist = np.square(offsets).sum(axis=1)
    # squared bin edges with the leading 0; comparing squares avoids a sqrt
    sq_edges = np.square(np.concatenate([[0], dist_bin_px]))
    # within[b, j] is True when cell j is no farther away than edge b
    within = sq_dist[np.newaxis, :] <= sq_edges[:, np.newaxis]
    # per-label totals inside each edge; differencing consecutive edges
    # leaves the totals that belong to each bin only
    cumulative = within.astype(int) @ cell_labels.astype(int)
    per_bin = np.diff(cumulative, axis=0)
    # hand back the index together with its counts
    return i, per_bin

def per_image_cell_counts_euc(image, cell_positions, cell_labels, targ_labels, dist_bin_px):
    '''
    per_image_cell_counts_euc() counts, for every cell of one image, the
    cells of each target label that lie inside each distance bin around it.

    Parameters:
        image: identifier of the image; only used in the progress messages
        cell_positions (pd.DataFrame or array-like): one row of coordinates
            per cell, passed to euclidean_distances()
        cell_labels (np.array): labels of the cells
        targ_labels (np.array): labels of the cells to be counted
        dist_bin_px (np.array): distance bins in pixels

    Returns:
        np.array: the per-cell arrays returned by euclidian_counts(),
        stacked along the first axis (one entry per cell), or None when
        the distance matrix has no rows
    '''

    start_time = time.time()
    print(f'Starting analysis for image {image}')
    # calculate pairwise distances between all cells in the image
    dist_st_time = time.time()
    distances = euclidean_distances(cell_positions)
    dist_end_time = (time.time() - dist_st_time) / 60
    print(f'Finished distance calculation for image {image} ({len(cell_positions)} cells) in {dist_end_time:.2f} minutes')

    # Collect the per-cell results first and stack them once at the end;
    # stacking inside the loop would copy the growing array on every cell.
    per_cell_counts = [
        euclidian_counts(i, distances, cell_labels, targ_labels, dist_bin_px)
        for i in range(len(distances))
    ]
    image_counts = np.vstack(per_cell_counts) if per_cell_counts else None

    comp_time = (time.time() - start_time) / 60
    print(f'Finished analysis for image {image} in {comp_time:.2f} minutes')
    return image_counts

def euclidian_counts(idx, distances, cell_labels, targ_labels, dist_bin_px):
    '''
    euclidian_counts() returns the number of cells of each target label
    within each distance bin around a given cell.

    A bin covers distances greater than its inner edge and up to and
    including its outer edge. The first inner edge is 0, so cells at
    distance exactly 0 (the cell itself included) are never counted.

    Parameters:
        idx (int): index of the cell to be counted
        distances (np.array): pairwise distances between cells
        cell_labels (np.array): labels of the cells
        targ_labels (np.array): labels of the cells to be counted
        dist_bin_px (np.array): distance bins in pixels

    Returns:
        np.array: integer counts of shape (1, n_bins, n_labels); the shape
        is the same for a single bin or for no bins at all
    '''

    # Prepend 0 so that consecutive edges delimit every bin.
    bin_edges = np.concatenate([[0], dist_bin_px])
    n_bins = len(bin_edges) - 1

    # Plain arrays make the boolean mask below purely positional, whatever
    # index a pandas input may carry.
    cell_distances = np.asarray(distances[idx])
    labels = np.asarray(cell_labels)

    # Preallocating keeps the result 2-D even with one bin or none.
    idx_counts = np.zeros((n_bins, len(targ_labels)), dtype=int)
    for i in range(n_bins):
        in_bin = (cell_distances > bin_edges[i]) & (cell_distances <= bin_edges[i + 1])
        present_cells = labels[in_bin]
        idx_counts[i] = [np.count_nonzero(present_cells == label) for label in targ_labels]

    return idx_counts[np.newaxis, :]

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns

def draw_cell_scatter(df, ind = None):
    '''
    draw_cell_scatter() plots every cell of df at its position, coloured
    by lineage, and optionally marks one cell with a circle.

    Parameters:
        df (pd.DataFrame): must contain the columns 'Cell X Position',
            'Cell Y Position' and 'Lineage'
        ind: index label of a single row of df to mark; None (default)
            draws no marker
    '''
    ax = sns.scatterplot(data = df,
                         x = 'Cell X Position',
                         y = 'Cell Y Position',
                         hue = 'Lineage')

    if ind is not None:
        x_pos, y_pos = df.loc[ind, ['Cell X Position', 'Cell Y Position']]
        # A Circle is only rendered once it is attached to an Axes; creating
        # it alone draws nothing. Outline only, so the marked cell stays
        # visible underneath.
        ax.add_patch(plt.Circle((x_pos, y_pos), fill=False))

In [3]:
import pandas as pd
from functools import partial

# Outer edges of the distance bins, in microns.
dist_bin_um = np.array([25, 50, 100, 150, 200])
# Factor used for the um -> px conversion below (0.5 um per pixel).
UM_PER_PX = 0.5
dist_bin_px = dist_bin_um / UM_PER_PX
# Half of the available cores, but never fewer than one: a worker count of 0
# is rejected by multiprocessing.Pool.
num_cpus_to_use = max(1, multiprocessing.cpu_count() // 2)

filename = './sample.csv'
df = pd.read_csv(filename)
# One entry per distinct slide; each is analysed as a separate image.
images = df['Slide ID'].unique()

In [7]:
# Positional-argument tuples, one per image, in the parameter order of
# per_image_cell_counts_euc (ready for Pool.starmap in the block below).
kwargs_list = []
# Result of every image keyed by slide ID, so earlier images are not lost
# when `results` is rebound on the next iteration.
results_by_image = {}

# The labels to count are the same for every image, so compute them once.
targ_labels = df['Lineage'].unique()

for image in images:

    df_image = df.loc[df['Slide ID'] == image, :]
    cell_positions = df_image[['Cell X Position', 'Cell Y Position']]
    cell_labels = df_image['Lineage']

    results = per_image_cell_counts_euc(image, cell_positions, cell_labels, targ_labels, dist_bin_px)
    print(results.shape)
    results_by_image[image] = results
    kwargs_list.append(
        (
            image,
            cell_positions,
            cell_labels,
            targ_labels,
            dist_bin_px
        )
    )

# Parallel variant, currently disabled:
# # Create a pool of worker processes
# with multiprocessing.Pool(processes=num_cpus_to_use) as pool:
#     results = pool.starmap(per_image_cell_counts_euc, kwargs_list)

Starting analysis for image 1A-p10197
Finished distance calculation for image 1A-p10197 (201 cells) in 0.00 minutes
Finished analysis for image 1A-p10197 in 0.00 minutes
(201, 5, 3)


In [None]:
# NOTE(review): this uses every row of df, i.e. cells from all slides are
# measured against each other — confirm that this is intended.
cell_positions = df[['Cell X Position', 'Cell Y Position']].values
# One indicator column per lineage, as expected by process_cell_counts.
cell_labels = pd.get_dummies(df['Lineage'])

args = dict(cell_positions=cell_positions,
            cell_labels=cell_labels.values,
            dist_bin_px=dist_bin_px)
# Bind everything but the cell index so the function can be mapped over indices.
pool_map_fn = partial(process_cell_counts, **args)

# Only the first 100 cells are processed; clamp to the number of rows so a
# smaller table does not index past the end of cell_positions.
idxSet = range(min(100, len(cell_positions)))
indexed_counts = [pool_map_fn(idx) for idx in idxSet]
# Split the (index, counts) pairs into one array of indices and one array
# of counts with a leading per-cell axis.
i = np.stack([idx for idx, _ in indexed_counts], axis=0)
counts = np.stack([per_cell for _, per_cell in indexed_counts], axis=0)