# Ripley analysis

11.8.2021 (8.9.2022) Sören Doose

Analysis of Ripley's h function for a list of regions of interest (rois) from different conditions.

## Initial setup

In [None]:
import sys
from pathlib import Path
import re
import pickle
import warnings
import logging

%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from scipy import stats
from tqdm.notebook import tqdm

import locan as lc

In [None]:
# Report the locan version information for the record (called with dependencies=False).
lc.show_versions(dependencies=False)

In [None]:
# Send log records to stdout so that they appear in the notebook output.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
# Root logger that is used by the cells below.
logger = logging.getLogger()

## File list

In [None]:
# Directory that is searched for roi files, given relative to the current
# working directory.
directory = Path(r'.') / '../data/Titration'
# Raise explicitly instead of using `assert`: assertions are skipped when
# Python runs with -O and they do not report the path that was looked up.
if not directory.exists():
    raise FileNotFoundError(f'Data directory not found: {directory.resolve()}')

In [None]:
# Recursively collect all yaml files below `directory` and list them together
# with their position in `files`.
files = list(directory.glob('**/*.yaml'))
print(f'Number of files: {len(files)}')
for i, file in enumerate(files):
    print(f'{i} : {file}')

## Set up the pipeline

In [None]:
def computation(self, file, seed=None):
    """
    Compute Ripley's h function for one roi and for two sets of reference data.

    The locdata of the roi is filtered by a selection condition. Ripley's h
    function is then computed for (i) the selected locdata, (ii) data
    generated repeatedly by `lc.randomize`, and (iii) data generated
    repeatedly by `lc.simulate_dstorm`. Results are stored on `self` as
    `file_indicator`, `rhf_estimates`, `rhf_randomized` and `rhf_control`.

    Parameters
    ----------
    self : Pipeline
        Pipeline object that collects results of the analysis procedure.
    file : str
        File path for roi-file.
    seed : optional
        Seed that is passed to `numpy.random.default_rng`. The resulting
        generator is shared by all random steps of the procedure.

    Returns
    -------
    Pipeline or None
        The Pipeline object specified by `self`, or None if the selected
        locdata holds 100 localizations or fewer.
    """
    # Prologue
    self.file_indicator = Path(file)  # .stem
    rng = np.random.default_rng(seed=seed)

    # Load locdata: keep only the name of the referenced file and look it up
    # in the directory of the roi file.
    roi = lc.Roi.from_yaml(path=file)
    roi_path = Path(file).relative_to(Path('.'))
    reference_name = Path(roi.reference.file_path).name
    roi.reference.file_path = str(roi_path.with_name(reference_name))
    locdata = roi.locdata()

    # Select
    condition = '0 < frame < 15_000 and 8000 < intensity'  ##### PARAMETER #####
    locdata = lc.select_by_condition(locdata, condition=condition)
    locdata.reduce()

    # Prerequisites
    if len(locdata) <= 100:
        return None

    # Ripley
    radii = np.linspace(1, 500, 100)  ##### PARAMETER #####
    n_points = 200  ##### PARAMETER #####
    repetitions = 100  ##### PARAMETER #####

    region = locdata.region

    def ripley_h(data):
        """Compute Ripley's h function of `data`, passing a random subset of `n_points` as `other_locdata`."""
        subset = lc.random_subset(data, n_points=n_points, seed=rng)
        estimator = lc.RipleysHFunction(radii=radii, region_measure=region.region_measure)
        return estimator.compute(data, other_locdata=subset)

    # experimental
    self.rhf_estimates = ripley_h(locdata)

    # randomized
    self.rhf_randomized = []
    for _ in range(repetitions):
        dat_randomized = lc.randomize(locdata, hull_region=region, seed=rng)
        self.rhf_randomized.append(ripley_h(dat_randomized))

    # Ripley control
    n_localizations_per_dye = 11  ##### PARAMETER #####
    min_localizations_per_dye = 1
    n_dyes = round(len(locdata) / n_localizations_per_dye)
    localization_precision = 12  ##### PARAMETER #####

    self.rhf_control = []
    for _ in range(repetitions):
        locdata_control = lc.simulate_dstorm(
            parent_intensity=n_dyes / region.region_measure,
            region=region,
            cluster_mu=n_localizations_per_dye,
            min_points=min_localizations_per_dye,
            cluster_std=localization_precision,
            seed=rng,
        )
        self.rhf_control.append(ripley_h(locdata_control))

    return self

## Run pipeline

In [None]:
# Log level of the root logger for the pipeline run.
logger.setLevel(logging.INFO)  # alternative logging.WARNING

### Multiprocessing with ray

In [None]:
import ray

# Start ray without arguments; the commented variant passes an explicit
# number of CPUs instead.
ray.init()
# ray.init(num_cpus = 4)

In [None]:
%%time
@ray.remote
def worker(i, file, seed):
    """
    Run the analysis pipeline for a single roi file as a ray remote task.

    Parameters
    ----------
    i : int
        Running index of the file; only used in the log message.
    file : str or Path
        File path for roi-file.
    seed : optional
        Seed that is forwarded to `computation`.

    Returns
    -------
    Pipeline or None
        Result of `lc.Pipeline(...).compute()`, or None if an exception was
        raised during the computation (the error is logged as a warning).
    """
    # Logging configuration needed for multiprocessing with ray
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger.info(f'Processing {i} : {file}')

    # NumPy 2.0 no longer exposes `VisibleDeprecationWarning` in the main
    # namespace; it lives in `np.exceptions` (available since NumPy 1.25).
    # Resolving it outside the `try` keeps a lookup failure from being
    # swallowed by the handler below, which would turn every file into None.
    visible_deprecation_warning = getattr(np, 'exceptions', np).VisibleDeprecationWarning

    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=visible_deprecation_warning)
            pipe = lc.Pipeline(computation=computation, file=file, seed=seed).compute()
        logger.info(f'Computation completed for: {file}')
        return pipe
    except Exception as e:
        # Best effort: a failing file must not abort the whole batch.
        logger.warning(f'Error in {file} : {e}')
        return None
    
# One child seed per file, all spawned from a single seed sequence.
n_processes = len(files)
ss = np.random.SeedSequence()
child_seeds = ss.spawn(n_processes)

# Submit one remote task per file and collect the results in file order.
futures = [
    worker.remote(index, roi_file, child_seed)
    for index, (roi_file, child_seed) in enumerate(zip(files, child_seeds))
]
pipes = ray.get(futures)
print(f'Number of pipes: {len(pipes)}')

## Pipeline attributes

In [None]:
# Attribute names of the first pipe that neither start nor end with a double underscore.
[attr for attr in dir(pipes[0]) if not attr.startswith('__') and not attr.endswith('__')]

## Save pickled pipes

## Load pickled pipes

## Remove None from pipes

In [None]:
# Entries are None for files that failed or did not meet the prerequisites.
# Compare against None explicitly so that a valid result is never dropped
# just because it evaluates to False.
print(f'Number of pipes: {len(pipes)}')
pipes = [pipe for pipe in pipes if pipe is not None]
print(f'Number of pipes that are not None: {len(pipes)}')

## Extract from pipes

In [None]:
def collect_scalars(pipelines):
    """
    Assemble scalar properties of Pipeline objects in a pandas.DataFrame.

    Parameters
    ----------
    pipelines : list(Pipeline)
        Pipeline objects; each must provide the attribute `file_indicator`.

    Returns
    -------
    pandas.DataFrame
        One row per pipeline, in input order, with the column `files`.
    """
    records = [{'files': pipeline.file_indicator} for pipeline in pipelines]
    return pd.DataFrame(records)

In [None]:
# Table with one row per remaining pipe; the row position matches the position in `pipes`.
scalars_df = collect_scalars(pipes)

## Define groups

Reduce the file name to a group identifier:

In [None]:
# Glob patterns that are matched against each roi file path ...
choices = ['*5ng*', '*0-005*']
# ... and the group name assigned to files matching the pattern at the same position.
choices_name = ['dense', 'sparse']

# One list of match results per pattern; files matching no pattern get None.
conditions = [
    [file_path.match(pattern) for file_path in scalars_df['files']]
    for pattern in choices
]
scalars_df['sample'] = np.select(conditions, choices_name, default=None)

In [None]:
# Group the rows (and thereby the pipes) by their sample name.
grouped = scalars_df.groupby('sample')

In [None]:
# Names of the groups that were found.
list(grouped.groups)

## Grouped Ripley curves

In [None]:
def RipleysAnalysis_concat(cls, datasets):
    """
    Combine the results of several analyses column-wise in a new analysis object.

    Parameters
    ----------
    cls : type
        Analysis class. It is instantiated without arguments and the combined
        data is stored in the `results` attribute of the new instance.
    datasets : list
        Either instances of `cls` (their `results` attributes are combined)
        or pandas.DataFrame / pandas.Series objects. The kind is decided from
        the first element.

    Returns
    -------
    cls
        New instance of `cls` with `results` set to the concatenated data
        (concatenated along axis 1 with an inner join on the index).

    Raises
    ------
    ValueError
        If `datasets` is empty.
    TypeError
        If the first element of `datasets` is neither an instance of `cls`
        nor a pandas.DataFrame or pandas.Series.
    """
    # Fail early with a clear message instead of an IndexError from datasets[0].
    if len(datasets) == 0:
        raise ValueError("datasets must contain at least one element.")

    first = datasets[0]
    if isinstance(first, cls):
        dataframes = [dataset.results for dataset in datasets]
    elif isinstance(first, (pd.DataFrame, pd.Series)):
        dataframes = datasets
    else:
        raise TypeError(
            f"datasets must hold {cls.__name__}, pandas.DataFrame or pandas.Series objects, "
            f"not {type(first).__name__}."
        )

    new_analysis_class = cls()
    new_analysis_class.results = pd.concat(dataframes, axis=1, join="inner")
    return new_analysis_class

In [None]:
def RipleyAnalysis_statistics(self):
    """
    Compute row-wise statistics of `self.results` and store them in `self.statistics`.

    Parameters
    ----------
    self : object
        Analysis object with a pandas.DataFrame in the attribute `results`.

    Returns
    -------
    object
        The object specified by `self`. Its attribute `statistics` is a
        pandas.DataFrame with the columns `mean`, `std`, `sem`, `CI_lower`
        (0.05 quantile) and `CI_upper` (0.95 quantile), each computed across
        the columns of `results`.
    """
    results = self.results
    summaries = {
        'mean': results.mean(axis=1),
        'std': results.std(axis=1),
        'sem': results.sem(axis=1),
        'CI_lower': results.quantile(0.05, axis=1),
        'CI_upper': results.quantile(0.95, axis=1),
    }
    # Name the columns directly through `keys`.
    self.statistics = pd.concat(list(summaries.values()), axis=1, keys=list(summaries))
    return self

In [None]:
def _summarize(datasets):
    """Concatenate `datasets` column-wise and attach the row-wise statistics."""
    combined = RipleysAnalysis_concat(cls=lc.RipleysHFunction, datasets=datasets)
    return RipleyAnalysis_statistics(combined)


def _summarize_repetitions(group_pipes, attribute):
    """Average each repetition over the pipes of a group, then summarize the per-repetition means."""
    # The number of repetitions is taken from the first pipe, as for all groups.
    n_repetitions = len(getattr(pipes[0], attribute))
    repetition_means = [
        _summarize([getattr(pipe, attribute)[n] for pipe in group_pipes]).statistics['mean']
        for n in range(n_repetitions)
    ]
    return _summarize(repetition_means)


# Results per group name.
grouped_rhf_estimates = {}
grouped_rhf_randomized = {}
grouped_rhf_control = {}

for name, group in grouped:
    # The index of `group` holds the positions of the group members in `pipes`.
    group_pipes = [pipes[i] for i in group.index]

    # experimental
    grouped_rhf_estimates[name] = _summarize([pipe.rhf_estimates for pipe in group_pipes])

    # randomized
    grouped_rhf_randomized[name] = _summarize_repetitions(group_pipes, 'rhf_randomized')

    # control
    grouped_rhf_control[name] = _summarize_repetitions(group_pipes, 'rhf_control')

### all data

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 8))

# Label suffix and per-group results for each kind of data, in plotting order.
curve_sets = (
    ("-estimates", grouped_rhf_estimates),
    ("-randomized", grouped_rhf_randomized),
    ("-control", grouped_rhf_control),
)

# Mean curve with the band between CI_lower and CI_upper for every group and kind of data.
for suffix, curves in curve_sets:
    for group, grouped_rhf in zip(grouped.groups, curves.values()):
        statistics = grouped_rhf.statistics
        ax.plot('mean', data=statistics, label=group + suffix)
        ax.fill_between(statistics.index, 'CI_lower', 'CI_upper', data=statistics, color='lightgrey')

ax.set(title='All Data', xlabel='distance (nm)', ylabel="Ripley's h function")
ax.legend(loc='upper right')
plt.show()

### experimental data

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 8))

# Mean curve per group with the band between CI_lower and CI_upper.
for group, grouped_rhf in zip(grouped.groups, grouped_rhf_estimates.values()):
    statistics = grouped_rhf.statistics
    ax.plot('mean', data=statistics, label=group + "-estimates")
    ax.fill_between(statistics.index, 'CI_lower', 'CI_upper', data=statistics, color='lightgrey')

ax.set(title='Experimental Data', xlabel='distance (nm)', ylabel="Ripley's h function")
ax.legend(loc='upper right')
plt.show()

### randomized

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 8))

# Mean curve per group with the band between CI_lower and CI_upper.
for group, grouped_rhf in zip(grouped.groups, grouped_rhf_randomized.values()):
    statistics = grouped_rhf.statistics
    ax.plot('mean', data=statistics, label=group + "-randomized")
    ax.fill_between(statistics.index, 'CI_lower', 'CI_upper', data=statistics, color='lightgrey')

ax.set(title='Randomized Data', xlabel='distance (nm)', ylabel="Ripley's h function")
ax.legend(loc='upper right')
plt.show()

### control

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 8))

# Mean curve per group with the band between CI_lower and CI_upper.
for group, grouped_rhf in zip(grouped.groups, grouped_rhf_control.values()):
    statistics = grouped_rhf.statistics
    ax.plot('mean', data=statistics, label=group + "-control")
    ax.fill_between(statistics.index, 'CI_lower', 'CI_upper', data=statistics, color='lightgrey')

ax.set(title='Control Data', xlabel='distance (nm)', ylabel="Ripley's h function")
ax.legend(loc='upper right')
plt.show()

### Group wise

In [None]:
# One figure per group showing estimates, randomized and control data together.
data_selectors = (grouped_rhf_estimates, grouped_rhf_randomized, grouped_rhf_control)
label_extensions = ("-estimates", "-randomized", "-control")
# Iterating the mapping yields the color names, one per kind of data.
colors = mcolors.TABLEAU_COLORS

for group in list(grouped.groups):
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 8))

    for label_extension, grouped_rhf, color in zip(label_extensions, data_selectors, colors):
        statistics = grouped_rhf[group].statistics
        ax.plot('mean', data=statistics, c=color, linewidth=4, label=group + label_extension)
        # With `data=...` and no label, matplotlib labels a line with the
        # column name, which would add "CI_lower"/"CI_upper" entries to the
        # legend. An empty label keeps the legend to the mean curves.
        for bound in ('CI_lower', 'CI_upper'):
            ax.plot(bound, data=statistics, c=color, linestyle="dashed", label='')
        ax.fill_between(statistics.index, 'CI_lower', 'CI_upper', data=statistics, color='lightgrey')

    ax.set(title=group, xlabel='distance (nm)', ylabel="Ripley's h function")
    ax.legend(loc='upper right')
plt.show()

## Publication figure

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7.2, 5.8))

group = "sparse"

# Reference data is drawn first so that the experimental curve ends up on top.
data_selectors = (grouped_rhf_randomized, grouped_rhf_control, grouped_rhf_estimates)
# labels = (label_definitions[group], " ", " ")
labels = (" ", " ", group)
colors = ['darkgray', 'darkgray', '#a6cee3']

for label, grouped_rhf, color in zip(labels, data_selectors, colors):
    statistics = grouped_rhf[group].statistics
    # Band and dashed borders are drawn for the reference data only.
    if grouped_rhf is not grouped_rhf_estimates:
        for bound in ('CI_lower', 'CI_upper'):
            ax.plot(bound, data=statistics, c='grey', linestyle="dashed", label='')
        ax.fill_between(statistics.index, 'CI_lower', 'CI_upper', data=statistics, color='lightgrey')
    ax.plot('mean', data=statistics, c=color, linewidth=4, label=label)

ax.set_xlabel('Distance (nm)', fontsize=28)
ax.set_ylabel("Ripley's h function", fontsize=28)
plt.xticks(fontsize=22)
plt.yticks(fontsize=22)

# Group name as text inside the axes (position in axes coordinates).
ax.text(0.4, 0.9, labels[2], fontsize=24, transform=ax.transAxes)
plt.show()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7.2, 5.8))

group = "dense"

# Reference data is drawn first so that the experimental curve ends up on top.
data_selectors = (grouped_rhf_randomized, grouped_rhf_control, grouped_rhf_estimates)
# labels = (label_definitions[group], " ", " ")
labels = (" ", " ", group)
colors = ['darkgray', 'darkgray', '#1f78b4']

for label, grouped_rhf, color in zip(labels, data_selectors, colors):
    statistics = grouped_rhf[group].statistics
    # Band and dashed borders are drawn for the reference data only.
    if grouped_rhf is not grouped_rhf_estimates:
        for bound in ('CI_lower', 'CI_upper'):
            ax.plot(bound, data=statistics, c='grey', linestyle="dashed", label='')
        ax.fill_between(statistics.index, 'CI_lower', 'CI_upper', data=statistics, color='lightgrey')
    ax.plot('mean', data=statistics, c=color, linewidth=4, label=label)

ax.set_xlabel('Distance (nm)', fontsize=28)
ax.set_ylabel("Ripley's h function", fontsize=28)
plt.xticks(fontsize=22)
plt.yticks(fontsize=22)

# Group name as text inside the axes (position in axes coordinates).
ax.text(0.4, 0.9, labels[2], fontsize=24, transform=ax.transAxes)
plt.show()