# Tutorial about Ripley's k function

In [None]:
from pathlib import Path

%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

import locan as lc

In [None]:
# Report locan version information with the system, dependency and verbose
# sections switched off.
lc.show_versions(
    system=False,
    dependencies=False,
    verbose=False,
)

## Simulate data

We simulate localization data that is homogeneously Poisson distributed.

In [None]:
# Simulated reference dataset: n_samples=1000 within the range (0, 1000),
# with a fixed seed so the notebook is reproducible.
dat_random = lc.simulate_csr(
    n_samples=1000,
    feature_range=(0, 1000),
    seed=1,
)

# Quick inspection: first rows of the localization table ...
print('Data head:')
print(dat_random.data.head(), '\n')

# ... the dataset summary ...
print('Summary:')
dat_random.print_summary()

# ... and the properties attached to the dataset.
print('Properties:')
print(dat_random.properties)

We also simulate data that follows a Neyman-Scott distribution (blobs): 

In [None]:
# Clustered 2D dataset, simulated over the same range and with the same seed
# as the random dataset.
dat_blob = lc.simulate_Thomas(
    n_samples=1000,
    n_features=2,
    centers=10,
    feature_range=(0, 1000),
    cluster_std=10,
    seed=1,
)

# Quick inspection: first rows of the localization table ...
print('Data head:')
print(dat_blob.data.head(), '\n')

# ... the dataset summary ...
print('Summary:')
dat_blob.print_summary()

# ... and the properties attached to the dataset.
print('Properties:')
print(dat_blob.properties)

### Scatter plot

In [None]:
# Side-by-side scatter plots: random data on the left, clustered data on the right.
fig, ax = plt.subplots(nrows=1, ncols=2)
for axis, locdata in zip(ax, (dat_random, dat_blob)):
    locdata.data.plot.scatter(
        x='position_x',
        y='position_y',
        ax=axis,
        color='Blue',
        label='locdata',
    )
plt.tight_layout()
plt.show()

## Analyze Ripley's h function

We have a look at Ripley's h function computed from all localizations in locdata.

The analysis class `RipleysHFunction` provides numerical results and a plot of the results versus radii.

In [None]:
# Evaluate Ripley's h function of the random data at 100 radii between 0 and 200.
rhf_random = lc.RipleysHFunction(
    radii=np.linspace(0, 200, 100),
).compute(dat_random)

# Show the first rows of the numerical results.
rhf_random.results.head()

In [None]:
# Evaluate Ripley's h function of the clustered data at 100 radii between 0 and 200.
rhf_blob = lc.RipleysHFunction(
    radii=np.linspace(0, 200, 100),
).compute(dat_blob)

# Show the first rows of the numerical results.
rhf_blob.results.head()

The plot reflects the amount of clustering. For homogeneously distributed data it decreases towards negative values since edge effects are not taken into account.

In [None]:
# Plot the h function of the random data, then that of the clustered data.
for rhf in (rhf_random, rhf_blob):
    rhf.plot()

## Estimate Ripley's h function

We can speed up the computation of an estimated Ripley's h function by providing a subset of the original localizations as test points.

We first take a random subset of the original localizations as test data. Here we provide 10 shuffled data sets.

In [None]:
# `random_subset` is reached through the top-level `lc` namespace, so no
# separate import from an internal submodule path is needed.
# Draw 10 random subsets of 5 localizations each to serve as test points.
subsets = [lc.random_subset(dat_blob, n_points=5) for _ in range(10)]

We then compute the estimated Ripley's h function:

In [None]:
# Estimate the h function of the clustered data, passing the first random
# subset as `other_locdata`.
rhf_estimate = lc.RipleysHFunction(
    radii=np.linspace(0, 200, 100),
).compute(
    dat_blob,
    other_locdata=subsets[0],
)

In [None]:
# Plot the estimated h function; the trailing semicolon keeps the notebook
# from echoing the call's return value.
rhf_estimate.plot();

We can do the same for all subsets:

In [None]:
# One h-function estimate per random subset, all evaluated at the same radii.
rhf_estimates = [
    lc.RipleysHFunction(radii=np.linspace(0, 200, 100)).compute(
        dat_blob,
        other_locdata=subset,
    )
    for subset in subsets
]

In [None]:
# Overlay all estimates on a single shared axes (one row, one column is the
# `subplots` default).
fig, ax = plt.subplots()
for estimate in rhf_estimates:
    estimate.plot(ax=ax)
plt.show()

## Compute Ripley's k, l and h function

We can compute Ripley's k, l and h functions:

In [None]:
# All three functions are evaluated on the random data at the same radii.
radii = np.linspace(0, 200, 100)

rkf_random = lc.RipleysKFunction(radii=radii).compute(dat_random)
rlf_random = lc.RipleysLFunction(radii=radii).compute(dat_random)
rhf_random = lc.RipleysHFunction(radii=radii).compute(dat_random)

In [None]:
# Plot the k, l and h functions of the random data, in that order.
for ripley_function in (rkf_random, rlf_random, rhf_random):
    ripley_function.plot()

## Estimate Ripley's h function for 3D data

In [None]:
# Clustered dataset with three features (3D), simulated with a fixed seed.
dat_blob_3D = lc.simulate_Thomas(
    n_samples=10_000,
    n_features=3,
    centers=200,
    feature_range=(0, 10_000),
    cluster_std=50,
    seed=1,
)

In [None]:
# Take a random subset of 1000 localizations from the 3D dataset; it is used
# as `other_locdata` when estimating the h function below.
sub = lc.random_subset(dat_blob_3D, n_points=1000)

In [None]:
# Estimate the h function of the 3D data at 100 radii between 0 and 1000,
# passing the random subset as `other_locdata`.
rhf_3D = lc.RipleysHFunction(
    radii=np.linspace(0, 1000, 100),
).compute(
    dat_blob_3D,
    other_locdata=sub,
)

In [None]:
# Plot the estimated h function of the 3D data; the trailing semicolon keeps
# the notebook from echoing the call's return value.
rhf_3D.plot();

## Find the maximum of Ripley's h function

In [None]:
# Recompute the h function of the clustered 2D data from all localizations.
rhf_blob = lc.RipleysHFunction(
    radii=np.linspace(0, 200, 100),
).compute(dat_blob)

In [None]:
# Plot the h function of the clustered data; the trailing semicolon keeps the
# notebook from echoing the call's return value.
rhf_blob.plot();

In [None]:
# Display the maximum of the computed h function (left as the cell's last
# expression so the notebook echoes its value).
rhf_blob.Ripley_h_maximum