# Tutorial about simulating localization data

Locan provides methods for simulating basic localization data sets as LocData objects.

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd

%matplotlib inline

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import locan as lc

In [None]:
# Report version information; the system, dependencies and verbose options
# are all switched off.
lc.show_versions(
    system=False,
    dependencies=False,
    verbose=False,
)

## Use random number generator

In all simulations we make use of numpy routines for random number generation by instantiating `numpy.random.default_rng` from a `seed` parameter. Therefore, we recommend setting up a random number generator in every script and passing that generator instance to all simulation functions through the `seed` parameter.

In [None]:
# One seeded generator for the whole script; the same instance is handed to
# every simulation call through the seed parameter.
rng = np.random.default_rng(seed=1)
locdatas = [
    lc.simulate_csr(n_samples=100, feature_range=(0, 1000), seed=rng)
    for _ in range(3)
]

In [None]:
# Draw all simulated data sets into one axes, one tab10 colour per data set.
fig, ax = plt.subplots(nrows=1, ncols=1)
for i, locdata in enumerate(locdatas):
    locdata.data.plot.scatter(
        x='position_x',
        y='position_y',
        color=plt.cm.tab10(i),
        ax=ax,
        label='locdata',
    )
plt.show()

Make sure to follow the correct procedure for parallel computation as described in the numpy tutorials (https://numpy.org/doc/stable/reference/random/parallel.html).

## Simulate localization data

### Simulate localization data that follows a homogeneous (Poisson) distribution in 2D

In [None]:
# Simulate 1000 localizations with feature_range (0, 1000) and a fixed seed,
# then print a summary of the resulting data set.
dat = lc.simulate_csr(
    n_samples=1000,
    feature_range=(0, 1000),
    seed=1,
)

dat.print_summary()

#### Show scatter plots

In [None]:
# Scatter plot of position_y against position_x for the current data set.
fig, ax = plt.subplots(nrows=1, ncols=1)
dat.data.plot.scatter(
    x='position_x',
    y='position_y',
    ax=ax,
    color='Blue',
    label='locdata',
)
plt.show()

### Homogeneous (Poisson) distribution in 3D

In [None]:
# Same simulation as in 2D, but with three features per localization.
dat_3D = lc.simulate_csr(
    n_samples=1000,
    n_features=3,
    feature_range=(0, 1000),
    seed=1,
)

dat_3D.print_summary()

In [None]:
# Single 3D axes filling the figure.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')

# Transposing the coordinates lets them unpack into one sequence per axis.
x, y, z = dat_3D.coordinates.T
ax.scatter(x, y, z, color='Blue', label='locdata')
plt.show()

### Homogeneous (Poisson) distribution on disc

In [None]:
# Simulate 1000 localizations on a disc of radius 2 with a fixed seed.
dat = lc.simulate_csr_on_disc(
    n_samples=1000,
    radius=2,
    seed=1,
)

In [None]:
# Scatter plot of the localizations simulated on the disc.
fig, ax = plt.subplots(nrows=1, ncols=1)
dat.data.plot.scatter(
    x='position_x',
    y='position_y',
    ax=ax,
    color='Blue',
    label='locdata',
)
plt.show()

### Homogeneous (Poisson) distribution on regions

In [None]:
# Polygon region given by its vertices; the first vertex is repeated at the
# end of the vertex list.
region = lc.RoiRegion(
    region_type='polygon',
    region_specs=((0, 0), (0, 5), (4, 3), (2, 0.5), (0, 0)),
)
# Simulate 1000 localizations on that region with a fixed seed.
dat = lc.simulate_csr_on_region(region, n_samples=1000, seed=1)

In [None]:
# Scatter plot of the localizations simulated on the polygon region.
fig, ax = plt.subplots(nrows=1, ncols=1)
dat.data.plot.scatter(
    x='position_x',
    y='position_y',
    ax=ax,
    color='Blue',
    label='locdata',
)
plt.show()

### Neyman-Scott (Matern) distribution in 2D

In a Matern process, localizations are distributed homogeneously in circles of a given radius.

In [None]:
# Matern simulation: 10 centers, radius 50, feature_range (0, 1000), fixed seed.
dat = lc.simulate_Matern(
    n_samples=1000,
    n_features=2,
    centers=10,
    radius=50,
    feature_range=(0, 1000),
    seed=1,
)

In [None]:
# Scatter plot of the Matern-distributed localizations.
fig, ax = plt.subplots(nrows=1, ncols=1)
dat.data.plot.scatter(
    x='position_x',
    y='position_y',
    ax=ax,
    color='Blue',
    label='locdata',
)
plt.show()

Often a Neyman-Scott distribution implies that the number of points per cluster varies according to a Poisson distribution. This can be achieved by providing `n_samples` as an array carrying the number of points per cluster.

In [None]:
# Draw one Poisson-distributed count (lam=5) for each of the 10 centers and
# pass the array as n_samples; the same generator also seeds the simulation.
nrg = np.random.default_rng(seed=1)
n_samples = nrg.poisson(lam=5, size=10)
dat = lc.simulate_Matern(
    n_samples=n_samples,
    n_features=2,
    centers=10,
    radius=50,
    feature_range=(0, 1000),
    seed=nrg,
)

In [None]:
# Scatter plot of the Matern simulation with per-cluster sample counts.
fig, ax = plt.subplots(nrows=1, ncols=1)
dat.data.plot.scatter(
    x='position_x',
    y='position_y',
    ax=ax,
    color='Blue',
    label='locdata',
)
plt.show()

### Neyman-Scott (Thomas) distribution in 2D

In a Thomas process, localizations are distributed in clusters where each cluster follows a Gaussian distribution around its center with a specified standard deviation.

In [None]:
# Thomas simulation: 10 centers, cluster_std 20, feature_range (0, 1000),
# fixed seed.
dat = lc.simulate_Thomas(
    n_samples=1000,
    n_features=2,
    centers=10,
    feature_range=(0, 1000),
    cluster_std=20,
    seed=1,
)

In [None]:
# Scatter plot of the Thomas-distributed localizations.
fig, ax = plt.subplots(nrows=1, ncols=1)
dat.data.plot.scatter(
    x='position_x',
    y='position_y',
    ax=ax,
    color='Blue',
    label='locdata',
)
plt.show()

### Neyman-Scott distribution in 3D

In [None]:
# Thomas simulation with three features per localization and cluster_std 10.
dat_3D = lc.simulate_Thomas(
    n_samples=1000,
    n_features=3,
    centers=10,
    feature_range=(0, 1000),
    cluster_std=10,
    seed=1,
)

In [None]:
# Single 3D axes filling the figure.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')

# Transposing the coordinates lets them unpack into one sequence per axis.
x, y, z = dat_3D.coordinates.T
ax.scatter(x, y, z, color='Blue', label='locdata')
plt.show()

## Resample data

The resample function provides additional localizations for each given localization; these are Gauss-distributed around the original localization with a standard deviation given by the `uncertainty_x` property.

In [None]:
# Seed a fresh generator for this section. It is bound to `rng` because every
# statement below (and the resample call in the next cell) draws from `rng`;
# binding it to a different name would leave the new generator unused and make
# the results depend on how far the earlier generator had already advanced.
rng = np.random.default_rng(seed=1)
n_samples = 10
dat = lc.simulate_csr(
    n_samples=n_samples,
    n_features=2,
    feature_range=(0, 1000),
    seed=rng,
)
# Attach one random uncertainty in [0, 20) per localization and coordinate.
dat.dataframe = dat.dataframe.assign(uncertainty_x=20 * rng.random(n_samples))
dat.dataframe = dat.dataframe.assign(uncertainty_y=20 * rng.random(n_samples))

In [None]:
# Resample `dat`, drawing from the `rng` generator set up in an earlier cell.
dat_resampled = lc.resample(
    dat,
    n_samples=1000,
    seed=rng,
)
# Last expression of the cell: show the final rows of the resampled data.
dat_resampled.data.tail()

In [None]:
# Resampled localizations go in first as nearly transparent red points, then
# the original localizations are drawn on top in blue.
fig, ax = plt.subplots(nrows=1, ncols=1)
dat_resampled.data.plot.scatter(
    x='position_x',
    y='position_y',
    ax=ax,
    color='Red',
    label='locdata resampled',
    alpha=0.01,
)
dat.data.plot.scatter(
    x='position_x',
    y='position_y',
    ax=ax,
    color='Blue',
    label='locdata',
)
plt.show()