In [None]:
import numpy as np
from scipy import stats

from matplotlib import pyplot as plt
from scipy.stats.mstats import gmean as geometric_mean

from robstat.robstat import circ_mean_dev, geometric_median, mardia_median, \
mv_median, tukey_median

In [None]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [None]:
# Create sample angular data: 200 angles (in radians) scattered around
# `angle_center` with Gaussian noise of standard deviation 0.2.
#
# The legacy global NumPy RNG is seeded first so that a fresh
# "Restart Kernel -> Run All" reproduces exactly the same sample.
np.random.seed(42)

angle_center = 0.5
angles = angle_center + np.random.normal(loc=0, scale=0.2, size=200)

# Cartesian coordinates of the angles on the unit circle
x_coords = np.cos(angles)
y_coords = np.sin(angles)

In [None]:
# Plot the sample angles as points on a circle in Cartesian coordinates.
fig, ax = plt.subplots(figsize=(6, 6), dpi=100)

circ_rad = 1  # radius of the reference circle and of the plotted points
lim_rng = circ_rad * 1.25  # axis half-range, leaving a margin around the circle

ax.set(
    xlim=(-lim_rng, lim_rng),
    ylim=(-lim_rng, lim_rng),
    xlabel='x',
    ylabel='y',
    title='Sample angular data',
)
# equal scaling on both axes so the circle is not drawn as an ellipse
ax.set_aspect('equal')

# reference circle; uses `circ_rad` so it stays consistent with the axis limits
a_circle = plt.Circle((0, 0), circ_rad, fill=False, color='blue', alpha=0.5)
ax.add_artist(a_circle)

# coordinate axes through the origin
ax.axhline(0, color='grey', ls='--', lw=0.5)
ax.axvline(0, color='grey', ls='--', lw=0.5)

# `x_coords` / `y_coords` lie on the unit circle; scale them onto the reference circle
ax.plot(circ_rad * x_coords, circ_rad * y_coords, 'o', color='orange',
        markersize=4, alpha=0.5)

plt.show()

In [None]:
# Same sample drawn on polar axes: a reference circle at r = 1 with the data on top.
fig, ax = plt.subplots(
    figsize=(6, 6),
    dpi=100,
    subplot_kw=dict(projection='polar'),
)

# radial axis runs from the origin to `lim_rng`; radial tick labels placed at -90 degrees
ax.set_rlim(0, lim_rng)
ax.set_rlabel_position(-90)

# reference circle: 1000 points at radius 1 over a full turn
circle_theta = np.linspace(0, 2 * np.pi, 1000)
ax.plot(circle_theta, np.ones_like(circle_theta), color='blue', ls='-', alpha=0.5)

# sample angles, all at radius 1
ax.plot(
    angles,
    np.ones_like(angles),
    marker='o',
    linestyle='None',
    color='orange',
    markersize=4,
    alpha=0.5,
)

plt.show()

In [None]:
# Circular mean deviation of the sample about `angle_center`,
# unwrapped to a plain Python scalar for display.
np.asarray(circ_mean_dev(angles, angle_center)).item()

### Mardia Median

The Mardia median is the angle $\tilde\theta$ that minimizes the circular mean deviation:

$$ d(\tilde\theta) = \pi - \frac{1}{n} \sum_{i=1}^{n} \left| \pi - \left| \theta_i - \tilde\theta \right| \right| $$

where $\tilde\theta$ is the estimate of the preferred direction. The circular mean deviation $d(\tilde\theta)$ is itself used as a measure of dispersion.

The Mardia median occasionally leads to a non-unique estimate of the circular median since there can sometimes be two or more diameters that divide the data equally and have the same circular mean deviation.

The weighted Mardia median minimizes the weighted circular mean deviation, with weights $\eta_i$:

$$ d(\tilde\theta) = \pi - \frac{1}{\sum \eta_i} \sum_{i=1}^{n} \eta_i \left| \pi - \left| \theta_i - \tilde\theta \right| \right| $$

In [None]:
# Check the uniqueness of the Mardia median: call `mardia_median` with two
# different `init_guess` values, one on either side of `angle_center`, and
# compare the two results.

x = mardia_median(angles, init_guess=angle_center-0.3)
y = mardia_median(angles, init_guess=angle_center+0.3)

print('Mardia Medians found:')
print(x)
print(y, '\n')

# circular mean of the same sample, printed for reference
print('Circular mean: \n{}\n'.format(stats.circmean(angles)))

# circular mean deviation evaluated at each of the two medians found
cmd_x = circ_mean_dev(angles, x).item()
cmd_y = circ_mean_dev(angles, y).item()
print('Circular mean deviations for the Mardia Medians (check):')
print(cmd_x)
print(cmd_y, '\n')

# `not np.allclose(...)` instead of `~np.isclose(...)`: `~` is a bitwise
# operator and only acts as a logical NOT on NumPy booleans (`~True == -2`,
# which is truthy), whereas `np.allclose` always returns a plain `bool`.
if not np.allclose(x, y):
    print('The Mardia Median in this example is not unique')

### Geometric Median

The geometric median is defined as the value of the argument $y$ that minimizes the sum of Euclidean distances between $y$ and all points $x_i$:

$$ \underset{y \in \mathbb{R}^d}{\mathrm{arg\,min}} \sum_{i=1}^n || x_i - y ||_2  $$

Properties & asides:
 - The geometric median has a breakdown point of 0.5: up to half of the sample data may be arbitrarily corrupted, and the median of the samples will still provide a robust estimator for the location of the uncorrupted data
 - The geometric median is unique whenever the points are not collinear; see https://ui.adsabs.harvard.edu/abs/2000PNAS...97.1423V/abstract
 - It can be computed quickly with Weiszfeld's iterative algorithm
 
We can also weight the geometric median:

$$ \underset{y \in \mathbb{R}^d}{\mathrm{arg\,min}} \sum_{i=1}^n \eta_i || x_i - y ||_2  $$

### Tukey Median

Tukey (1975) proposed the halfspace depth as a tool to visually describe bivariate datasets.

For a finite set of data points $X_n = \{x_1, ..., x_n\}$ in $\mathbb{R}^d$, the Tukey depth, or halfspace depth of any point $y \in \mathbb{R}^d$ determines how central the point is inside the data cloud.

The halfspace depth of $y$ is defined as the minimal number of data points in any closed halfspace determined by a hyperplane through $y$:

$$ hdepth(y; X_n) = \underset{|| u || = 1}{\mathrm{min}} \# \{i; u^{\tau} x_i \geq u^{\tau} y \} $$

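The set of all points with depth $\geq k$ is called the $k$th depth region $D_k$. The halfspace depth regions form a sequence of nested polyhedra. The Tukey median is the point of maximal halfspace depth; when the deepest region contains more than one point, its barycenter is taken.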

In [None]:
# 250 random 2-D points: uniform draws from [0, 1) shifted by +2 in both coordinates
points = np.random.random((250, 2)) + 2

# complex representation (x + iy) of the points, used only for the geometric mean
points_c = points[:, 0] + 1j * points[:, 1]

# location estimates of the point cloud
sample_gmean = geometric_mean(points_c)
sample_gmed = geometric_median(points, weights=None)
tukey_result = tukey_median(points)
sample_tmed = tukey_result['barycenter']

In [None]:
# Scatter the point cloud and overlay the three location estimates.
fig, ax = plt.subplots(figsize=(6, 6), dpi=100)

ax.scatter(*points.T, alpha=0.5)

# (coordinates, matplotlib format string, legend label) for each estimate
location_estimates = (
    ((sample_gmean.real, sample_gmean.imag), 'ro', 'gmean'),
    ((sample_gmed[0], sample_gmed[1]), 'co', 'gmed'),
    ((sample_tmed[0], sample_tmed[1]), 'yo', 'tmed'),
)
for (est_x, est_y), est_fmt, est_label in location_estimates:
    ax.plot(est_x, est_y, est_fmt, label=est_label)

ax.set(xlim=(1.5, 3.5), ylim=(1.5, 3.5))

ax.legend()
plt.show()

### Other location depth notions

 - Simplicial depth
 - Oja depth
 - Projection depth
 - Spatial depth
 
 See e.g. https://www.csun.edu/~ctoth/Handbook/chap58.pdf, https://cran.r-project.org/web/packages/depth/depth.pdf


In [None]:
# Print the multivariate median of `points` for each of the listed method names.
for method_name in ('Tukey', 'Oja', 'Liu', 'Spatial', 'CWmed'):
    method_estimate = mv_median(points, method=method_name)
    print('{:7s}: {}'.format(method_name, method_estimate))

TODO:
 - slice in arbitrary dimension to e.g. take median about n-th dimension
 - JAX acceleration of cdist
 - robstat as a package