## NumPy Lesson 2 - Statistics

https://numpy.org/doc/stable/reference/routines.statistics.html


In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Fix the global RNG seed so every run draws the same sample.
np.random.seed(123)
# Ten integers drawn from the half-open interval [0, 100).
x = np.random.randint(low=0, high=100, size=10)
x

In [None]:
# Arithmetic mean of the sample.
x.mean()

In [None]:
# Smallest and largest value of the sample, shown as a (min, max) pair.
x.min(), x.max()

### Range

In [None]:
# Range of the data: maximum minus minimum.
# np.ptp is short for "peak to peak".

np.ptp(x)

In [None]:
# 2-D example: a 5x2 array of integers from [0, 100), drawn with a fixed seed.
np.random.seed(123)
x2 = np.random.randint(low=0, high=100, size=(5, 2))
x2

In [None]:
# Minimum and maximum of each column (axis=0 reduces over the rows).
x2.min(axis=0), x2.max(axis=0)

In [None]:
# Range (max - min) of each column.
np.ptp(x2, axis=0)

In [None]:
# Minimum and maximum of each row (axis=1 reduces over the columns).
x2.min(axis=1), x2.max(axis=1)

In [None]:
# Range (max - min) of each row.
np.ptp(x2, axis=1)

### Quantiles

In [None]:
# Re-create the 1-D sample: same seed, so the same ten integers from [0, 100).
np.random.seed(123)
x = np.random.randint(low=0, high=100, size=10)
x

In [None]:
# Median: the middle value of the sorted sample.
np.median(x)

In [None]:
# The 0.5 quantile is the median, so this matches np.median(x).
np.quantile(x, 0.5)

In [None]:
# Sorted copy of x (x itself is not modified), handy for checking quantiles by eye.
np.sort(x)

In [None]:
# Minimum, the three quartiles and the maximum in one call.
# Values between data points are linearly interpolated by default.
np.quantile(x, [0, 0.25, 0.5, 0.75, 1])

In [None]:
# Same quantiles, but each result is the nearest actual data point rather than
# an interpolated value. `method` replaces the `interpolation` keyword, which
# has been deprecated since NumPy 1.22.
np.quantile(x, [0, 0.25, 0.5, 0.75, 1], method='nearest')

In [None]:
# 2-D example: show the array x2 before taking quantiles along an axis.
x2

In [None]:
# Median (0.5 quantile) of each column.
np.quantile(x2, 0.5, axis=0)

### Percentiles

In [None]:
# Re-create the 1-D sample with the same seed for the percentile examples.
np.random.seed(123)
x = np.random.randint(low=0, high=100, size=10)
x

In [None]:
# Sorted copy of x, for checking the percentiles below by eye.
np.sort(x)

In [None]:
# The 50th percentile is the median; percentiles use a 0-100 scale
# where quantiles use 0-1.
np.percentile(x, 50)

In [None]:
# Minimum, the three quartiles and the maximum, expressed as percentiles.
np.percentile(x, [0, 25, 50, 75, 100])

In [None]:
# Same percentiles, but each result is the nearest actual data point rather
# than an interpolated value. `method` replaces the `interpolation` keyword,
# which has been deprecated since NumPy 1.22.
np.percentile(x, [0, 25, 50, 75, 100], method='nearest')

In [None]:
# 2-D example: show the array x2 before taking percentiles along an axis.
x2

In [None]:
# 50th percentile (median) of each column.
np.percentile(x2, 50, axis=0)

### Histograms

In [None]:
# np.histogram computes the histogram of a data set.
# Start from the same reproducible sample of ten integers in [0, 100).

np.random.seed(123)
x = np.random.randint(low=0, high=100, size=10)
x

In [None]:
# Sorted copy of x, to see how the values spread across the bins.
np.sort(x)

In [None]:
# Returns a (counts, bin_edges) pair. With no `bins` argument NumPy uses
# 10 equal-width bins spanning the data's min to max.
np.histogram(x)

In [None]:
# Draw the histogram of x with matplotlib, using its default number of bins.
plt.hist(x)

In [None]:
# Same computation with 3 equal-width bins.
np.histogram(x, bins=3)

In [None]:
# Unpack the two arrays returned by np.histogram: per-bin counts and bin edges.
counts, bin_edges = np.histogram(x, bins=3)

In [None]:
# Number of values that fell into each bin.
counts

In [None]:
# Edges delimiting the bins; there is one more edge than there are bins.
bin_edges

In [None]:
# Number of bins: one fewer than the number of edges.
len(bin_edges) - 1

In [None]:
# Print every bin as "left edge - right edge = count".
for lower, upper, n_in_bin in zip(bin_edges[:-1], bin_edges[1:], counts):
    print(lower, '-', upper, '=', n_in_bin)

In [None]:
# Histogram plot with matplotlib's default number of bins, for comparison
# with the 3-bin version.
plt.hist(x)

In [None]:
# Histogram plot with 3 bins, the same bin count passed to np.histogram(x, bins=3).
plt.hist(x, bins=3)

In [None]:
# Densities: with density=True, np.histogram returns a probability density per bin instead of raw counts.

In [None]:
# Show the sample again before computing densities.
x

In [None]:
# density=True normalises the histogram so that the bar areas
# (density * bin width) sum to 1.
densities, bin_edges = np.histogram(x, bins = 3, density = True)

In [None]:
# Density value of each bin (not a count).
densities

In [None]:
# Bin edges returned alongside the densities.
bin_edges

In [None]:
# Width of each bin: difference between consecutive edges.
bin_edges[1:] - bin_edges[:-1]

In [None]:
# Total area of the histogram bars (density * width, summed over the bins).
# For a density histogram this should come out as 1, up to float rounding.
(densities * np.diff(bin_edges)).sum()

In [None]:
# Plot the 3-bin histogram with density=True, so the bar heights are
# densities rather than counts.
plt.hist(x, bins = 3, density = True)

### Number of occurrences

### np.bincount
 - Count number of occurrences of each value in array of non-negative ints

In [None]:
# New reproducible sample (different seed): ten integers from [0, 100).
np.random.seed(567)
x = np.random.randint(low=0, high=100, size=10)
x

In [None]:
# Repeat every element twice (a, a, b, b, ...) so each value occurs at least twice.
x2 = np.repeat(x, 2)
x2

In [None]:
# Entry i of the result is the number of times the value i occurs in x2.
np.bincount(x2)

In [None]:
# Length of the bincount output.
len(np.bincount(x2))

In [None]:
# Largest value plus one: bincount needs one slot for every integer from 0 up to the maximum.
x2.max() + 1

In [None]:
result = np.bincount(x2)
# Keep only the counts of values that actually occur in x2.
result[np.nonzero(result)]

### Digitize

### np.digitize
 - Return the indices of the bins to which each value in input array belongs

In [None]:
# Reproducible sample of ten integers from [0, 100) ...
np.random.seed(167)
x = np.random.randint(low=0, high=100, size=10)
# ... with the last element forced to 25.
x[x.size - 1] = 25
x

In [None]:
# Five evenly spaced edges from 0 to 100 inclusive, i.e. four bins of equal width.
bins = np.linspace(start=0, stop=100, num=5)
bins

In [None]:
# For each value in x, the index i such that bins[i-1] <= value < bins[i]
# (np.digitize's default behaviour for increasing bins).
indices = np.digitize(x, bins)
indices

In [None]:
# Show each value between the two bin edges that enclose it:
# bins[k - 1] <= value < bins[k], where k is the index from np.digitize.
for value, k in zip(x, indices):
    print('{:5.2f} <= {:3d} < {:5.2f}'.format(bins[k - 1], value, bins[k]))
   

## Case Study - Bin Smoothing

In [None]:
# The sample to be smoothed.
x

In [None]:
# List the values that fall into each bin (bin numbers run from 1 to bins.size - 1).
for bin_id in range(1, len(bins)):
    members = x[indices == bin_id]
    print(members)
    

In [None]:
# Mean of the values in each bin; list position k holds the mean of bin k + 1.
bin_means = [np.mean(x[indices == bin_id]) for bin_id in range(1, len(bins))]
bin_means

In [None]:
# Work on a copy so the original sample x is left untouched.
x_smoothed = x.copy()
x_smoothed

In [None]:
# Overwrite every value with the mean of its bin.
# x_smoothed keeps the dtype it was created with, so each mean is cast to
# that dtype on assignment.
for bin_id in range(1, len(bins)):
    in_bin = indices == bin_id
    x_smoothed[in_bin] = bin_means[bin_id - 1]

x_smoothed

In [None]:
 x.dtype.name

In [None]:
# Float copy of x (astype returns a new array), so the bin means can be
# stored without losing their fractional part.
x_smoothed = x.astype(float)
x_smoothed

In [None]:
# Replace every value with the mean of the bin it belongs to.
for bin_id in range(1, len(bins)):
    in_bin = indices == bin_id
    x_smoothed[in_bin] = bin_means[bin_id - 1]

x_smoothed