📝 **Author:** Amirhossein Heydari - 📧 **Email:** <amirhosseinheydari78@gmail.com> - 📍 **Origin:** [mr-pylin/numpy-workshop](https://github.com/mr-pylin/numpy-workshop)

---


**Table of contents**<a id='toc0_'></a>    
- [Dependencies](#toc1_)    
- [NumPy - Statistics](#toc2_)    
  - [Order statistics](#toc2_1_)    
  - [Averages and variances](#toc2_2_)    
  - [Correlating](#toc2_3_)    
  - [Histograms](#toc2_4_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

# <a id='toc1_'></a>[Dependencies](#toc0_)


In [None]:
import numpy as np

# <a id='toc2_'></a>[NumPy - Statistics](#toc0_)

📝 **Docs**:

- Statistics: [numpy.org/doc/stable/reference/routines.statistics.html](https://numpy.org/doc/stable/reference/routines.statistics.html)


## <a id='toc2_1_'></a>[Order statistics](#toc0_)

<table style="margin: 0 auto;">
  <thead>
    <tr>
      <th style="text-align: center;">Function</th>
      <th style="text-align: center;">Description</th>
      <th style="text-align: center;">Details</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td><code>np.ptp</code></td>
      <td>Range of values (maximum - minimum) along an axis</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.ptp.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.percentile</code></td>
      <td>Compute the q-th percentile of the data along the specified axis</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.percentile.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.quantile</code></td>
      <td>Compute the q-th quantile of the data along the specified axis</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.quantile.html">link</a></td>
    </tr>
  </tbody>
</table>


In [None]:
# inputs: one sample array per rank (1-D, 2-D and 3-D)
arr_1d_1 = np.array([5, 1, 2, 0, 7, 4, 6, 2, 1, 5, 7, 2])
arr_2d_1 = np.array(
    [
        [5, 0, 1, 4, 2],
        [6, 3, 5, 1, 1],
    ]
)
arr_3d_1 = np.array(
    [
        [[6, 2], [1, 2]],
        [[4, 2], [6, 4]],
    ]
)

# peak-to-peak (max - min) with no axis: all elements are considered, result is a scalar
ptp_1 = np.ptp(arr_1d_1)
ptp_2 = np.ptp(arr_2d_1)
ptp_5 = np.ptp(arr_3d_1)

# peak-to-peak along a single axis of the 2-D array: result is a 1-D array
ptp_3 = np.ptp(arr_2d_1, axis=0)  # one value per column
ptp_4 = np.ptp(arr_2d_1, axis=1)  # one value per row

# log
print("ptp_1 :", ptp_1)
print("ptp_2 :", ptp_2)
print("ptp_3 :", ptp_3)
print("ptp_4 :", ptp_4)
print("ptp_5 :", ptp_5)

In [None]:
# inputs: one sample array per rank (1-D, 2-D and 3-D)
arr_1d_2 = np.array([5, 1, 2, 0, 7, 4, 6, 2, 1, 5, 7, 2])
arr_2d_2 = np.array(
    [
        [5, 0, 1, 4, 2],
        [6, 3, 5, 1, 1],
    ]
)
arr_3d_2 = np.array(
    [
        [[6, 2], [1, 2]],
        [[4, 2], [6, 4]],
    ]
)

# percentile: q is expressed on a 0..100 scale
percentile_1 = np.percentile(arr_1d_2, 25)  # first quartile (Q1)
percentile_2 = np.percentile(arr_1d_2, 50)  # second quartile (Q2) == median
percentile_3 = np.percentile(arr_2d_2, 50)  # second quartile (Q2) == median
percentile_4 = np.percentile(arr_3d_2, 75)  # third quartile (Q3)
percentile_5 = np.percentile(arr_3d_2, 75, axis=0)  # Q3 computed along axis 0

# quantile: same statistic, but q is expressed on a 0..1 scale
quantile_1 = np.quantile(arr_1d_2, 0.25)  # first quartile (Q1)
quantile_2 = np.quantile(arr_1d_2, 0.5)  # second quartile (Q2) == median
quantile_3 = np.quantile(arr_2d_2, 0.5)  # second quartile (Q2) == median
quantile_4 = np.quantile(arr_3d_2, 0.75)  # third quartile (Q3)
quantile_5 = np.quantile(arr_3d_2, 0.75, axis=1)  # Q3 computed along axis 1

# log (a 50-dash rule separates the percentile results from the quantile results)
print("percentile_1 :", percentile_1)
print("percentile_2 :", percentile_2)
print("percentile_3 :", percentile_3)
print("percentile_4 :", percentile_4)
print("percentile_5 :", percentile_5, "-" * 50, sep="\n")
print("quantile_1   :", quantile_1)
print("quantile_2   :", quantile_2)
print("quantile_3   :", quantile_3)
print("quantile_4   :", quantile_4)
print("quantile_5   :", quantile_5, sep="\n")

## <a id='toc2_2_'></a>[Averages and variances](#toc0_)

<table style="margin: 0 auto;">
  <thead>
    <tr>
      <th style="text-align: center;">Function</th>
      <th style="text-align: center;">Description</th>
      <th style="text-align: center;">Details</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td><code>np.median</code></td>
      <td>Compute the median along the specified axis</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.median.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.average</code></td>
      <td>Compute the weighted average along the specified axis</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.average.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.mean</code></td>
      <td>Compute the arithmetic mean along the specified axis</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.mean.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.std</code></td>
      <td>Compute the standard deviation along the specified axis</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.std.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.var</code></td>
      <td>Compute the variance along the specified axis</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.var.html">link</a></td>
    </tr>
  </tbody>
</table>


In [None]:
# inputs: one sample array per rank (1-D, 2-D and 3-D)
arr_1d_3 = np.array([5, 1, 2, 0, 7, 4, 6, 2, 1, 5, 7, 2])
arr_2d_3 = np.array([[5, 0, 1, 4, 2], [6, 3, 5, 1, 1]])
arr_3d_3 = np.array([[[6, 2], [1, 2]], [[4, 2], [6, 4]]])

# median (no axis -> over all elements; axis=0 -> per column; axis=1 -> per row)
median_1 = np.median(arr_1d_3)
median_2 = np.median(arr_2d_3)
median_3 = np.median(arr_2d_3, axis=0)
median_4 = np.median(arr_2d_3, axis=1)
median_5 = np.median(arr_3d_3)

# average (supports weights)
# the weights must match the array being averaged, so they are derived from
# arr_1d_3 itself; this keeps the cell runnable without any other cell's variables
average_1 = np.average(arr_1d_3, weights=np.arange(len(arr_1d_3)))
average_2 = np.average(arr_2d_3)
average_3 = np.average(arr_2d_3, axis=0)
average_4 = np.average(arr_2d_3, axis=1)
average_5 = np.average(arr_3d_3)

# mean (doesn't support weights)
mean_1 = np.mean(arr_1d_3)  # same as np.average when no weights are given
mean_2 = np.mean(arr_2d_3)
mean_3 = np.mean(arr_2d_3, axis=0)
mean_4 = np.mean(arr_2d_3, axis=1)
mean_5 = np.mean(arr_3d_3)

# std (standard deviation; called here with NumPy's default ddof)
std_1 = np.std(arr_1d_3)
std_2 = np.std(arr_2d_3)
std_3 = np.std(arr_2d_3, axis=0)
std_4 = np.std(arr_2d_3, axis=1)
std_5 = np.std(arr_3d_3)

# var (variance; the square of the standard deviation above)
var_1 = np.var(arr_1d_3)
var_2 = np.var(arr_2d_3)
var_3 = np.var(arr_2d_3, axis=0)
var_4 = np.var(arr_2d_3, axis=1)
var_5 = np.var(arr_3d_3)

# log
print(f"median_1  : {median_1}")
print(f"median_2  : {median_2}")
print(f"median_3  : {median_3}")
print(f"median_4  : {median_4}")
print(f"median_5  : {median_5}")
print(f"average_1 : {average_1}")
print(f"average_2 : {average_2}")
print(f"average_3 : {average_3}")
print(f"average_4 : {average_4}")
print(f"average_5 : {average_5}")
print(f"mean_1    : {mean_1}")
print(f"mean_2    : {mean_2}")
print(f"mean_3    : {mean_3}")
print(f"mean_4    : {mean_4}")
print(f"mean_5    : {mean_5}")
print(f"std_1     : {std_1}")
print(f"std_2     : {std_2}")
print(f"std_3     : {std_3}")
print(f"std_4     : {std_4}")
print(f"std_5     : {std_5}")
print(f"var_1     : {var_1}")
print(f"var_2     : {var_2}")
print(f"var_3     : {var_3}")
print(f"var_4     : {var_4}")
print(f"var_5     : {var_5}")

## <a id='toc2_3_'></a>[Correlating](#toc0_)

<table style="margin: 0 auto;">
  <thead>
    <tr>
      <th style="text-align: center;">Function</th>
      <th style="text-align: center;">Description</th>
      <th style="text-align: center;">Details</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td><code>np.corrcoef</code></td>
      <td>Return Pearson product-moment correlation coefficients</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.corrcoef.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.correlate</code></td>
      <td>Cross-correlation of two 1-dimensional sequences</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.correlate.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.cov</code></td>
      <td>Estimate a covariance matrix, given data and weights</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.cov.html">link</a></td>
    </tr>
  </tbody>
</table>


In [None]:
# 6 samples (rows) x 3 features (columns)
arr_2d_4 = np.array(
    [[6, 2, 3], [4, 2, 1], [6, 5, 6], [3, 6, 4], [8, 5, 6], [3, 6, 1]],
)

# corrcoef
# default (rowvar=True): every row is a variable -> 6x6 matrix, sample vs. sample
corrcoef_1 = np.corrcoef(arr_2d_4)
# rowvar=False: every column is a variable -> 3x3 matrix, feature vs. feature
corrcoef_2 = np.corrcoef(arr_2d_4, rowvar=False)

# log (a 50-dash rule separates the two matrices)
print("corrcoef_1 :", corrcoef_1, "-" * 50, sep="\n")
print("corrcoef_2 :", corrcoef_2, sep="\n")

In [None]:
# real-valued signal to be projected onto a set of complex exponentials
signal_1d_1 = np.array([1, 2, 3])

# normalized Fourier basis vectors with the same length as the signal
N = signal_1d_1.size
n = np.arange(N)
basis_0 = np.ones(N) * (1 / np.sqrt(N))  # frequency index 0 (constant vector)
basis_1 = np.exp(-1j * (2 * np.pi / N) * 1 * n) * (1 / np.sqrt(N))  # frequency index 1
basis_2 = np.exp(-1j * (2 * np.pi / N) * 2 * n) * (1 / np.sqrt(N))  # frequency index 2

# correlate
# with equal-length inputs, mode="valid" yields exactly one value per call;
# note that np.correlate conjugates its second argument before multiplying
coef_0 = np.correlate(signal_1d_1, basis_0, mode="valid")  # DC component
coef_1 = np.correlate(signal_1d_1, basis_1, mode="valid")  # AC component
coef_2 = np.correlate(signal_1d_1, basis_2, mode="valid")  # AC component

# log
print("Fourier coefficients:", np.concatenate((coef_0, coef_1, coef_2)))

In [None]:
# 6 samples (rows) x 3 features (columns)
arr_2d_5 = np.array(
    [[6, 2, 3], [4, 2, 1], [6, 5, 6], [3, 6, 4], [8, 5, 6], [3, 6, 1]],
)

# cov
# default (rowvar=True): every row is a variable -> 6x6 covariance between samples
cov_1 = np.cov(arr_2d_5)
# rowvar=False: every column is a variable -> 3x3 covariance between features
cov_2 = np.cov(arr_2d_5, rowvar=False)

# log (a 50-dash rule separates the two matrices)
print("cov_1 :", cov_1, "-" * 50, sep="\n")
print("cov_2 :", cov_2, sep="\n")

## <a id='toc2_4_'></a>[Histograms](#toc0_)

<table style="margin: 0 auto;">
  <thead>
    <tr>
      <th style="text-align: center;">Function</th>
      <th style="text-align: center;">Description</th>
      <th style="text-align: center;">Details</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td><code>np.histogram</code></td>
      <td>Compute the histogram of a dataset</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.histogram.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.histogram2d</code></td>
      <td>Compute the bi-dimensional histogram of two data samples</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.histogram2d.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.histogramdd</code></td>
      <td>Compute the multidimensional histogram of some data</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.histogramdd.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.bincount</code></td>
      <td>Count number of occurrences of each value in array of non-negative ints</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.bincount.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.histogram_bin_edges</code></td>
      <td>Function to calculate only the edges of the bins used by the <code>np.histogram</code> function</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.histogram_bin_edges.html">link</a></td>
    </tr>
    <tr>
      <td><code>np.digitize</code></td>
      <td>Return the indices of the bins to which each value in input array belongs</td>
      <td style="text-align: center;"><a href="https://numpy.org/doc/stable/reference/generated/numpy.digitize.html">link</a></td>
    </tr>
  </tbody>
</table>


In [None]:
# small integer sample with values between 1 and 5
arr_1d_4 = np.array([1, 2, 1, 3, 2, 5, 1, 4])

# histogram: each call returns (counts per bin, bin edges)
# 3 equal-width bins spanning the data's own min..max
histogram_1, bin_1 = np.histogram(arr_1d_4, bins=3)
# 3 equal-width bins spanning an explicitly requested range
histogram_2, bin_2 = np.histogram(arr_1d_4, bins=3, range=(1, 8))
# explicit bin edges 0, 1, ..., 6
histogram_3, bin_3 = np.histogram(arr_1d_4, bins=np.arange(7))

# log
print("histogram_1 :", histogram_1)
print("bin_1       :", bin_1)
print("histogram_2 :", histogram_2)
print("bin_2       :", bin_2)
print("histogram_3 :", histogram_3)
print("bin_3       :", bin_3)

In [None]:
# paired samples: (x[i], y[i]) is one 2-D point
x = np.array([1, 2, 8, 3, 2, 5, 1, 4])
y = np.array([2, 1, 3, 3, 4, 5, 2, 8])

# histogram2d: each call returns (counts, edges along x, edges along y)
# a single integer -> the same number of bins on both axes
hist2d_1, xedges_1, yedges_1 = np.histogram2d(x, y, bins=3)
# a pair of integers -> (bins along x, bins along y)
hist2d_2, xedges_2, yedges_2 = np.histogram2d(x, y, bins=(3, 5))

# log (every entry except the last is followed by a 50-dash rule)
print("hist2d_1 :", hist2d_1, "-" * 50, sep="\n")
print("xedges_1 :", xedges_1, "-" * 50, sep="\n")
print("yedges_1 :", yedges_1, "-" * 50, sep="\n")
print("hist2d_2 :", hist2d_2, "-" * 50, sep="\n")
print("xedges_2 :", xedges_2, "-" * 50, sep="\n")
print("yedges_2 :", yedges_2, sep="\n")

In [None]:
# 6 points in 3-D space: one point per row, one coordinate per column
arr_2d_6 = np.array(
    [
        [1, 2, 4],
        [2, 1, 1],
        [1, 3, 2],
        [3, 3, 2],
        [2, 4, 3],
        [4, 5, 1],
    ]
)

# histogramdd: returns (3-D grid of counts, list with one edge array per dimension)
histdd_1, edges_1 = np.histogramdd(arr_2d_6, bins=(3, 3, 3))

# log (a 50-dash rule separates the counts from the edges)
print("histdd_1 :", histdd_1, "-" * 50, sep="\n")
print("Edges :", edges_1, sep="\n")

In [None]:
# non-negative integers only, as required by np.bincount
arr_1d_5 = np.array([1, 2, 1, 3, 2, 5, 1, 4])

# bincount: position i of the result holds the number of occurrences of the value i
bincount_1 = np.bincount(arr_1d_5)

# log
print("bincount_1:", bincount_1)

In [None]:
# small integer sample with values between 1 and 5
arr_1d_6 = np.array([1, 2, 1, 3, 2, 5, 1, 4])

# histogram_bin_edges: only the edges are computed, no counting is done
bin_edges_1 = np.histogram_bin_edges(arr_1d_6, bins=3)  # 3 bins -> 4 edges
bin_edges_2 = np.histogram_bin_edges(arr_1d_6, bins=5)  # 5 bins -> 6 edges

# log
print("bin_edges_1:", bin_edges_1)
print("bin_edges_2:", bin_edges_2)

In [None]:
# values to classify; some fall below the first edge and some above the last one
arr_1d_7 = np.array([1, 2.5, 4.7, 3, 2, 11, -1.9, 4.3])
# monotonically increasing bin edges
bins = np.array([1, 2, 3, 4])

# digitize: index i means bins[i-1] <= value < bins[i];
# 0 marks values below bins[0] and len(bins) marks values at or above bins[-1]
digitize_1 = np.digitize(arr_1d_7, bins)

# log
print("digitize_1:", digitize_1)