# Numpy Stats

# Order Stats

In [2]:
#amin ----- Return the minimum of an array or minimum along an axis.

Returns:	
amin : ndarray or scalar

Minimum of a. If axis is None, the result is a scalar value. If axis is given, the result is an array of dimension a.ndim - 1.

In [1]:
import numpy as np
# 2x2 grid holding 0..3 in row-major order
a = np.arange(4).reshape(2, 2)

In [2]:
a

array([[0, 1],
       [2, 3]])

In [2]:
np.amin(a)   #Minimum of the flattened array

0

In [3]:
np.amin(a, axis=0)   # Minima along the first axis (axis=0): one minimum per column

array([0, 1])

In [4]:
np.amin(a, axis=1)   # Minima along the second axis

array([0, 2])

In [5]:
# Five consecutive values 0..4 stored as floats so that an entry can later hold NaN
b = np.arange(5).astype(float)

In [6]:
b[2] = np.nan   # use the canonical np.nan; the np.NaN alias was removed in NumPy 2.0

In [7]:
b

array([  0.,   1.,  nan,   3.,   4.])

In [8]:
np.amin(b)

nan

In [9]:
np.nanmin(b)

0.0

NaN values are propagated: if at least one item is NaN, the corresponding min value will be NaN as well. To ignore NaN values (MATLAB behavior), use nanmin.

Don’t use amin for element-wise comparison of two arrays; when a.shape[0] is 2, minimum(a[0], a[1]) is faster than amin(a, axis=0).

In [10]:
# 2x2 float matrix whose bottom-right entry is missing (NaN)
a = np.array([
    [1, 2],
    [3, np.nan],
])

In [11]:
np.nanmax(a) 
#Return the maximum of an array or maximum along an axis, ignoring any NaNs

3.0

In [12]:
np.nanmax(a, axis=0)

array([ 3.,  2.])

In [13]:
np.nanmax(a, axis=1)

array([ 2.,  3.])

In [14]:
# NaN is skipped and negative infinity is an ordinary (smallest) value, so the maximum is 2.0.
# -np.inf replaces the np.NINF alias, which was removed in NumPy 2.0.
np.nanmax([1, 2, np.nan, -np.inf])

2.0

In [15]:
np.nanmax([1, 2, np.nan, np.inf])

inf

In [16]:
np.isnan(np.nan)

True

In [17]:
np.isnan(np.inf)

False

In [18]:
# np.log(-1.) is nan ("invalid value") and np.log(0) is -inf ("divide by zero").
# Both RuntimeWarnings are expected in this demo, so silence them locally
# instead of leaving warning residue in the cell output.
with np.errstate(divide="ignore", invalid="ignore"):
    samples = [np.log(-1.), 1., np.log(0)]
np.isnan(samples)   # only the nan entry is flagged; -inf is not NaN

array([ True, False, False], dtype=bool)

#ptp -- peak to peak

#Range of values (maximum - minimum) along an axis.

In [19]:
# 2x2 sample array (values 0..3) used by the ptp examples
x = np.arange(4).reshape(2, 2)
x

array([[0, 1],
       [2, 3]])

In [20]:
np.ptp(x, axis=0)

array([2, 2])

In [21]:
np.ptp(x)

3

In [22]:
np.ptp(x,axis=1)

array([1, 1])

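#percentile ------ see http://www.dummies.com/education/math/statistics/how-to-calculate-percentiles-in-statistics/ for how percentiles are calculated by hand. Note that np.percentile interpolates linearly between neighbouring values by default, so its result can differ from the hand calculation.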

In [23]:
a = np.array([[10, 7, 4], [3, 2, 1]])

In [24]:
np.percentile(a, 70)

5.5

# Averages and variances

In [26]:
#median

In [25]:
a = np.array([[10, 7, 4], [3, 2, 1]])

np.median(a)

3.5

In [26]:
# Median down each column (axis=0 collapses the two rows)
print(np.median(a, axis=0))

# Median across each row (axis=1 collapses the three columns)
print(np.median(a, axis=1))



[ 6.5  4.5  2.5]
[ 7.  2.]


In [28]:
data = range(1,5)

In [29]:
np.average(data)

2.5

In [30]:
np.average(range(1,11), weights=range(10,0,-1))

4.0

In [35]:
# 3x2 grid holding 0..5 in row-major order
data = np.arange(6).reshape(3, 2)

In [36]:
data

array([[0, 1],
       [2, 3],
       [4, 5]])

In [38]:
np.average(data, axis=1, weights=[2,3])

array([ 0.6,  2.6,  4.6])

In [35]:
#mean

In [39]:
a = np.array([[1, 2], [3, 4]])

In [40]:
np.mean(a)

2.5

In [41]:
np.mean(a, axis=0)

array([ 2.,  3.])

In [42]:
np.mean(a, axis=1)

array([ 1.5,  3.5])

In [43]:
#standard deviation

In [44]:
a = np.array([[1, 2], [3, 4]])

In [45]:
np.std(a)

1.1180339887498949

In [46]:
np.std(a, axis=0)

array([ 1.,  1.])

In [47]:
np.std(a, axis=1)

array([ 0.5,  0.5])

In [48]:
#variance

In [49]:
a = np.array([[1, 2], [3, 4]])

In [50]:
np.var(a)

1.25

In [51]:
np.var(a, axis=0)

array([ 1.,  1.])

In [52]:
np.var(a, axis=1)

array([ 0.25,  0.25])

In [53]:
#nanmean

In [54]:
a = np.array([[1, np.nan], [3, 4]])

In [55]:
np.nanmean(a)

2.6666666666666665

In [56]:
# Only the last expression of a cell is displayed, so return both reductions
# together: (NaN-ignoring mean per column, NaN-ignoring mean per row).
np.nanmean(a, axis=0), np.nanmean(a, axis=1)

(array([ 2.,  4.]), array([ 1. ,  3.5]))

In [57]:
#similarly check for nanvar, nanstd

In [58]:
#cumsum -- cumulative sum of the elements along a given axis
a = np.array([[1,2,3], [4,5,6]])

In [59]:
a

array([[1, 2, 3],
       [4, 5, 6]])

In [60]:
np.cumsum(a)

array([ 1,  3,  6, 10, 15, 21], dtype=int32)

In [61]:
np.cumsum(a, dtype=float)

array([  1.,   3.,   6.,  10.,  15.,  21.])

In [62]:
np.cumsum(a,axis=0)   # running total down each column: row i holds the sum of rows 0..i

array([[1, 2, 3],
       [5, 7, 9]], dtype=int32)