#### Numpy statistical properties.

In [2]:
import numpy as np

In [3]:
# min() will return the minimum value 

a = np.array([[5, 7, 9, 8], [3, -6, 11, 10], [2, -8, 13, 5]])
print(a)
print(np.min(a))
print(np.min(a, 0)) # along the columns
print(np.min(a, 1)) # along the rows

[[ 5  7  9  8]
 [ 3 -6 11 10]
 [ 2 -8 13  5]]
-8
[ 2 -8  9  5]
[ 5 -6 -8]


In [5]:
# To better document your code 
# Create constants like below.
BY_COLUMN = 0
BY_ROW = 1

a = np.array([[5, 7, 9, 8], [3, -6, 11, 10], [2, -8, 13, 5]])
print(a)
print(np.min(a))
print(np.min(a, BY_COLUMN)) 
print(np.min(a, BY_ROW))

[[ 5  7  9  8]
 [ 3 -6 11 10]
 [ 2 -8 13  5]]
-8
[ 2 -8  9  5]
[ 5 -6 -8]


In [7]:
# max() will return the maximum value

a = np.array([[5, 7, 9, 8], [3, 6, 11, 10], [2, 9, 13, 5]])
print(a)
print(np.max(a))
print(np.max(a, BY_COLUMN))
print(np.max(a, BY_ROW))

[[ 5  7  9  8]
 [ 3  6 11 10]
 [ 2  9 13  5]]
13
[ 5  9 13 10]
[ 9 11 13]


In [5]:
# ptp() returns the max value minus min value along a specified axis

a = np.array([[5, 7, 9], [3, -6, 11], [2, -8, 13]])
print(a)
print(np.ptp(a))
print(np.ptp(a, BY_COLUMN)) 
print(np.ptp(a, BY_ROW))

[[ 5  7  9]
 [ 3 -6 11]
 [ 2 -8 13]]
21
[ 3 15  4]
[ 4 17 21]


In [None]:
# 10, 23, 5, 7, 1, 50

1, 5, 7, 10, 23, 50

7+10/2 = 8.5 median


1,..      ...... 20  .100
22, 23, 45,      85

In [6]:
"""
For percentile, the values have to be arranged in ascending order and 
for this example, we then pick 50th percentile
"""

a = np.array([[20, 10, 50], [30, 50, 70], [60, 40, 10]])
print(a)
print(np.percentile(a, 50))
print(np.percentile(a, 50, axis=BY_COLUMN)) 
print(np.percentile(a, 50, axis=BY_ROW))


[[20 10 50]
 [30 50 70]
 [60 40 10]]
40.0
[30. 40. 50.]
[20. 50. 40.]


In [6]:
# mean() turns the mean or the average

a = np.array([[20, 10, 50], [30, 50, 70], [60, 40, 10]])
print(a)
print(np.mean(a))
print(np.mean(a, axis=BY_COLUMN)) 
print(np.mean(a, axis=BY_ROW))

[[20 10 50]
 [30 50 70]
 [60 40 10]]
37.77777777777778
[36.66666667 33.33333333 43.33333333]
[26.66666667 50.         36.66666667]


In [7]:
# median() returns the median 

a = np.array([[20, 10, 50], [30, 50, 70], [60, 40, 10]])
print(a)
print(np.median(a))
print(np.median(a, axis=BY_COLUMN)) # along the columns
print(np.median(a, axis=BY_ROW)) # along the rows

[[20 10 50]
 [30 50 70]
 [60 40 10]]
40.0
[30. 40. 50.]
[20. 50. 40.]


In [9]:
# determining average using weights

a = np.array([1, 2, 3, 4])
wts = np.array([4, 3, 2, 1])
print(np.average(a, weights=wts))

# (1*4) + (2*3) + (3*2) + (4*1) = 20 
# 20 / total in weights = 20/10 = 2

2.0


In [10]:
# determining average using weights
# when returned = True, it returns the average value as well as the 
# sum of the weights

a = np.array([1, 2, 3, 4])
wts = np.array([4, 3, 2, 1])
print(np.average(a, weights=wts, returned = True))

(2.0, 10.0)


In [11]:
# standard deviation is the square root of average of squared deviations 
# from the mean

a = np.array([1, 2, 3, 4, 5])
print(np.mean(a))
print(np.std(a))
# this value is nothing but the square root of 2.

3.0
1.4142135623730951


**Example 1**
Create an ndarray with values [2, -4, 6], [7, 4, 8] and [5, 10, 4]. 

Determine the following calculation along the rows and columns of the matrix:

 - Maximum values.
 - Minimum values.
 - Mean values
 - The standard deviation values
 

In [8]:
# Maximum 
a = np.array([[2, 3],[5, 6],[1, 1]])
print(a)
print('Column max = {0}'.format(a.max(axis = BY_COLUMN)))
print('Row max    = {0}'.format(a.max(axis = BY_ROW)))

[[2 3]
 [5 6]
 [1 1]]
Column max = [5 6]
Row max    = [3 6 1]


In [9]:
# Minimum 
a = np.array([[2, 3],[5, 6],[1, 1]])
print(a)
print('Column min = {0}'.format(a.min(axis = BY_COLUMN)))
print('Row min    = {0}'.format(a.min(axis = BY_ROW)))

[[2 3]
 [5 6]
 [1 1]]
Column min = [1 1]
Row min    = [2 5 1]


In [10]:
# Mean 
a = np.array([[2, 3],[5, 6],[1, 1]])
print(a)
print('Column mean = {0}'.format(a.mean(axis = BY_COLUMN)))
print('Row mean    = {0}'.format(a.mean(axis = BY_ROW)))

[[2 3]
 [5 6]
 [1 1]]
Column mean = [2.66666667 3.33333333]
Row mean    = [2.5 5.5 1. ]


In [11]:
# Std 
a = np.array([[2, 3],[5, 6],[1, 1]])
print(a)
print('Column std = {0}'.format(a.std(axis = BY_COLUMN)))
print('Row std    = {0}'.format(a.std(axis = BY_ROW)))

[[2 3]
 [5 6]
 [1 1]]
Column std = [1.69967317 2.05480467]
Row std    = [0.5 0.5 0. ]


In [9]:
# All operations in one code
from dataclasses import dataclass

@dataclass
class Calculation:
    name: str
    call: callable

operations = [
    Calculation("Maxinum", np.max), 
    Calculation('Minimum', np.min),
    Calculation('Mean', np.mean)]
a = np.array([[2, 3],[5, 6],[1, 1]])
for op in operations:
    print(f'\n === {op.name} ===\n')
    print('By Column : {0}'.format(op.call(a, axis = BY_COLUMN)))
    print('By Row    : {0}'.format(op.call(a, axis = BY_ROW)))
    


 === Maxinum ===

By Column : [5 6]
By Row    : [3 6 1]

 === Minimum ===

By Column : [1 1]
By Row    : [2 5 1]

 === Mean ===

By Column : [2.66666667 3.33333333]
By Row    : [2.5 5.5 1. ]
