In this notebook we will discuss statistical properties.

In [1]:
import numpy as np

In [2]:
# min() will return the minimum value 

a = np.array([[5, 7, 9], [3, -6, 11], [2, -8, 13]])
print(a)
print(np.min(a))
print(np.min(a, axis=0)) # along the columns
print(np.min(a, axis=1)) # along the rows

[[ 5  7  9]
 [ 3 -6 11]
 [ 2 -8 13]]
-8
[ 2 -8  9]
[ 5 -6 -8]


In [3]:
# max() will return the maximum value

# a = np.array([[5, 7, 9], [3, 6, 11], [2, 8, 13]])
print(a)
print(np.max(a))
print(np.max(a, 0)) # along the columns
print(np.max(a, 1)) # along the rows

[[ 5  7  9]
 [ 3 -6 11]
 [ 2 -8 13]]
13
[ 5  7 13]
[ 9 11 13]


In [4]:
# ptp() returns the max value minus min value along a specified axis

a = np.array([[5, 7, 9], [3, -6, 11], [2, -8, 13]])
print(a)
print(np.ptp(a))
print(np.ptp(a, 0)) # along the columns
print(np.ptp(a, 1)) # along the rows

[[ 5  7  9]
 [ 3 -6 11]
 [ 2 -8 13]]
21
[ 3 15  4]
[ 4 17 21]


In [5]:
"""
For percentile, the values have to be arranged in ascending order and 
for this example, we then pick 50th percentile
"""


a = np.array([[20, 10, 50], [30, 50, 70], [60, 40, 10]])
print(a)
print(np.percentile(a, 50))
print(np.percentile(a, 50, axis=0)) # along the columns
print(np.percentile(a, 50, axis=1)) # along the rows



[[20 10 50]
 [30 50 70]
 [60 40 10]]
40.0
[30. 40. 50.]
[20. 50. 40.]


In [16]:
# mean() returns the mean or the average

a = np.array([[20, 10, 50], [30, 50, 70], [60, 40, 10]])
print(a)
print(np.mean(a))
print(np.mean(a, axis=0)) # along the columns
print(np.mean(a, axis=1)) # along the rows

# transpose
a1 = np.mean(a, axis=1)
print(a1)
print(a1.shape)
a2 = np.mean(a, axis=1).T
print(a2)
print(a2.shape)

[[20 10 50]
 [30 50 70]
 [60 40 10]]
37.77777777777778
[36.66666667 33.33333333 43.33333333]
[26.66666667 50.         36.66666667]
[26.66666667 50.         36.66666667]
(3,)
[26.66666667 50.         36.66666667]
(3,)


In [7]:
# median() returns the median 

a = np.array([[20, 10, 50], [30, 50, 70], [60, 40, 10]])
print(a)
print(np.median(a))
print(np.median(a, axis=0)) # along the columns
print(np.median(a, axis=1)) # along the rows

[[20 10 50]
 [30 50 70]
 [60 40 10]]
40.0
[30. 40. 50.]
[20. 50. 40.]


In [8]:
# determining average using weights

a = np.array([1, 2, 3, 4])
wts = np.array([4, 3, 2, 1])
print(np.average(a, weights=wts))

# (1*4) + (2*3) + (3*2) + (4*1) = 20
# 20 / total in weights = 20/10 = 2   => Weight Average / Expected Value

2.0


In [9]:
# determining average using weights
# when returned = True, it returns the average value as well as the 
# sum of the weights

a = np.array([1, 2, 3, 4])
wts = np.array([4, 3, 2, 1])
print(np.average(a, weights=wts, returned = True)) # Weight Average / Expected Value

(2.0, 10.0)


In [10]:
# standard deviation is the square root of average of squared deviations 
# from the mean

a = np.array([1, 2, 3, 4, 5])
print(np.mean(a))
print(np.std(a))
# this value is nothing but the square root of 2.

3.0
1.4142135623730951


In [11]:
"""
In-class activity - create an ndarray with values [2, -4, 6], [7, 4, 8] and [5, 10, 4]. 
Determine the following:
1) The maximum values along the columns and along the rows and for the whole array.
2) The minimum values along the columns and along the rows and for the whole array.
3) The mean and median values along the columns and along the rows and for the whole array.
4) The ptp along columns and rows and for the whole array. 
5) The standard deviation. 
"""
k = np.array([[2, -4, 6], [7, 4, 8], [5, 10, 4]])
print("k:", k)

print("max columns", np.max(k, 0))
print("max rows", np.max(k, 1))

print("min columns", np.min(k, 0))
print("min rows", np.min(k, 1))

print("mean columns", np.mean(k, 0))
print("mean rows", np.mean(k, 1))

print("median columns", np.median(k, 0))
print("median rows", np.median(k, 1))

print("ptp columns", np.ptp(k, 0))
print("ptp rows", np.ptp(k, 1))

print("std", np.std(k, 0))
print("std", np.std(k, 1))

k: [[ 2 -4  6]
 [ 7  4  8]
 [ 5 10  4]]
max columns [ 7 10  8]
max rows [ 6  8 10]
min columns [ 2 -4  4]
min rows [-4  4  4]
mean columns [4.66666667 3.33333333 6.        ]
mean rows [1.33333333 6.33333333 6.33333333]
median columns [5. 4. 6.]
median rows [2. 7. 5.]
ptp columns [ 5 14  4]
ptp rows [10  4  6]
std [2.05480467 5.73488351 1.63299316]
std [4.10960934 1.69967317 2.62466929]


In [12]:
a = np.array([[2, 3],[5, 6],[1, 1]])
print(a)
print(a.sum(axis = 0))
print(a.sum(axis = 1))

[[2 3]
 [5 6]
 [1 1]]
[ 8 10]
[ 5 11  2]


In [13]:
list1 = ['a', 'd', 'w']
print(len(list1))

3


In [14]:
print(list1[0]) # index 0 is for the first value in the list
print(list1[1]) # index 1 is for the second value in the list
print(list1[2]) # index 2 is for the third value in the list

a
d
w


In [15]:
print(list1[-1]) # listname[-1] returns the last value in the list

w
