In [1]:
from __future__ import print_function # for python 2 & python 3 compatibility
%matplotlib inline
import numpy as np

## 1. numpy.argsort

In [2]:
# Ten random people: integer ages drawn from [30, 60) and heights from [150, 210).
ages = np.random.randint(30, 60, size=(10,))
heights = np.random.randint(150, 210, size=(10,))

# One array per output line.
print(ages, heights, sep="\n")

[35 38 43 57 34 57 53 42 44 47]
[194 157 182 153 160 191 161 155 199 177]


In [17]:
# Order the people by age. Indexing `heights` with the same index array keeps
# every height paired with the age it belongs to.
sorter = ages.argsort()
print(ages[sorter], heights[sorter], sep="\n")

[34 35 38 42 43 44 47 53 57 57]
[160 194 157 155 182 199 177 161 153 191]


### Computing inverse of permutation

In [21]:
# Ten letters to shuffle, and a random reordering of the indices 0..9.
original = np.array([letter for letter in 'abcdefghij'])
permutation = np.random.permutation(10)


In [22]:
# Show the index permutation, the untouched letters, and the shuffled letters.
print(permutation, original, original[permutation], sep="\n")

[0 9 6 7 2 1 4 5 8 3]
['a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j']
['a' 'j' 'g' 'h' 'c' 'b' 'e' 'f' 'i' 'd']


In [24]:
# argsort of a permutation is its inverse: permutation[inverse_permutation]
# is 0, 1, ..., n-1, so indexing through both restores the original order.

inverse_permutation = permutation.argsort()
original[permutation[inverse_permutation]]

array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'], dtype='<U1')

### Computing order of elements in array

Frequently it is important to compute the order (rank) of each value in an array.

<br>In other words, for each element of the array we want to find the number of elements smaller than it.

In [36]:
# Applying argsort twice gives, for each element, the number of smaller
# elements (its rank); a single argsort only gives the sorting indices.
data = np.array([2, 3, 4, 5, 1])
print(data, data.argsort().argsort(), sep="\n")
data.argsort()

[2 3 4 5 1]
[1 2 3 4 0]


array([4, 0, 1, 2, 3])

#### np.clip()

In [37]:
# Limit every value to [a_min, a_max]: anything outside is replaced by the nearer bound.
np.clip([-500,-400,-10,0,10,400,500],a_min=-15,a_max=15)

array([-15, -15, -10,   0,  10,  15,  15])

## 2. Broadcasting, numpy.newaxis

### Weighted covariance matrix

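numpy has a `cov` function; older versions did not support sample weights (newer ones accept the `fweights`/`aweights` arguments). As an exercise in broadcasting, let's write our own weighted covariance matrix.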


In [80]:
# Ten samples (rows) of five standard-normal features (columns).
data = np.random.normal(0.0, 1.0, (10, 5))
data

array([[ 0.25469666, -0.63948836, -0.10349587, -0.41905414, -1.60108748],
       [-0.60917092,  0.13755522, -0.15820003, -1.22136298, -0.90924207],
       [-0.68096012,  1.14892366,  0.09638024,  0.67488878,  0.93735122],
       [ 0.3662721 , -0.54466571,  0.18304563, -0.4620419 , -1.67682492],
       [ 0.18530902, -1.50645457, -0.0187298 , -0.26128559, -1.87157569],
       [-1.64086875,  0.658265  , -0.20086578,  0.13490622,  0.93420914],
       [ 0.82858889,  1.87931916, -0.11867659,  0.76611029,  0.74647806],
       [-0.45698954,  1.22284896, -1.52623677, -0.95822136,  0.38547342],
       [ 0.31029919, -0.2881187 , -1.33687559,  0.56807763,  0.18024027],
       [-0.26069674,  0.02481501, -0.68227442, -0.43197673, -0.59236399]])

In [81]:
# Ten random weights; the length matches the ten rows of `data`.
weights=np.random.rand(10)
weights

array([0.68756514, 0.70880171, 0.93722909, 0.64893491, 0.35314641,
       0.44668649, 0.4636987 , 0.47297428, 0.56947043, 0.7553067 ])

In [85]:
def covariance(data, weights, center=False):
    """Weighted second-moment / covariance matrix of the columns of ``data``.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_features)
        One sample per row, one feature per column.
    weights : array-like, shape (n_samples,)
        Sample weights. They are normalized internally to sum to 1, so only
        their relative sizes matter.
    center : bool, default False
        If False (the default, and the original behaviour of this function),
        return ``sum_i w_i * x_i x_i^T``. This is the *uncentered* weighted
        second moment; it equals the covariance only when every feature has
        zero weighted mean.
        If True, subtract the weighted mean of each feature first, giving the
        biased weighted covariance, i.e. the same value as
        ``np.cov(data.T, aweights=weights, ddof=0)``.

    Returns
    -------
    numpy.ndarray, shape (n_features, n_features)

    Raises
    ------
    ValueError
        If ``data`` is not 2-D, if ``weights`` does not hold exactly one
        value per row of ``data``, or if the weights sum to zero.
    """
    data = np.asarray(data)
    weights = np.asarray(weights, dtype=float)
    if data.ndim != 2:
        raise ValueError("data must be 2-D (n_samples, n_features)")
    if weights.shape != (data.shape[0],):
        raise ValueError("weights must have shape (n_samples,)")
    total = weights.sum()
    if total == 0:
        # Normalizing would divide by zero and silently produce nan/inf.
        raise ValueError("weights must not sum to zero")
    weights = weights / total
    if center:
        # weights.dot(data) is the weighted mean of every column.
        data = data - weights.dot(data)
    # weights[:, np.newaxis] has shape (n_samples, 1) and broadcasts across
    # the feature axis, scaling each row by its weight.
    return data.T.dot(weights[:, np.newaxis] * data)

In [89]:
# Show that `weights` holds one entry per row of `data`, then evaluate the weighted matrix.
print(f'shape of data {data.shape} and shape of weights is {weights.shape}')
covariance(data,weights)

shape of data (10, 5) and shape of weights is (10,)


array([[ 0.42479695, -0.200736  ,  0.05966085,  0.08014134, -0.22168303],
       [-0.200736  ,  0.84581857, -0.12567057,  0.18961166,  0.71486409],
       [ 0.05966085, -0.12567057,  0.42214211,  0.09963175, -0.02008025],
       [ 0.08014134,  0.18961166,  0.09963175,  0.4644242 ,  0.48233997],
       [-0.22168303,  0.71486409, -0.02008025,  0.48233997,  1.19720716]])

In [91]:
# NumPy's own estimate, treating rows as observations. np.cov subtracts the
# mean of each feature and divides by N - 1 by default, and no weights are
# passed here, so these numbers are not expected to match `covariance`.
np.cov(data, rowvar=False)

array([[ 0.50334005, -0.14764389,  0.01123938,  0.0917796 , -0.32489579],
       [-0.14764389,  1.04667053, -0.10672783,  0.22666948,  0.98408795],
       [ 0.01123938, -0.10672783,  0.35787869,  0.05325432, -0.21264541],
       [ 0.0917796 ,  0.22666948,  0.05325432,  0.4642215 ,  0.42293778],
       [-0.32489579,  0.98408795, -0.21264541,  0.42293778,  1.26034094]])

In [92]:
# Alternative way to do this: einsum sums w[i] * data[i, j] * data[i, k] over i,
# which is the same product as data.T.dot(w[:, np.newaxis] * data) with normalized w.
np.einsum('ij, ik, i -> jk', data, data, weights / weights.sum())

array([[ 0.42479695, -0.200736  ,  0.05966085,  0.08014134, -0.22168303],
       [-0.200736  ,  0.84581857, -0.12567057,  0.18961166,  0.71486409],
       [ 0.05966085, -0.12567057,  0.42214211,  0.09963175, -0.02008025],
       [ 0.08014134,  0.18961166,  0.09963175,  0.4644242 ,  0.48233997],
       [-0.22168303,  0.71486409, -0.02008025,  0.48233997,  1.19720716]])

#### More about Einsum : https://stackoverflow.com/questions/26089893/understanding-numpys-einsum

In [93]:
# A = [0, 1, 2]; B is the 3x4 matrix holding 0..11 in row-major order:
#   [[ 0,  1,  2,  3],
#    [ 4,  5,  6,  7],
#    [ 8,  9, 10, 11]]
A = np.arange(3)

B = np.arange(12).reshape(3, 4)

In [94]:
# Display both einsum operands.
A,B

(array([0, 1, 2]), array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]]))

In [97]:
# 'i,ij->i': j is missing from the output, so each row of B is summed over j and scaled by A[i].
np.einsum('i,ij->i',A,B)

array([ 0, 22, 76])

In [101]:
# 'i,ij->j': i is missing from the output, so it is summed over -- the vector-matrix product A.dot(B).
np.einsum('i,ij->j',A,B)


array([20, 23, 26, 29])

In [102]:
# 'i,ij->ij': both indices are kept, nothing is summed; row i of B is multiplied by A[i]
# (the same as the broadcast product A[:, np.newaxis] * B).
np.einsum('i,ij->ij',A,B)

array([[ 0,  0,  0,  0],
       [ 4,  5,  6,  7],
       [16, 18, 20, 22]])