In [1]:
from __future__ import print_function # for python 2 & python 3 compatibility
%matplotlib inline
import numpy as np

## 1. numpy.argsort

In [2]:
# Draw ten random integer ages in [30, 60) and heights in [150, 210);
# the upper bound of randint is exclusive.
ages = np.random.randint(30, 60, size=10)
heights = np.random.randint(150, 210, size=10)

for sample in (ages, heights):
    print(sample)

[55 32 35 47 47 53 30 43 44 35]
[179 175 195 197 186 189 174 167 199 171]


In [3]:
# Order the people by age; indexing heights with the same index array keeps
# each person's height paired with their age.
sorter = np.argsort(ages)
for column in (ages, heights):
    print(column[sorter])

[30 32 35 35 43 44 47 47 53 55]
[174 175 195 171 167 199 197 186 189 179]


### Computing inverse of permutation

In [4]:
# Ten single-letter strings and a random reordering of the indices 0..9.
original = np.array(list("abcdefghij"))
permutation = np.random.permutation(10)


In [5]:
# Show the permutation, the original letters, and the letters after permuting.
for shown in (permutation, original, original[permutation]):
    print(shown)

[6 8 0 4 3 5 9 2 7 1]
['a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j']
['g' 'i' 'a' 'e' 'd' 'f' 'j' 'c' 'h' 'b']


In [6]:
# argsort of a permutation yields its inverse: indexing the shuffled array
# with it restores the original order.
inverse_permutation = np.argsort(permutation)

shuffled = original[permutation]
shuffled[inverse_permutation]

array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'], dtype='<U1')

### Computing order of elements in array

Frequently it is important to compute the order (rank) of each value in an array.

<br>In other words, for each element of the array we want to find the number of elements smaller than it.

In [7]:
data = np.array([2, 3, 4, 5, 1])
print(data)

# argsort gives the indices that sort `data`; applying argsort again gives
# each element's rank (for distinct values, the count of smaller elements).
sort_indices = np.argsort(data)
print(np.argsort(sort_indices))
sort_indices

[2 3 4 5 1]
[1 2 3 4 0]


array([4, 0, 1, 2, 3])

#### np.clip()

In [8]:
# Clamp every value into the closed interval [-15, 15].
np.clip([-500, -400, -10, 0, 10, 400, 500], a_min=-15, a_max=15)

array([-15, -15, -10,   0,  10,  15,  15])

## 2. Broadcasting, numpy.newaxis

Weighted covariance matrix

NumPy's `cov` function accepts sample weights through its `fweights`/`aweights` arguments (NumPy 1.10 and later), but writing our own weighted covariance matrix is a good exercise in broadcasting.


In [9]:
# A 10x5 array of standard-normal draws, displayed as the cell output.
data = np.random.normal(size=(10, 5))
data

array([[ 0.62697831, -0.82936594, -0.51035373,  2.11354918, -1.16451852],
       [-0.88368543,  0.71411343, -0.03361894, -0.92692087,  0.27268249],
       [-0.75195205,  0.25862438,  0.28456944,  0.17131761,  1.50607021],
       [-0.7844922 ,  1.12548012,  0.62508268, -0.47310641,  1.57188669],
       [-0.40810572,  1.31967259, -1.56252608, -0.58223565,  0.29869067],
       [-0.64374009,  0.43392141,  0.54347548,  1.12752574,  2.21099653],
       [ 2.35705235, -0.82332   , -0.17441953, -0.47774366,  0.14900125],
       [-0.06579124, -1.05934393,  0.22793442,  0.9079189 , -0.51671027],
       [ 0.34508943, -0.06117332,  0.70522988,  0.28856896,  0.71409698],
       [-0.75343149, -1.17316834,  0.27487051, -0.15465735,  0.40863545]])

In [10]:
# Ten weights drawn uniformly from [0, 1).
weights = np.random.rand(10)
weights

array([0.66837518, 0.04826067, 0.59164297, 0.0552621 , 0.56002824,
       0.08915016, 0.06841808, 0.10042257, 0.73559933, 0.36945704])

In [11]:
def covariance(data, weights, center=False):
    """Weighted second-moment / covariance matrix of the columns of ``data``.

    Parameters
    ----------
    data : array_like, shape (n_samples, n_features)
        One sample per row.
    weights : array_like, shape (n_samples,)
        One weight per sample; rescaled internally so the weights sum to 1.
    center : bool, default False
        If False (the original behaviour), return ``sum_i w_i * x_i x_i^T``
        without subtracting the mean. That equals the covariance only when
        the weighted column means are zero. If True, subtract the weighted
        column means first, giving the true weighted covariance (the same
        value as ``np.cov(data.T, aweights=weights, ddof=0)``).

    Returns
    -------
    ndarray, shape (n_features, n_features)

    Raises
    ------
    ValueError
        If the weights sum to zero, so they cannot be normalised.
    """
    # Accept lists/tuples as well as arrays.
    data = np.asarray(data)
    weights = np.asarray(weights)

    total = weights.sum()
    if total == 0:
        raise ValueError("weights must not sum to zero")
    weights = weights / total

    if center:
        # Weighted mean of every column; valid because weights now sum to 1.
        data = data - weights.dot(data)

    # weights[:, np.newaxis] broadcasts one weight across each row of data.
    return data.T.dot(weights[:, np.newaxis] * data)

In [12]:
# Report the input shapes, then display the weighted matrix as the cell output.
print(f'shape of data {data.shape} and shape of weights is {weights.shape}')
covariance(data, weights)

shape of data (10, 5) and shape of weights is (10,)


array([[ 0.44941574, -0.20784625,  0.0099394 ,  0.29548458, -0.40710349],
       [-0.20784625,  0.68652317, -0.28445612, -0.48958188,  0.34282694],
       [ 0.0099394 , -0.28445612,  0.62019412,  0.00537151,  0.28866913],
       [ 0.29548458, -0.48958188,  0.00537151,  1.07361627, -0.40910951],
       [-0.40710349,  0.34282694,  0.28866913, -0.40910951,  1.01606504]])

In [13]:
# NumPy's unweighted sample covariance of the columns, for comparison. It
# ignores `weights` and subtracts the column means, so it is not expected to
# match the weighted result.
np.cov(data.T)

array([[ 1.01000264, -0.4515074 , -0.11778518,  0.12450299, -0.44819594],
       [-0.4515074 ,  0.84893987, -0.12329924, -0.39643544,  0.5139893 ],
       [-0.11778518, -0.12329924,  0.45677481,  0.06705645,  0.33289946],
       [ 0.12450299, -0.39643544,  0.06705645,  0.88353569, -0.26278776],
       [-0.44819594,  0.5139893 ,  0.33289946, -0.26278776,  1.0158009 ]])

In [14]:
# Alternative way to do this: one einsum that multiplies data[i, j],
# data[i, k] and the normalised weight of sample i, then sums over i.
normalized_weights = weights / weights.sum()
np.einsum('ij, ik, i -> jk', data, data, normalized_weights)

array([[ 0.44941574, -0.20784625,  0.0099394 ,  0.29548458, -0.40710349],
       [-0.20784625,  0.68652317, -0.28445612, -0.48958188,  0.34282694],
       [ 0.0099394 , -0.28445612,  0.62019412,  0.00537151,  0.28866913],
       [ 0.29548458, -0.48958188,  0.00537151,  1.07361627, -0.40910951],
       [-0.40710349,  0.34282694,  0.28866913, -0.40910951,  1.01606504]])

#### More about einsum: https://stackoverflow.com/questions/26089893/understanding-numpys-einsum

In [15]:
# A is the vector [0, 1, 2]; B is the 3x4 matrix holding 0..11 row by row.
A = np.arange(3)

B = np.arange(12).reshape(3, 4)

In [20]:
# Display both operands as a tuple.
A, B

(array([0, 1, 2]), array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]]))

In [17]:
# out[i] = sum_j A[i] * B[i, j]: scale row i of B by A[i], then sum each row.
np.einsum('i,ij->i', A, B)

array([ 0, 22, 76])

In [18]:
# out[j] = sum_i A[i] * B[i, j]: scale row i of B by A[i], then sum each column.
np.einsum('i,ij->j', A, B)


array([20, 23, 26, 29])

In [19]:
# out[i, j] = A[i] * B[i, j]: both indices are kept, so nothing is summed.
np.einsum('i,ij->ij', A, B)

array([[ 0,  0,  0,  0],
       [ 4,  5,  6,  7],
       [16, 18, 20, 22]])

In [24]:
# Broadcasting form of einsum('i,ij->i', A, B): A becomes a column so that
# A[i] scales row i of B, then each row is summed.
(A[:, np.newaxis] * B).sum(axis=1)

array([ 0, 22, 76])

In [26]:
# Two 3x3 integer matrices.
A = np.array([
    [1, 1, 1],
    [2, 2, 2],
    [5, 5, 5],
])

B = np.array([
    [0, 1, 0],
    [1, 1, 0],
    [1, 1, 1],
])

In [27]:
# out[i, k] = sum_j A[i, j] * B[j, k]: summing the shared index j gives the
# ordinary matrix product.
np.einsum('ij,jk->ik', A, B)

array([[ 2,  3,  1],
       [ 4,  6,  2],
       [10, 15,  5]])

In [34]:
# A holds 0..9 and B holds 5..14 (arange excludes the stop value).
A, B = np.arange(10), np.arange(5, 15)
A, B

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]))

In [35]:
# Empty output subscript: the only index i is summed away, giving the total of A.
np.einsum('i->', A)

45

In [37]:
# out[i] = A[i] * B[i]: the index is kept in the output, so this is the
# element-wise product.
np.einsum('i,i->i', A, B)

array([  0,   6,  14,  24,  36,  50,  66,  84, 104, 126])

In [45]:
# A is the 2x2 matrix [[0, 1], [2, 3]]; B is the length-2 vector [4, 5].
A = np.arange(4).reshape(2, 2)
B = np.arange(4, 6)
A, B

(array([[0, 1],
        [2, 3]]), array([4, 5]))

In [46]:
# Trace: a repeated index with no explicit output sums the diagonal entries
# A[i, i] (use 'ii->i' to get the diagonal itself).
np.einsum('ii', A)

3

In [63]:
# out[j, i] = A[i, j] * B[i]: scale row i of A by B[i], then transpose.
np.einsum('ij,i->ji', A, B)

array([[ 0, 10],
       [ 4, 15]])

In [102]:
# Small 2x2 operands; B uses powers of ten so each product shows which
# entries of A and B it came from.
A = np.array([[1, 2], [3, 4]])
B = np.array([[10, 100], [1000, 10000]])
print(f'{A}\n{B}')

[[1 2]
 [3 4]]
[[   10   100]
 [ 1000 10000]]


In [103]:
# out[i, j, k] = A[i, j] * B[i, k]: for each i, the outer product of row i of
# A with row i of B.
np.einsum('ij,ik->ijk', A, B)

array([[[   10,   100],
        [   20,   200]],

       [[ 3000, 30000],
        [ 4000, 40000]]])

In [104]:
# out[i, k] = sum_j A[i, j] * B[i, k]: row i of B scaled by the sum of row i of A.
np.einsum('ij,ik->ik', A, B)

array([[   30,   300],
       [ 7000, 70000]])

In [109]:
# 3. out[i, j] = A[i, j] * B[i, j]: element-wise product, nothing is summed.
np.einsum('ij,ij->ij', A, B)

array([[   10,   200],
       [ 3000, 40000]])

In [110]:
# 4. out[i, j, k] = A[i, i] * B[j, k]: each diagonal entry of A scales a full
# copy of B.
np.einsum('ii,jk->ijk', A, B)

array([[[   10,   100],
        [ 1000, 10000]],

       [[   40,   400],
        [ 4000, 40000]]])

In [111]:
# out[i, j, k] = A[i, j] * B[j, k]: the individual terms of the matrix
# product, kept separate instead of being summed over j.
np.einsum('ij,jk->ijk', A, B)

array([[[   10,   100],
        [ 2000, 20000]],

       [[   30,   300],
        [ 4000, 40000]]])