# NumPy Reference
- Reference
https://learning.oreilly.com/library/view/python-for-data/9781449323592/ch04.html

* Python lists can hold elements of heterogeneous types. Each element is a separate object, so access requires two levels of pointer indirection.
* Python's `array` supports only a homogeneous element type. This gives efficient storage.
* np.array, in addition to efficient storage, also provides efficient operations on the data.
* Use np.array for ALL homogeneous data array needs.

In [66]:
import numpy as np
import pandas as pd
from numpy.random import default_rng
rng = default_rng()

### Difference in performance between a Python list and np.array

In [3]:
%timeit [ a**2 for a in range(10) ]

2.58 µs ± 103 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [4]:
 %timeit np.arange(10) ** 2

1.18 µs ± 71.2 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [5]:
np.arange(12)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [6]:
np.zeros(10, dtype=int) # default will be float

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [7]:
np.ones((3,4)) # 2 D array

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [8]:
np.full((2,3,4), 123) # start with some value.

array([[[123, 123, 123, 123],
        [123, 123, 123, 123],
        [123, 123, 123, 123]],

       [[123, 123, 123, 123],
        [123, 123, 123, 123],
        [123, 123, 123, 123]]])

In [9]:
np.arange(1,20,2)

array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19])

In [10]:
np.linspace(1,2,10) # linearly spaced numbers.

array([1.        , 1.11111111, 1.22222222, 1.33333333, 1.44444444,
       1.55555556, 1.66666667, 1.77777778, 1.88888889, 2.        ])

In [11]:
np.random.random((3,3)) # random nos

array([[0.34447042, 0.46465401, 0.57264347],
       [0.63879384, 0.00937727, 0.72993168],
       [0.97001281, 0.98843865, 0.49750511]])

In [12]:
np.random.randint(1,10, (3,3))

array([[6, 1, 5],
       [3, 3, 4],
       [2, 8, 1]])

In [13]:
np.eye(3) # identity matrix

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [14]:
# shape, dim and size

x3 = np.random.randint(10, size=(3, 4, 5))  # Three-dimensional array
print (f'dim  : {x3.ndim}')
print (f'shape: {x3.shape}')
print (f'size : {x3.size}')

dim  : 3
shape: (3, 4, 5)
size : 60


In [15]:
# 1-D indexing and slicing follow the a[start:stop:step] convention.
a = np.arange(10)
print('array: ', a)
print(a[:5])          # first five elements
print(a[5:])          # from index 5 to the end
print(a[3], a[-3])    # negative indices count from the end
print(a[2:8:2])       # every second element between index 2 and 8
print('reverse: ', a[::-1])
print(list(reversed(a)))

array:  [0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4]
[5 6 7 8 9]
3 7
[2 4 6]
reverse:  [9 8 7 6 5 4 3 2 1 0]
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]


In [16]:
a = np.arange(24).reshape(4,6)
print ('array :\n', a)
print (a[0,0])
print (a[0])
print (a[:,2])
print (a[::2,0])
print (a[0, ::2])
print (a[::2, ::2])


array :
 [[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
0
[0 1 2 3 4 5]
[ 2  8 14 20]
[ 0 12]
[0 2 4]
[[ 0  2  4]
 [12 14 16]]


In [17]:
a = np.arange(10)
print ('a :', a)
b = a  # plain assignment makes no copy at all: b is just another name (alias) for the same array
b[0] = 100 
print ('a: ', a)  # the write through b is visible through a
print (np.may_share_memory(a,b)) # True means a and b may use the same memory (no independent copy was made)

a : [0 1 2 3 4 5 6 7 8 9]
a:  [100   1   2   3   4   5   6   7   8   9]
True


In [18]:
a = np.arange(10)
print ('a :', a)
b = a.copy()  # independent copy: b gets its own data
b[0] = 100 
print ('a: ', a)  # a is unchanged by the write to b
print (np.may_share_memory(a,b)) # False means the two arrays do not share memory

a : [0 1 2 3 4 5 6 7 8 9]
a:  [0 1 2 3 4 5 6 7 8 9]
False


In [19]:
# ufuncs - these are fast; use them instead of looping in Python.
# Both binary (e.g. a + 10) and unary (e.g. -a) ufuncs work element-wise.
a = np.arange(10)
print ('a :', a)
print (a+10)
print (a-2)
print (-a)
print (a/2)   # true division: result is float
print (a//2) # floor division: result stays integer
print (a % 3) # modulo (remainder)
print (a*2 + 2)
print (a**2)

a : [0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[-2 -1  0  1  2  3  4  5  6  7]
[ 0 -1 -2 -3 -4 -5 -6 -7 -8 -9]
[0.  0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5]
[0 0 1 1 2 2 3 3 4 4]
[0 1 2 0 1 2 0 1 2 0]
[ 2  4  6  8 10 12 14 16 18 20]
[ 0  1  4  9 16 25 36 49 64 81]


In [20]:
# Reduce and accumulate

a = np.arange(10)
print ('array :', a)
print ('reduce :',np.add.reduce(a))
print ('accumulate: ', np.add.accumulate(a))
print (np.subtract.accumulate(a))

array : [0 1 2 3 4 5 6 7 8 9]
reduce : 45
accumulate:  [ 0  1  3  6 10 15 21 28 36 45]
[  0  -1  -3  -6 -10 -15 -21 -28 -36 -45]


In [21]:
a = np.arange(12)
print('array:\n', a)
a = a.reshape(3,4)
print('reshapped:\n', a)

array:
 [ 0  1  2  3  4  5  6  7  8  9 10 11]
reshapped:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [22]:
print ('transposed:\n', a.T)

transposed:
 [[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [23]:
# Multi-dimensional indexing with index lists - one list per axis: rows, cols, ...
a[[0,1,2],[0,1,2]] # picks the elements at (0,0), (1,1), (2,2)

array([ 0,  5, 10])

In [24]:
b = np.random.rand(12).reshape(3,4)*10
b

array([[9.03203631, 8.42692579, 8.82909384, 9.06024561],
       [9.28399259, 7.96671262, 7.92366686, 2.12327728],
       [7.72258553, 3.07059179, 9.08503681, 1.88125918]])

In [25]:
np.maximum(a,b)

array([[ 9.03203631,  8.42692579,  8.82909384,  9.06024561],
       [ 9.28399259,  7.96671262,  7.92366686,  7.        ],
       [ 8.        ,  9.        , 10.        , 11.        ]])

In [26]:
a+b

array([[ 9.03203631,  9.42692579, 10.82909384, 12.06024561],
       [13.28399259, 12.96671262, 13.92366686,  9.12327728],
       [15.72258553, 12.07059179, 19.08503681, 12.88125918]])

In [27]:
np.arange(0,10,0.1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. , 1.1, 1.2,
       1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2. , 2.1, 2.2, 2.3, 2.4, 2.5,
       2.6, 2.7, 2.8, 2.9, 3. , 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8,
       3.9, 4. , 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5. , 5.1,
       5.2, 5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9, 6. , 6.1, 6.2, 6.3, 6.4,
       6.5, 6.6, 6.7, 6.8, 6.9, 7. , 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7,
       7.8, 7.9, 8. , 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9, 9. ,
       9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9])

In [28]:
c = np.random.rand(3,3)*10
c

array([[7.38094721, 8.62465943, 3.31202145],
       [8.64541519, 3.26739348, 9.13370256],
       [4.62199233, 0.12835017, 4.86776413]])

In [29]:
c > 5

array([[ True,  True, False],
       [ True, False,  True],
       [False, False, False]])

In [30]:
print (f'mean of array is {c.mean()}')
print (f'standard diviattion of array is {c.std()}')
print (f'variance diviattion of array is {c.var()}')

print (f'max  of array is {c.max()}')
print (f'min  of array is {c.min()}')
print (f'sum  of array is {c.sum()}')
c.cumsum()

mean of array is 5.55358288240508
standard diviattion of array is 2.909182269664856
variance diviattion of array is 8.463341478132364
max  of array is 9.133702555989302
min  of array is 0.12835017204276156
sum  of array is 49.98224594164572


array([ 7.38094721, 16.00560664, 19.31762809, 27.96304327, 31.23043676,
       40.36413931, 44.98613164, 45.11448181, 49.98224594])

In [31]:
d = np.random.rand(10)*10
d

array([9.71311584, 6.98570882, 8.75115666, 8.52445854, 3.76060542,
       7.29246778, 3.79863167, 8.15692291, 0.25206676, 7.43989509])

In [32]:
d.sort() # in-place sort: d itself is reordered
d

array([0.25206676, 3.76060542, 3.79863167, 6.98570882, 7.29246778,
       7.43989509, 8.15692291, 8.52445854, 8.75115666, 9.71311584])

In [33]:
d = np.random.rand(10)*10
np.sort(d) # not in place: returns the sorted values as a new array

array([0.54915668, 1.36020405, 2.27979528, 4.75603767, 5.3856947 ,
       5.94317616, 6.24615673, 7.01219439, 8.3486116 , 9.11497636])

In [34]:
e = np.array([1,1,2,2,3,3,4,5])
e
np.unique(e)

array([1, 2, 3, 4, 5])

In [35]:
# Testing membership: for each element of the first argument, is it present in e?
# np.isin is the supported API; np.in1d is deprecated since NumPy 2.0.
np.isin([2,4,6,8,10],e)

array([ True,  True, False, False, False])

In [36]:
np.union1d([2,4,8,10], e)

array([ 1,  2,  3,  4,  5,  8, 10])

In [37]:
np.save('sample-nparray',e)

In [38]:
np.load('sample-nparray.npy')

array([1, 1, 2, 2, 3, 3, 4, 5])

In [39]:
n = np.random.rand(3,3)*10
n

array([[2.27339165, 4.01043983, 5.27942052],
       [8.66838717, 1.13691095, 5.8152069 ],
       [0.43124041, 6.02999471, 9.28909131]])

In [40]:
np.diag(n)

array([2.27339165, 1.13691095, 9.28909131])

In [41]:
A = np.dot(n, np.diag(n)) # matrix mult - multiple ways ...
print(A)

A1 = n.dot(np.diag(n))
print (A1)

A2 = n @ np.diag(n)
print (A2)

[58.76884181 75.01719341 94.12316269]
[58.76884181 75.01719341 94.12316269]
[58.76884181 75.01719341 94.12316269]


In [42]:
np.linalg.eig(n) # returns the eigenvalues and the matrix of eigenvectors

(array([-3.80746096,  1.68737731, 14.81947756]),
 array([[ 0.23678754, -0.43042401,  0.46645407],
        [-0.88545001, -0.69511117,  0.57952954],
        [ 0.39988742,  0.57580868,  0.66825602]]))

In [43]:
np.linalg.inv(n) # inverse of matrix

array([[ 0.25737749,  0.05691092, -0.18190722],
       [ 0.81938923, -0.19789025, -0.34181251],
       [-0.54385344,  0.12581799,  0.33798498]])

In [44]:
a = np.random.randint(10, size = (10))
print ('array :',a)

i = a.argsort()
print ('sorted indices: ', i)

a.sort() # in place sort - quick sort. O ( N log N )
print ('sorted :', a)

array : [9 7 4 4 6 1 5 4 8 6]
sorted indices:  [5 2 3 7 6 4 9 1 8 0]
sorted : [1 4 4 4 5 6 6 7 8 9]


In [45]:
x = np.arange(10)
print ('array: ', x)
# An element is a multiple of 3 when it leaves no remainder. Integer modulo is
# exact; comparing x / 3 with x // 3 would go through float division, which
# loses precision for large integers.
mask_mult_3 = x % 3 == 0
print ('mastk_mult_3 :\n', mask_mult_3)

array:  [0 1 2 3 4 5 6 7 8 9]
mastk_mult_3 :
 [ True False False  True False False  True False False  True]


In [46]:
x = np.arange(10)
m2 = x % 2 == 0
m3 = x % 3 == 0
print ('m2 :', m2)
print ('m3 :', m3)
print ('m :', m2 | m3)

m2 : [ True False  True False  True False  True False  True False]
m3 : [ True False False  True False False  True False False  True]
m : [ True False  True  True  True False  True False  True  True]


In [47]:
x = np.arange(20)
print ('array    : ', x)
i = 3
m = ( x <= i ) | (x % i != 0)
print ('mask     : ', m)
print ('filtered : ', x[m])


array    :  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
mask     :  [ True  True  True  True  True  True False  True  True False  True  True
 False  True  True False  True  True False  True]
filtered :  [ 0  1  2  3  4  5  7  8 10 11 13 14 16 17 19]


In [48]:
# ref: https://numpy.org/doc/stable/reference/generated/numpy.frompyfunc.html
def myfunc(x, y):
    """Return x*y + x + y and print a one-line trace of the call.

    The trace makes it visible how often, and with which arguments, the
    function is invoked.
    """
    total = x*y + x + y
    print (f'myfunc({x},{y}): {total}')
    return total

# Two random integer arrays of the same shape to feed the wrapped function.
A = rng.integers(0,10,size = (3,4))
B = rng.integers(10,20,size = A.shape)
print ('A:\n', A)
print ('B:\n', B)

# Wrap the plain Python function so it can be applied to whole arrays.
myfunc_ptr = np.frompyfunc(myfunc, 2, 1) # 2 = number of input args, 1 = number of outputs
C = myfunc_ptr (A, B) # myfunc is called once per pair of corresponding elements of A and B
print ("C:\n", C)

A:
 [[6 1 3 9]
 [0 3 7 3]
 [8 0 1 9]]
B:
 [[18 19 18 11]
 [12 11 19 16]
 [19 10 19 12]]
myfunc(6,18): 132
myfunc(1,19): 39
myfunc(3,18): 75
myfunc(9,11): 119
myfunc(0,12): 12
myfunc(3,11): 47
myfunc(7,19): 159
myfunc(3,16): 67
myfunc(8,19): 179
myfunc(0,10): 10
myfunc(1,19): 39
myfunc(9,12): 129
C:
 [[132 39 75 119]
 [12 47 159 67]
 [179 10 39 129]]


In [49]:
# broadcasting
A = np.arange(12).reshape(3,4)
print (A)
print (A+3)
B = [1, 1, 2, 2]
print (A+B)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 3  4  5  6]
 [ 7  8  9 10]
 [11 12 13 14]]
[[ 1  2  4  5]
 [ 5  6  8  9]
 [ 9 10 12 13]]


In [50]:
A = np.arange(12).reshape(3,4)
print ("A:\n", A)
print (f'Max(A) : {A.max()}')
print (f'colmax(A) : {np.max(A, axis = 0)}')
print (f'rowmax(A) : {np.max(A, axis = 1)}')

A:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Max(A) : 11
colmax(A) : [ 8  9 10 11]
rowmax(A) : [ 3  7 11]


In [51]:
# Multiply every column of A by its own column index and print each result.
A = np.arange(12).reshape(3,4)
nc = A.shape[1]
print(A)
print (f'ncols {nc}')
# Iterating over A.T yields the columns of A one by one. The loop variables
# have distinct names so this cell does not overwrite the notebook-level
# matrix `n` that the linear-algebra cells rely on.
for col_idx, column in enumerate(A.T):
    print (column * col_idx)


[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
ncols 4
[0 0 0]
[1 5 9]
[ 4 12 20]
[ 9 21 33]


In [52]:
L = [0, 1, 2, 3, 4, 5, 6, 7, 8 ]
A = np.array(L).reshape(3,3)
print (A)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [53]:
# np.where - ternary operation
a = np.arange(10)
print(a)
b = np.zeros(10)
c = np.ones(10)
# like if-then-else - can be scalar
a1 = np.where(a > 5, 0, 1)
print(a1)

# or array
a2 = np.where(a%2==0, b, c)
print(a2)

# or combination of them
a3 = np.where(a%3==0, a*3, 3)
print(a3)

[0 1 2 3 4 5 6 7 8 9]
[1 1 1 1 1 1 0 0 0 0]
[0. 1. 0. 1. 0. 1. 0. 1. 0. 1.]
[ 0  3  3  9  3  3 18  3  3 27]


In [54]:
a = np.arange(1, 20, 2)
print(a)
print(np.any(a>10), np.any(a<0))
print(np.all(a>1), np.all(a<20))


[ 1  3  5  7  9 11 13 15 17 19]
True False
False True


In [55]:
# Summing a boolean array counts how many of its values are True
a = np.arange(10)
print(a)
print(a.sum())

b = a>5  # element-wise comparison gives a boolean array
print (b)
print (b.sum()) # number of True values

# so, to count the entries matching a condition, chain the comparison with .sum()
print ((a%2 == 0).sum())

[0 1 2 3 4 5 6 7 8 9]
45
[False False False False False False  True  True  True  True]
4
5


## Concat / stacking

In [64]:
a = np.arange(12).reshape(3,4)
display(a)

a1 = np.concatenate([a,a])
display(a1)

a2 = np.concatenate([a,a], axis =1 )
display(a2)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

In [68]:

# pd.concat only accepts Series/DataFrame objects; a is a NumPy ndarray, so this call raises TypeError.
pd.concat([a,a])

TypeError: cannot concatenate object of type '<class 'numpy.ndarray'>'; only Series and DataFrame objs are valid