Topics covered:

- np.sort
- np.append
- np.concatenate
- np.unique
- np.expand_dims
- np.where
- np.argmax
- np.cumsum
- np.percentile
- np.histogram
- np.corrcoef
- np.isin
- np.flip
- np.put
- np.delete
- np.union1d
- np.intersect1d
- np.setdiff1d
- np.setxor1d
- np.in1d
- np.clip


### np.sort

Return a sorted copy of an array.

https://numpy.org/doc/stable/reference/generated/numpy.sort.html

In [1]:
# code
import numpy as np
a = np.random.randint(1,100,15)
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [2]:
b = np.random.randint(1,100,24).reshape(6,4)
b

array([[32, 10, 68, 37],
       [67, 80, 83, 54],
       [47,  4, 76, 14],
       [70, 39, 30,  4],
       [98,  4, 74, 35],
       [62, 31, 57, 94]])

In [3]:
np.sort(a)   # sort in ascending order

array([10, 21, 26, 30, 38, 38, 41, 50, 51, 52, 52, 53, 59, 65, 72])

In [4]:
np.sort(a)[::-1]  # sort in descending order

array([72, 65, 59, 53, 52, 52, 51, 50, 41, 38, 38, 30, 26, 21, 10])

In [5]:
np.sort(b)   # default -> row-wise ascending sort

array([[10, 32, 37, 68],
       [54, 67, 80, 83],
       [ 4, 14, 47, 76],
       [ 4, 30, 39, 70],
       [ 4, 35, 74, 98],
       [31, 57, 62, 94]])

In [6]:
np.sort(b,axis=0)  # column-wise ascending sort

array([[32,  4, 30,  4],
       [47,  4, 57, 14],
       [62, 10, 68, 35],
       [67, 31, 74, 37],
       [70, 39, 76, 54],
       [98, 80, 83, 94]])

In [7]:
np.sort(b,axis=0)[::-1]  # column-wise descending sort

array([[98, 80, 83, 94],
       [70, 39, 76, 54],
       [67, 31, 74, 37],
       [62, 10, 68, 35],
       [47,  4, 57, 14],
       [32,  4, 30,  4]])

In [12]:
np.sort(b,axis=1)  # row-wise ascending sort

array([[10, 32, 37, 68],
       [54, 67, 80, 83],
       [ 4, 14, 47, 76],
       [ 4, 30, 39, 70],
       [ 4, 35, 74, 98],
       [31, 57, 62, 94]])

In [28]:
np.sort(b)[::,::-1]  # row-wise descending order

array([[68, 37, 32, 10],
       [83, 80, 67, 54],
       [76, 47, 14,  4],
       [70, 39, 30,  4],
       [98, 74, 35,  4],
       [94, 62, 57, 31]])

In [None]:
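# axis=0 -> sort down each column; axis=1 -> sort along each row (the default sorts along the last axis, i.e. row-wise for a 2D array)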

### np.append

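numpy.append() returns a new array with the given values appended to the end along the specified axis. If no axis is given, both the array and the values are flattened before appending.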

https://numpy.org/doc/stable/reference/generated/numpy.append.html

In [29]:
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [30]:
np.append(a,200)

array([ 59,  38,  38,  52,  51,  52,  53,  30,  72,  65,  21,  26,  50,
        41,  10, 200])

In [31]:
b

array([[32, 10, 68, 37],
       [67, 80, 83, 54],
       [47,  4, 76, 14],
       [70, 39, 30,  4],
       [98,  4, 74, 35],
       [62, 31, 57, 94]])

In [32]:
np.append(b,200)  # appending new element

array([ 32,  10,  68,  37,  67,  80,  83,  54,  47,   4,  76,  14,  70,
        39,  30,   4,  98,   4,  74,  35,  62,  31,  57,  94, 200])

In [33]:
np.append(b,np.ones((b.shape[0],1)),axis=1)   # appending column

array([[32., 10., 68., 37.,  1.],
       [67., 80., 83., 54.,  1.],
       [47.,  4., 76., 14.,  1.],
       [70., 39., 30.,  4.,  1.],
       [98.,  4., 74., 35.,  1.],
       [62., 31., 57., 94.,  1.]])

In [38]:
np.append(b,np.ones((1,b.shape[1])),axis=0)  # appending row

array([[32., 10., 68., 37.],
       [67., 80., 83., 54.],
       [47.,  4., 76., 14.],
       [70., 39., 30.,  4.],
       [98.,  4., 74., 35.],
       [62., 31., 57., 94.],
       [ 1.,  1.,  1.,  1.]])

In [45]:
np.append(b,np.random.random((b.shape[0],1)),axis=1)

array([[32.        , 10.        , 68.        , 37.        ,  0.17320995],
       [67.        , 80.        , 83.        , 54.        ,  0.63109237],
       [47.        ,  4.        , 76.        , 14.        ,  0.3309528 ],
       [70.        , 39.        , 30.        ,  4.        ,  0.32212962],
       [98.        ,  4.        , 74.        , 35.        ,  0.9860406 ],
       [62.        , 31.        , 57.        , 94.        ,  0.83274141]])

### np.concatenate

The numpy.concatenate() function joins a sequence of arrays along an existing axis.

https://numpy.org/doc/stable/reference/generated/numpy.concatenate.html

In [46]:
# code
c = np.arange(6).reshape(2,3)
d = np.arange(6,12).reshape(2,3)

print(c)
print(d)

[[0 1 2]
 [3 4 5]]
[[ 6  7  8]
 [ 9 10 11]]


In [47]:
np.concatenate((c,d),axis=0)   # axis=0: stack vertically (rows of d are added below the rows of c)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [48]:
np.concatenate((c,d),axis=1)   # axis=1: join horizontally (columns of d are added to the right of c)

array([[ 0,  1,  2,  6,  7,  8],
       [ 3,  4,  5,  9, 10, 11]])

### np.unique

np.unique() returns the sorted unique values of the array passed to it.

https://numpy.org/doc/stable/reference/generated/numpy.unique.html

In [50]:
# code
e = np.array([1,1,2,2,3,3,4,4,5,5,6,6])
e

array([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6])

In [51]:
np.unique(e)

array([1, 2, 3, 4, 5, 6])

### np.expand_dims

numpy.expand_dims() inserts a new axis of length 1 at the given position, increasing the number of dimensions of the array by one.

https://numpy.org/doc/stable/reference/generated/numpy.expand_dims.html

Expand dimension of an array: 1D->2D, 2D->3D, 3D->4D, ...

In [55]:
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [56]:
a.shape

(15,)

In [57]:
np.expand_dims(a,axis=0)

array([[59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10]])

In [58]:
np.expand_dims(a,axis=0).shape

(1, 15)

In [59]:
np.expand_dims(a,axis=1)

array([[59],
       [38],
       [38],
       [52],
       [51],
       [52],
       [53],
       [30],
       [72],
       [65],
       [21],
       [26],
       [50],
       [41],
       [10]])

In [60]:
np.expand_dims(a,axis=1).shape

(15, 1)

### np.where

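With a single argument, numpy.where(condition) returns the indices of the elements where the condition is satisfied. With three arguments, numpy.where(condition, x, y) returns an array whose elements are taken from x where the condition is true and from y elsewhere.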

https://numpy.org/doc/stable/reference/generated/numpy.where.html

In [61]:
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [63]:
# find all indices with value greater than 50
np.where(a>50)

(array([0, 3, 4, 5, 6, 8, 9]),)

In [64]:
# replace all values > 50 with 0
# np.where(condition,true,false)

np.where(a>50,0,a)

array([ 0, 38, 38,  0,  0,  0,  0, 30,  0,  0, 21, 26, 50, 41, 10])

In [65]:
# replace even numbers with 0
np.where(a%2==0,0,a)

array([59,  0,  0,  0, 51,  0, 53,  0,  0, 65, 21,  0,  0, 41,  0])

### np.argmax

The numpy.argmax() function returns the index of the maximum element along the given axis, or the index into the flattened array when no axis is given.

https://numpy.org/doc/stable/reference/generated/numpy.argmax.html

In [66]:
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [67]:
np.argmax(a)

8

In [68]:
b

array([[32, 10, 68, 37],
       [67, 80, 83, 54],
       [47,  4, 76, 14],
       [70, 39, 30,  4],
       [98,  4, 74, 35],
       [62, 31, 57, 94]])

In [69]:
np.argmax(b)   # index of max element in flattened array

16

In [70]:
np.argmax(b,axis=0)  # index of max element in column

array([4, 1, 1, 5])

In [71]:
np.argmax(b,axis=1)  # index of max element in row

array([2, 2, 2, 0, 0, 3])

In [73]:
# np.argmin
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [74]:
np.argmin(a)

14

### np.cumsum

numpy.cumsum() function is used when we want to compute the cumulative sum of array elements over a given axis.

https://numpy.org/doc/stable/reference/generated/numpy.cumsum.html

In [75]:
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [76]:
np.cumsum(a)

array([ 59,  97, 135, 187, 238, 290, 343, 373, 445, 510, 531, 557, 607,
       648, 658])

In [77]:
b

array([[32, 10, 68, 37],
       [67, 80, 83, 54],
       [47,  4, 76, 14],
       [70, 39, 30,  4],
       [98,  4, 74, 35],
       [62, 31, 57, 94]])

In [78]:
np.cumsum(b)

array([  32,   42,  110,  147,  214,  294,  377,  431,  478,  482,  558,
        572,  642,  681,  711,  715,  813,  817,  891,  926,  988, 1019,
       1076, 1170])

In [79]:
np.cumsum(b,axis=0)

array([[ 32,  10,  68,  37],
       [ 99,  90, 151,  91],
       [146,  94, 227, 105],
       [216, 133, 257, 109],
       [314, 137, 331, 144],
       [376, 168, 388, 238]])

In [80]:
np.cumsum(b,axis=1)

array([[ 32,  42, 110, 147],
       [ 67, 147, 230, 284],
       [ 47,  51, 127, 141],
       [ 70, 109, 139, 143],
       [ 98, 102, 176, 211],
       [ 62,  93, 150, 244]])

In [81]:
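# np.cumprod -> cumulative product. Note: with a fixed-width integer dtype the running product can silently overflow and wrap around, which is why negative values appear in the output of the np.cumprod(a) cell below.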
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [82]:
np.cumprod(a)

array([                  59,                 2242,                85196,
                    4430192,            225939792,          11748869184,
               622690066752,       18680702002560,     1345010544184320,
          87425685371980800,  1835939392811596800, -7605808008027138048,
        7091225146543681536, -4407674171061882880, -7183253563199725568])

### np.percentile

The numpy.percentile() function computes the q-th percentile of the given data (array elements) along the specified axis.

https://numpy.org/doc/stable/reference/generated/numpy.percentile.html

In [83]:
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [84]:
np.percentile(a,100)

72.0

In [85]:
np.percentile(a,0)

10.0

In [86]:
np.percentile(a,50)

50.0

In [87]:
np.median(a)

50.0

In [88]:
b

array([[32, 10, 68, 37],
       [67, 80, 83, 54],
       [47,  4, 76, 14],
       [70, 39, 30,  4],
       [98,  4, 74, 35],
       [62, 31, 57, 94]])

In [89]:
np.percentile(b,100)

98.0

In [92]:
np.percentile(b,50,axis=0)

array([64.5, 20.5, 71. , 36. ])

### np.histogram

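NumPy's built-in numpy.histogram() function computes the frequency distribution of the data: it returns the count of values falling in each bin together with the bin edges. It does not draw a plot; the result can be passed to a plotting library to visualise it.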

https://numpy.org/doc/stable/reference/generated/numpy.histogram.html

In [93]:
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [94]:
np.histogram(a,bins=[0,10,20,30,40,50,60,70,80,90,100])

(array([0, 1, 2, 3, 1, 6, 1, 1, 0, 0]),
 array([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100]))

In [96]:
np.histogram(a,bins=[0,25,50,75,100])

(array([2, 5, 8, 0]), array([  0,  25,  50,  75, 100]))

In [97]:
b

array([[32, 10, 68, 37],
       [67, 80, 83, 54],
       [47,  4, 76, 14],
       [70, 39, 30,  4],
       [98,  4, 74, 35],
       [62, 31, 57, 94]])

In [98]:
np.histogram(b,bins=[0,25,50,75,100])

(array([5, 7, 7, 5]), array([  0,  25,  50,  75, 100]))

In [100]:
np.histogram(b,bins=[0,25,50,75,100],axis=0)   # row/column-wise is not possible

TypeError: histogram() got an unexpected keyword argument 'axis'

### np.corrcoef

Return Pearson product-moment correlation coefficients.

https://numpy.org/doc/stable/reference/generated/numpy.corrcoef.html

In [101]:
salary = np.array([20000,40000,25000,35000,60000])
experience = np.array([1,3,2,4,2])

np.corrcoef(salary,experience)

array([[1.        , 0.25344572],
       [0.25344572, 1.        ]])

### np.isin

numpy.isin() tests, element by element, whether each value of one array is present in a second collection of values (which may have a different size), and returns a boolean array with the same shape as the first array.

https://numpy.org/doc/stable/reference/generated/numpy.isin.html

In [103]:
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [104]:
items = [10,20,30,40,50,60,70,80,90,100]

np.isin(a,items)

array([False, False, False, False, False, False, False,  True, False,
       False, False, False,  True, False,  True])

In [105]:
a[np.isin(a,items)]

array([30, 50, 10])

### np.flip

The numpy.flip() function reverses the order of array elements along the specified axis, preserving the shape of the array.

https://numpy.org/doc/stable/reference/generated/numpy.flip.html

In [107]:
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [108]:
np.flip(a)

array([10, 41, 50, 26, 21, 65, 72, 30, 53, 52, 51, 52, 38, 38, 59])

In [109]:
b

array([[32, 10, 68, 37],
       [67, 80, 83, 54],
       [47,  4, 76, 14],
       [70, 39, 30,  4],
       [98,  4, 74, 35],
       [62, 31, 57, 94]])

In [110]:
np.flip(b)   # flip by both axis x and y

array([[94, 57, 31, 62],
       [35, 74,  4, 98],
       [ 4, 30, 39, 70],
       [14, 76,  4, 47],
       [54, 83, 80, 67],
       [37, 68, 10, 32]])

In [111]:
np.flip(b,axis=0)    # mirror image column-wise (flip by x-axis)

array([[62, 31, 57, 94],
       [98,  4, 74, 35],
       [70, 39, 30,  4],
       [47,  4, 76, 14],
       [67, 80, 83, 54],
       [32, 10, 68, 37]])

In [112]:
np.flip(b,axis=1)  # mirror image row-wise (flip by y-axis)

array([[37, 68, 10, 32],
       [54, 83, 80, 67],
       [14, 76,  4, 47],
       [ 4, 30, 39, 70],
       [35, 74,  4, 98],
       [94, 57, 31, 62]])

### np.put

The numpy.put() function replaces the elements at the specified indices of an array with the given values, modifying the array in place. The indices refer to positions in the flattened array.

https://numpy.org/doc/stable/reference/generated/numpy.put.html

In [None]:
# np.put(array,indices to change,new values) -> makes permanent changes

In [114]:
a

array([59, 38, 38, 52, 51, 52, 53, 30, 72, 65, 21, 26, 50, 41, 10])

In [115]:
np.put(a,[1,5,9],[100,200,300])

In [116]:
a

array([ 59, 100,  38,  52,  51, 200,  53,  30,  72, 300,  21,  26,  50,
        41,  10])

In [117]:
b

array([[32, 10, 68, 37],
       [67, 80, 83, 54],
       [47,  4, 76, 14],
       [70, 39, 30,  4],
       [98,  4, 74, 35],
       [62, 31, 57, 94]])

In [118]:
np.put(b,[1,5,9],[100,200,300])

In [119]:
b

array([[ 32, 100,  68,  37],
       [ 67, 200,  83,  54],
       [ 47, 300,  76,  14],
       [ 70,  39,  30,   4],
       [ 98,   4,  74,  35],
       [ 62,  31,  57,  94]])

### np.delete

The numpy.delete() function returns a new array with the specified sub-arrays removed along the given axis. If no axis is given, the indices refer to the flattened array and a 1D array is returned.

https://numpy.org/doc/stable/reference/generated/numpy.delete.html

In [120]:
a

array([ 59, 100,  38,  52,  51, 200,  53,  30,  72, 300,  21,  26,  50,
        41,  10])

In [121]:
np.delete(a,[0,2,4,6])

array([100,  52, 200,  30,  72, 300,  21,  26,  50,  41,  10])

In [122]:
b

array([[ 32, 100,  68,  37],
       [ 67, 200,  83,  54],
       [ 47, 300,  76,  14],
       [ 70,  39,  30,   4],
       [ 98,   4,  74,  35],
       [ 62,  31,  57,  94]])

In [123]:
np.delete(b,1,axis=0)

array([[ 32, 100,  68,  37],
       [ 47, 300,  76,  14],
       [ 70,  39,  30,   4],
       [ 98,   4,  74,  35],
       [ 62,  31,  57,  94]])

In [124]:
np.delete(b,1,axis=1)

array([[32, 68, 37],
       [67, 83, 54],
       [47, 76, 14],
       [70, 30,  4],
       [98, 74, 35],
       [62, 57, 94]])

In [129]:
np.delete(b,[1,3,5,7,9,10,13,15])

array([32, 68, 67, 83, 47, 14, 70, 30, 98,  4, 74, 35, 62, 31, 57, 94])

### Set functions

- np.union1d
- np.intersect1d
- np.setdiff1d
- np.setxor1d
- np.in1d (deprecated since NumPy 2.0; prefer np.isin)

In [130]:
m = np.array([1,2,3,4,5])
n = np.array([3,4,5,6,7])

In [131]:
np.union1d(m,n)

array([1, 2, 3, 4, 5, 6, 7])

In [132]:
np.intersect1d(m,n)

array([3, 4, 5])

In [133]:
np.setdiff1d(m,n)

array([1, 2])

In [134]:
np.setdiff1d(n,m)

array([6, 7])

In [135]:
np.setxor1d(m,n)

array([1, 2, 6, 7])

In [137]:
np.in1d(m,1)

array([ True, False, False, False, False])

In [140]:
np.in1d(n,1)

array([False, False, False, False, False])

In [141]:
n[np.in1d(n,1)]

array([], dtype=int64)

In [142]:
m[np.in1d(m,1)]

array([1])

### np.clip

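The numpy.clip() function clips (limits) the values in an array to a given interval: values smaller than the lower bound are set to the lower bound, and values larger than the upper bound are set to the upper bound.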

https://numpy.org/doc/stable/reference/generated/numpy.clip.html

In [143]:
a

array([ 59, 100,  38,  52,  51, 200,  53,  30,  72, 300,  21,  26,  50,
        41,  10])

In [144]:
np.clip(a,a_min=25,a_max=75)

array([59, 75, 38, 52, 51, 75, 53, 30, 72, 75, 25, 26, 50, 41, 25])

In [None]:
# 17. np.swapaxes

In [None]:
# 18. np.random.uniform

In [None]:
# 19. np.count_nonzero

In [None]:
# 21. np.tile
# https://www.kaggle.com/code/abhayparashar31/best-numpy-functions-for-data-science-50?scriptVersionId=98816580

In [None]:
# 22. np.repeat
# https://towardsdatascience.com/10-numpy-functions-you-should-know-1dc4863764c5

In [None]:
# 25. np.allclose and np.array_equal