### What is numpy?

NumPy is the fundamental package for scientific computing in Python. It is a Python library that provides a multidimensional array object and an assortment of routines for fast operations on arrays, including mathematical, logical, shape manipulation, sorting, selecting, I/O, discrete Fourier transforms, basic linear algebra, basic statistical operations, random simulation and much more.


At the core of the NumPy package is the ndarray object. This encapsulates n-dimensional arrays of homogeneous data types.

### Numpy Arrays Vs Python Sequences

- NumPy arrays have a fixed size at creation, unlike Python lists (which can grow dynamically). Changing the size of an ndarray will create a new array and delete the original.

- The elements in a NumPy array are all required to be of the same data type, and thus will be the same size in memory.

- NumPy arrays facilitate advanced mathematical and other types of operations on large numbers of data. Typically, such operations are executed more efficiently and with less code than is possible using Python’s built-in sequences.

- A growing plethora of scientific and mathematical Python-based packages are using NumPy arrays; though these typically support Python-sequence input, they convert such input to NumPy arrays prior to processing, and they often output NumPy arrays.

In [3]:
import numpy as np

### Creating Numpy Arrays

In [None]:
# np.array
import numpy as np

a = np.array([1,2,3,"dghsdjk"])
print(a)

['1' '2' '3' 'dghsdjk']


In [None]:

aE = np.array((1,2,3))
aE

array([1, 2, 3])

In [None]:
# 2D and 3D
import numpy as np
b = np.array([[1,2,3],[4,5,6],[4,5,6]])
print(b)

[[1 2 3]
 [4 5 6]
 [4 5 6]]


In [None]:
c = np.array([[[1,2,3],[3,4,5]],[[5,6,7],[7,8,9]]])
print(c)

[[[1 2 3]
  [3 4 5]]

 [[5 6 7]
  [7 8 9]]]


In [None]:
# create 2 new lists height and weight
person_height = [5.2,  5.4, 4.4, 4.5, 5.6, 6]
person_weight = [81, 55, 65, 70, 45, 44]

# create 2 numpy arrays from height and weight
person_height = np.array(person_height)
person_weight = np.array(person_weight)


In [None]:
# print 'person_height' array
person_height

array([5.2, 5.4, 4.4, 4.5, 5.6, 6. ])

In [None]:
# dtype
np.array([1,2,3],dtype=float)

array([1., 2., 3.])

In [None]:
# np.arange
np.arange(1,11,2)

array([1, 3, 5, 7, 9])

In [None]:
# with reshape
np.arange(16).reshape(2,8).T

array([[ 0,  8],
       [ 1,  9],
       [ 2, 10],
       [ 3, 11],
       [ 4, 12],
       [ 5, 13],
       [ 6, 14],
       [ 7, 15]])

In [None]:
# reshape needs a target shape whose product equals the element count (2 * 4 == 8);
# calling it with no arguments raises TypeError.
np.arange(8).reshape(2, 4)

In [None]:
# np.ones and np.zeros
np.ones((3,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [None]:
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [None]:
# np.random
np.random.random()

0.8041437888642148

In [None]:
# np.linspace
np.linspace(-10,10,30,dtype=float)

array([-10.        ,  -9.31034483,  -8.62068966,  -7.93103448,
        -7.24137931,  -6.55172414,  -5.86206897,  -5.17241379,
        -4.48275862,  -3.79310345,  -3.10344828,  -2.4137931 ,
        -1.72413793,  -1.03448276,  -0.34482759,   0.34482759,
         1.03448276,   1.72413793,   2.4137931 ,   3.10344828,
         3.79310345,   4.48275862,   5.17241379,   5.86206897,
         6.55172414,   7.24137931,   7.93103448,   8.62068966,
         9.31034483,  10.        ])

In [None]:
# np.identity
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### Numpy array vs Python lists

In [None]:
# speed
# list
a = [i for i in range(10000000)]
b = [i for i in range(10000000,20000000)]

c = []
import time

start = time.time()
for i in range(len(a)):
  c.append(a[i] + b[i])
#print(c)
print(time.time()-start)

2.4348888397216797


In [None]:
# numpy
import numpy as np
a = np.arange(10000000)
b = np.arange(10000000,20000000)

start = time.time()
c = a + b
print(time.time()-start)

0.382000207901001


In [None]:
# memory

#list
a = [i for i in range(10000000)]
import sys

sys.getsizeof(a)


89095160

In [None]:
#numpy
a = np.arange(10000000,dtype=np.int32)
sys.getsizeof(a)

40000112

### Array Attributes

In [None]:
a1 = np.arange(10,dtype=np.int32)
a2 = np.arange(12,dtype=float).reshape(3,4)
a3 = np.arange(8).reshape(2,2,2)
print(a1)
print(a2)
print(a3)

[0 1 2 3 4 5 6 7 8 9]
[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]]
[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


In [None]:
a3 = np.arange(8).reshape(2,4)
a3

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [None]:
# ndim
a3.ndim

2

In [None]:
# shape
print(a3.shape)
a3

(2, 4)


array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [None]:
# size
a2
print(a2.size)
a2

12


array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [None]:
# itemsize
a3


array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [None]:
a3.itemsize

4

In [None]:
# dtype
print(a1.dtype)
print(a2.dtype)
print(a3.dtype)



int32
float64
int32


### Changing Datatype

In [None]:
# astype
a3.astype(np.int32)

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

### Array Operations

In [None]:
a111 = np.arange(12).reshape(3,4)
a222 = np.arange(12,24).reshape(3,4)
a111
#a222

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [None]:
# scalar operations

# arithmetic
a111 ** 2

array([[  0,   1,   4,   9],
       [ 16,  25,  36,  49],
       [ 64,  81, 100, 121]])

In [None]:
# relational
a222

array([[12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [None]:
a2 == 15

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False]])

In [4]:
arr1 = np.array([20,30,40,50])
arr2 = np.arange(4).reshape(2,2)
arr2

array([[0, 1],
       [2, 3]])

In [5]:
arr1+arr2

ValueError: operands could not be broadcast together with shapes (4,) (2,2) 

In [6]:
arr=np.array([1,2,3])*2
arr

array([2, 4, 6])

**Using Numpy with Comparison Expressions**

In [7]:
my_array = np.array([34, 45, 67, 45, 23])

# check which elements are greater than or equal to 40
# the comparison condition gives boolean output
new_array = my_array >= 40
new_array

array([False,  True,  True,  True, False])

In [8]:
my_array = np.array([50, 70, 67, 45, 23])
new = my_array>=45
new

array([ True,  True,  True,  True, False])

In [9]:
my_array[new]

array([50, 70, 67, 45])

### Array Functions

In [10]:
# given array
my_array = np.array([5,7,8,2,4])
my_array

array([5, 7, 8, 2, 4])

**sum():**<br>
sum() function adds all the values in the array and gives a scalar output.

In [11]:
# add all the elements of 'my_array'
my_array.sum()

np.int64(26)

In [12]:
# find minimum of 'my_array'
my_array.min()

np.int64(2)

In [13]:
# get cube of elements of 'my_array'
np.power(my_array,3)

array([125, 343, 512,   8,  64])

In [14]:
a1 = np.random.random((3,3))
a1 = np.round(a1*100)
a1

array([[ 6.,  1., 40.],
       [15., 48., 32.],
       [ 5., 22., 82.]])

In [15]:
# mean/median/std/var
np.var(a1,axis=1)

array([ 300.22222222,  181.55555556, 1090.88888889])

In [16]:
# trigonometric functions
np.sin(a1)

array([[-0.2794155 ,  0.84147098,  0.74511316],
       [ 0.65028784, -0.76825466,  0.55142668],
       [-0.95892427, -0.00885131,  0.31322878]])

In [19]:
# dot product
a2 = np.arange(12).reshape(3,4)
a3 = np.arange(12,24).reshape(4,3)
print(a2)
print(a3)
#a2*a3
np.dot(a2,a3)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[12 13 14]
 [15 16 17]
 [18 19 20]
 [21 22 23]]


array([[114, 120, 126],
       [378, 400, 422],
       [642, 680, 718]])

In [20]:
a1

array([[ 6.,  1., 40.],
       [15., 48., 32.],
       [ 5., 22., 82.]])

In [21]:
# round/floor/ceil

np.ceil(np.random.random((2,3))*100)

array([[ 1., 13., 19.],
       [44., 20., 78.]])

### Concatenation of Arrays

In [22]:
# concatenate two 1D arrays
array_x = np.array([11, 22, 13])
array_y = np.array([23, 22, 12])
np.concatenate([array_x, array_y])

array([11, 22, 13, 23, 22, 12])

In [23]:
# concatenate three 1D arrays
array_1 = np.array([11, 22, 13])
array_2 = np.array([23, 22, 12])
array_z = np.array([55, 44, 33])
np.concatenate([array_1,array_2,array_z])

array([11, 22, 13, 23, 22, 12, 55, 44, 33])

In [24]:
#You can also concatenate more than two arrays at once.
array_z = np.array([23,45])
print(np.concatenate([array_x, array_y, array_z]))

[11 22 13 23 22 12 23 45]


**Concatenate 2D array**

In [25]:
# create a 2D array
my_array = np.array([[1, 2, 3],
                 [4, 5, 6]])
my_array

array([[1, 2, 3],
       [4, 5, 6]])

In [26]:
# by default concatenate() is along 'axis = 0'
np.concatenate([my_array, my_array])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [27]:
np.concatenate([my_array, my_array], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

**Note:** Arrays can only be concatenated if they have the same number of dimensions and matching shapes along every axis except the concatenation axis.

### Indexing and Slicing

In [31]:
aa1 = np.arange(10)
a2 = np.arange(12).reshape(3,4)
a3 = np.arange(8).reshape(2,2,2)
a3


array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [32]:
aa1

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
a2

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [33]:
a2.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [34]:
a3


array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [35]:
a3.shape

(2, 2, 2)

In [36]:
a3[0,1]

array([2, 3])

In [37]:
a3[1,1,0]

np.int64(6)

In [38]:
aa1

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [40]:
a1

array([[ 6.,  1., 40.],
       [15., 48., 32.],
       [ 5., 22., 82.]])

In [None]:
a1[2:5]

array([[ 5., 22., 82.]])

In [41]:
a2

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [42]:
a2[0:2,1::2]

array([[1, 3],
       [5, 7]])

In [43]:
a2


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [44]:
a2[1:4,1:4]

array([[ 5,  6,  7],
       [ 9, 10, 11]])

In [45]:
a2[1,::3]

array([4, 7])

In [46]:
a2[0,:]

array([0, 1, 2, 3])

In [47]:
a2[:,2]

array([ 2,  6, 10])

In [50]:
a2

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [49]:
a2[1:,1:3]

array([[ 5,  6],
       [ 9, 10]])

### Reshaping

In [None]:
# reshape
a2

In [51]:
# Transpose
#np.transpose(a2)
a2.T

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [52]:
a3

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [53]:
# ravel
a3.ravel()

array([0, 1, 2, 3, 4, 5, 6, 7])

In [54]:
# Create a multi-dimensional array
arr = np.array([[1, 2, 3],
                [4, 5, 6]])

# Use ravel() to flatten the array
flattened_arr = arr.ravel()

print(flattened_arr)


[1 2 3 4 5 6]


In [55]:
arr


array([[1, 2, 3],
       [4, 5, 6]])

In [56]:
arr.flatten()

array([1, 2, 3, 4, 5, 6])

### Stacking

In [57]:
# horizontal stacking
a4 = np.arange(12).reshape(3,4)
a5 = np.arange(12,24).reshape(3,4)
a4

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [58]:
a5

array([[12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [59]:
np.hstack((a4,a5))

array([[ 0,  1,  2,  3, 12, 13, 14, 15],
       [ 4,  5,  6,  7, 16, 17, 18, 19],
       [ 8,  9, 10, 11, 20, 21, 22, 23]])

In [60]:
# Vertical stacking
np.vstack((a4,a5))

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

### Splitting
Splitting is used to split the array into multiple sub-arrays. It is the opposite of concatenation and is implemented by functions such as split(), array_split(), vsplit(), and hsplit().

In [61]:
# split the array into sub-arrays
array_x = np.arange(10) 


In [62]:
array_x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [63]:
np.split(array_x, 2)

[array([0, 1, 2, 3, 4]), array([5, 6, 7, 8, 9])]

In [64]:
# the split occurs at indices 4 and 6 (i.e. before the 5th and 7th elements)


array_y = np.split(array_x,[4,6])
print(array_y)

[array([0, 1, 2, 3]), array([4, 5]), array([6, 7, 8, 9])]


In [65]:
# split 'array_x' into 2 equal sub-arrays (np.split requires an equal division, so 3 would raise ValueError here)
np.split(array_x, 2)

[array([0, 1, 2, 3, 4]), array([5, 6, 7, 8, 9])]

**array_split():** It is used to split the array into sub-arrays. It takes the integer 'N' as the input for the number of splits, even if 'N' does not divide the array into sub-arrays of equal length.<br>

In [66]:
array_x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [67]:
# split 'array_x' into 3 sub-arrays using 'array_split'
np.array_split(array_x, 3)

[array([0, 1, 2, 3]), array([4, 5, 6]), array([7, 8, 9])]

We split the array of length 10 into 3 sub-arrays; the function 'array_split()' returns <i>10 % 3 (=1)</i> sub-array of size <i>10//3 + 1 (=4)</i> and the rest (i.e. two sub-arrays) of size <i>10//3 (=3)</i>.

**vsplit():**<br>
The vsplit() function is used to split an array into multiple sub-arrays vertically.

In [68]:
my_array = np.arange(20.0).reshape(4,5)
my_array

array([[ 0.,  1.,  2.,  3.,  4.],
       [ 5.,  6.,  7.,  8.,  9.],
       [10., 11., 12., 13., 14.],
       [15., 16., 17., 18., 19.]])

In [69]:
arr=np.arange(25).reshape(5,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [70]:
np.vsplit(arr,5)

[array([[0, 1, 2, 3, 4]]),
 array([[5, 6, 7, 8, 9]]),
 array([[10, 11, 12, 13, 14]]),
 array([[15, 16, 17, 18, 19]]),
 array([[20, 21, 22, 23, 24]])]

In [71]:
my_array

array([[ 0.,  1.,  2.,  3.,  4.],
       [ 5.,  6.,  7.,  8.,  9.],
       [10., 11., 12., 13., 14.],
       [15., 16., 17., 18., 19.]])

In [72]:
np.vsplit(my_array, 2)

[array([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]]),
 array([[10., 11., 12., 13., 14.],
        [15., 16., 17., 18., 19.]])]

**hsplit():**<br>
The hsplit() function is used to split an array into multiple sub-arrays horizontally (column-wise).

In [73]:
my_array = np.arange(16.0).reshape(4,4)
my_array

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.],
       [12., 13., 14., 15.]])

In [74]:
np.hsplit(my_array, 2)

[array([[ 0.,  1.],
        [ 4.,  5.],
        [ 8.,  9.],
        [12., 13.]]),
 array([[ 2.,  3.],
        [ 6.,  7.],
        [10., 11.],
        [14., 15.]])]

### Advanced Indexing

In [75]:
# Normal Indexing and slicing

a = np.arange(24).reshape(6,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [76]:
a[1,2]

np.int64(6)

In [77]:
a[1:3,1:3]

array([[ 5,  6],
       [ 9, 10]])

In [78]:
# Fancy Indexing
a


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [79]:
a[:,[0,2,3]]  # ':' selects all rows; the list [0,2,3] picks columns 0, 2 and 3

array([[ 0,  2,  3],
       [ 4,  6,  7],
       [ 8, 10, 11],
       [12, 14, 15],
       [16, 18, 19],
       [20, 22, 23]])

In [80]:
# Boolean Indexing
a = np.random.randint(1,100,24).reshape(6,4)
a

array([[89, 80, 21, 74],
       [48, 35, 98, 33],
       [21, 10, 60, 62],
       [15, 65, 76, 15],
       [ 4, 89, 57,  3],
       [85, 97, 77, 85]], dtype=int32)

In [81]:
# find all numbers greater than 50
a[a > 50]

array([89, 80, 74, 98, 60, 62, 65, 76, 89, 57, 85, 97, 77, 85],
      dtype=int32)

In [82]:
# find out even numbers
a[a % 2 == 0]

array([80, 74, 48, 98, 10, 60, 62, 76,  4], dtype=int32)

In [83]:
# find all numbers greater than 50 and are even

a[(a > 50) & (a % 2 == 0)]

array([80, 74, 98, 60, 62, 76], dtype=int32)

### Working with mathematical formulas

In [84]:
a = np.arange(10)
np.sin(a)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849])

In [85]:
# sigmoid
def sigmoid(array):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of the input, element-wise.

    Parameters
    ----------
    array : scalar or array-like
        Input values. Non-floating inputs (ints, bools, lists of ints) are
        converted to float64; floating inputs keep their dtype.

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray of the same shape,
    with values between 0 and 1.
    """
    x = np.asarray(array)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(float)

    # exp(-|x|) is always <= 1, so it can never overflow, unlike exp(-x)
    # for large negative x.
    decay = np.exp(-np.abs(x))

    # For x >= 0: 1 / (1 + e^-x).  For x < 0 the algebraically equal form
    # e^x / (1 + e^x) is used, where e^x == decay.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Indexing with () turns a 0-d result back into a scalar and leaves
    # n-d arrays unchanged.
    return result[()]


a = np.arange(100)

sigmoid(a)

array([0.5       , 0.73105858, 0.88079708, 0.95257413, 0.98201379,
       0.99330715, 0.99752738, 0.99908895, 0.99966465, 0.99987661,
       0.9999546 , 0.9999833 , 0.99999386, 0.99999774, 0.99999917,
       0.99999969, 0.99999989, 0.99999996, 0.99999998, 0.99999999,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.     

In [87]:
# mean squared error

actual = np.random.randint(1,50,25)
predicted = np.random.randint(1,50,25)
print(actual)
print(predicted)

[23  7  9  4 40 34 25 15 21 12 38  8 33 45 20 14 48 14 21 17 36 21 19  9
 21]
[44 31  9 31 12 45 33 47 19  9 34 37 25  3 43 15 33  3 39 47 21 23  2 24
 18]


In [88]:
def mse(actual, predicted):
    """Return the mean squared error between `actual` and `predicted`.

    Parameters
    ----------
    actual, predicted : array-like
        Observed and predicted values; they must be broadcast-compatible.

    Returns
    -------
    numpy.float64
        The mean of the element-wise squared differences.
    """
    # Cast to float before subtracting: in integer dtypes (notably unsigned
    # ones) the difference or its square can wrap around and give a wrong mean.
    diff = np.asarray(actual, dtype=float) - np.asarray(predicted, dtype=float)
    return np.mean(diff ** 2)

mse(actual,predicted)

np.float64(371.56)

In [89]:
# mean squared error computed inline (NOTE: this expression is MSE, not binary cross entropy)
np.mean((actual - predicted)**2)

np.float64(371.56)

In [90]:
actual

array([23,  7,  9,  4, 40, 34, 25, 15, 21, 12, 38,  8, 33, 45, 20, 14, 48,
       14, 21, 17, 36, 21, 19,  9, 21], dtype=int32)

### np.argmax

The numpy.argmax() function returns the index of the maximum element of the array, optionally along a given axis.

In [91]:
a


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [92]:
np.argmax(a)

np.int64(99)

In [93]:
# np.argmin
np.argmin(a)

np.int64(0)

### np.delete()

In [94]:
import numpy as np

arr = np.array([[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]])

# Delete the third row, i.e. index 2 (axis=0)
result_axis0 = np.delete(arr, 2, axis=0)
result_axis0

array([[1, 2, 3],
       [4, 5, 6]])

In [95]:
# Output of the np.delete(arr, 2, axis=0) call above:
# array([[1, 2, 3],
#        [4, 5, 6]])

# Delete the second column (axis=1)
result_axis1 = np.delete(arr, 1, axis=1)
# Output:
# array([[1, 3],
#        [4, 6],
#        [7, 9]])

# Delete multiple rows
result_multiple_rows = np.delete(arr, [0, 2], axis=0)
# Output:
# array([[4, 5, 6]])

# Delete multiple columns
result_multiple_cols = np.delete(arr, [0, 2], axis=1)
# Output:
# array([[2],
#        [5],
#        [8]])

