# **NumPy Introduction**
NumPy (Numerical Python) is a fundamental package for scientific computing with Python. It provides N-dimensional arrays and matrices, together with a large library of mathematical functions that operate on these data structures.

In [2]:
# install by running this code
#!pip install numpy

In [3]:
import numpy as np

## **Array**
The central data structure in NumPy is the array, known as the `ndarray` (short for N-dimensional array). It is a grid of values, all of the same type, indexed by a tuple of non-negative integers.

In [5]:
a = np.arange(6)
print(a)
a.shape

[0 1 2 3 4 5]


(6,)

In [7]:
# np.newaxis inserts a new axis of length 1; placed first, it turns the
# 1-D array of 6 elements into a single row of shape (1, 6)
a2 = a[np.newaxis, :]
print(a2)

[[0 1 2 3 4 5]]


In [8]:
a2.shape

(1, 6)

In [9]:
# placing the new axis last turns the 1-D array into a column of shape (6, 1)
a2 = a[:, np.newaxis]
print(a2)
a2.shape

[[0]
 [1]
 [2]
 [3]
 [4]
 [5]]


(6, 1)

In [10]:
# adding a leading axis to the (6, 1) array gives a 3-D array of shape (1, 6, 1)
a3 = a2[np.newaxis, :]
print(a3)
a3.shape

[[[0]
  [1]
  [2]
  [3]
  [4]
  [5]]]


(1, 6, 1)

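## **Why do we have 1D, 2D or 3D arrays and where do we need them in Data Science?**

The number of dimensions follows the structure of the data. A 1D array holds a single sequence of values (for example one feature column or a time series), a 2D array holds tabular data or a matrix (rows as samples, columns as features), and a 3D array holds a stack of 2D data, such as a color image (height x width x channels) or a batch of sequences.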

## **Creating Arrays with Numpy**

In [11]:
a = np.array([1,2,3,4,5,6,7])
b = np.array([(1,2,3,4,5,6,7),(4,5,6,7,8,9,10)])

In [12]:
a.dtype #data type of array elements

dtype('int64')

In [16]:
b.dtype

dtype('int64')

In [14]:
type(a) # type of array

numpy.ndarray

In [15]:
type(b)

numpy.ndarray

In [18]:
print(a)
a.shape

[1 2 3 4 5 6 7]


(7,)

In [19]:
print(b)
b.shape

[[ 1  2  3  4  5  6  7]
 [ 4  5  6  7  8  9 10]]


(2, 7)

## **Initialize arrays**

In [20]:
zeros = np.zeros((2,5))
zeros

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [21]:
zeros.dtype

dtype('float64')

In [22]:
ones = np.ones((2,5))
ones

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [23]:
ones.dtype

dtype('float64')

In [24]:
full = np.full((2,5),7.5)
full

array([[7.5, 7.5, 7.5, 7.5, 7.5],
       [7.5, 7.5, 7.5, 7.5, 7.5]])

In [25]:
full.dtype

dtype('float64')

In [26]:
identity = np.eye(5)
identity

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [27]:
identity.dtype

dtype('float64')

In [46]:
# np.empty allocates the array without initializing its contents, so the
# printed values are arbitrary and may differ from run to run
empty = np.empty((2))
print(empty)
print(empty[1])

[-5.73021895e-300  6.52507486e-310]
6.52507486427384e-310


In [50]:
empty = np.empty((3, 4))
print(empty)
print("---------------")
print(empty[1])

[[4.98451544e-310 0.00000000e+000 0.00000000e+000 0.00000000e+000]
 [0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000]
 [0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000]]
---------------
[0. 0. 0. 0.]


In [52]:
x = np.arange(23)
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22])

In [53]:
# array of even numbers from 0 to 22
even = np.arange(0, 23, 2)
even

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22])

In [54]:
# array of odd numbers from 1 to 23 (the stop value 24 is excluded)
odd = np.arange(1,24,2)
odd

array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23])

In [55]:
# specific difference between numbers
diff = np.arange(0, 23, 3)
diff

array([ 0,  3,  6,  9, 12, 15, 18, 21])

In [57]:
lin = np.linspace(0,20, num=10)
lin

array([ 0.        ,  2.22222222,  4.44444444,  6.66666667,  8.88888889,
       11.11111111, 13.33333333, 15.55555556, 17.77777778, 20.        ])

In [58]:
lin = np.linspace(0, 10, num=5)
lin

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [59]:
lin = np.linspace(0, -10, num=10)
lin

array([  0.        ,  -1.11111111,  -2.22222222,  -3.33333333,
        -4.44444444,  -5.55555556,  -6.66666667,  -7.77777778,
        -8.88888889, -10.        ])

In [61]:
lin = np.linspace(0, 10, num=15)
lin

array([ 0.        ,  0.71428571,  1.42857143,  2.14285714,  2.85714286,
        3.57142857,  4.28571429,  5.        ,  5.71428571,  6.42857143,
        7.14285714,  7.85714286,  8.57142857,  9.28571429, 10.        ])

In [62]:
lin.dtype

dtype('float64')

In [63]:
lin = np.linspace(0, 10, num=15, dtype=np.int64) # casting to int64 drops the fractional part (e.g. 2.857 -> 2); the values are truncated, not rounded
lin

array([ 0,  0,  1,  2,  2,  3,  4,  5,  5,  6,  7,  7,  8,  9, 10])

In [65]:
# define the dtype of array
x = np.ones(2, dtype = np.int64)
x

array([1, 1])

In [66]:
# define the dtype of array
x = np.ones(2, dtype = np.float64)
x

array([1., 1.])

In [67]:
# define the dtype of array
x = np.arange(0, 22.6, 1.5, dtype = np.float64)
x

array([ 0. ,  1.5,  3. ,  4.5,  6. ,  7.5,  9. , 10.5, 12. , 13.5, 15. ,
       16.5, 18. , 19.5, 21. , 22.5])

## **What is the difference between int32, 64 or float 32, 64, 16... What do they mean? Why do we use one and not the other one?**

The number in a dtype name is the number of bits used to store each value: a `float32` value is a floating-point number that occupies 32 bits of storage, and a `float64` value occupies 64 bits. More bits allow a larger range of integers and higher precision for floating-point numbers, but they also use more memory. We choose a dtype according to the range and precision our data needs; when memory matters and the values fit, a smaller type saves space.

In [68]:
# sorting an array
arr = np.array([2,1,5,3,7,4,6,8])
arr

array([2, 1, 5, 3, 7, 4, 6, 8])

In [69]:
arr = np.sort(arr)
arr

array([1, 2, 3, 4, 5, 6, 7, 8])

In [70]:
a = np.array([1,2,3,4,5])
b = np.array([6,7,8,9,10])

In [71]:
# concatenate these arrays
c = np.concatenate((a,b))
c

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

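## **Array Attributes**

Note: the cells in this section and in **Basic Operation** below were executed before the sorting and concatenation examples above (see the `In [..]` numbers), so `a` and `b` here are the arrays of shapes `(7,)` and `(2, 7)` created in **Creating Arrays with Numpy**.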

In [28]:
a.shape # gives us the shape of array

(7,)

In [29]:
len(a) # length of the first axis (for a 1-D array, the number of elements)

7

In [30]:
a.ndim # gives us the number of dimensions (axes) of the array

1

In [31]:
b.ndim

2

In [32]:
identity.ndim

2

In [33]:
a.size # gives us the number of elements in array

7

## **Basic Operation**

In [34]:
a

array([1, 2, 3, 4, 5, 6, 7])

In [35]:
b

array([[ 1,  2,  3,  4,  5,  6,  7],
       [ 4,  5,  6,  7,  8,  9, 10]])

In [39]:
a + b # addition

array([[ 2,  4,  6,  8, 10, 12, 14],
       [ 5,  7,  9, 11, 13, 15, 17]])

In [40]:
# another method
np.add(a,b)

array([[ 2,  4,  6,  8, 10, 12, 14],
       [ 5,  7,  9, 11, 13, 15, 17]])

In [37]:
a - b # subtraction

array([[ 0,  0,  0,  0,  0,  0,  0],
       [-3, -3, -3, -3, -3, -3, -3]])

In [38]:
b - a

array([[0, 0, 0, 0, 0, 0, 0],
       [3, 3, 3, 3, 3, 3, 3]])

In [41]:
a * b # multiplication

array([[ 1,  4,  9, 16, 25, 36, 49],
       [ 4, 10, 18, 28, 40, 54, 70]])

In [42]:
a / b # division

array([[1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        ],
       [0.25      , 0.4       , 0.5       , 0.57142857, 0.625     ,
        0.66666667, 0.7       ]])

In [43]:
a

array([1, 2, 3, 4, 5, 6, 7])

In [44]:
# square of each element
a ** 2

array([ 1,  4,  9, 16, 25, 36, 49])

## **2D Array operations**

In [72]:
x = np.array([[1,2],[3,4]]) # 2x2 array
y = np.array([[5,6]]) # 1x2 array

In [73]:
# concatenate 2D arrays
z = np.concatenate((x,y))
z

array([[1, 2],
       [3, 4],
       [5, 6]])

In [74]:
# concatenate 2D arrays on axis 0
z = np.concatenate((x,y), axis = 0)
z

array([[1, 2],
       [3, 4],
       [5, 6]])

In [75]:
# concatenate 2D arrays on axis 1
w = np.array([[5,1],[7,1]])
x = np.array([[1,2], [3,4]])
z1 = np.concatenate((w,x), axis=1)
z1

array([[5, 1, 1, 2],
       [7, 1, 3, 4]])

In [76]:
x.ndim

2

In [77]:
# create a 3D array: three 2x4 layers stacked along the first axis
array_example = np.array([
    [[0, 1, 2, 3],
     [4, 5, 6, 7]],
    [[0, 1, 2, 3],
     [4, 5, 6, 7]],
    [[0, 1, 2, 3],
     [4, 5, 6, 7]],
])
array_example.ndim

3

In [78]:
len(array_example)

3

In [79]:
array_example.shape

(3, 2, 4)

In [80]:
array_example.dtype

dtype('int64')

In [81]:
# create a 3D array of shape (3, 1, 4): three layers, each a single row of four values
a = np.array([
    [[0, 1, 2, 3]],
    [[4, 5, 6, 7]],
    [[0, 1, 2, 3]],
])
a.ndim

3

In [82]:
a.shape

(3, 1, 4)

In [83]:
a = a.reshape(6,2)
a.shape

(6, 2)

In [84]:
a

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [0, 1],
       [2, 3]])

In [85]:
a.reshape(3,2,2)

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]],

       [[0, 1],
        [2, 3]]])

In [86]:
a.reshape(3,1,4)

array([[[0, 1, 2, 3]],

       [[4, 5, 6, 7]],

       [[0, 1, 2, 3]]])

## **Indexing and slicing**

In [87]:
a = np.array([1,2,3,4,5,6,7,8])
a

array([1, 2, 3, 4, 5, 6, 7, 8])

In [88]:
a[2]

3

In [89]:
a[2:5]

array([3, 4, 5])

In [90]:
a[-4:]

array([5, 6, 7, 8])

In [91]:
a[-4:-2]

array([5, 6])

In [92]:
a[1:]

array([2, 3, 4, 5, 6, 7, 8])

In [93]:
b = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
b

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [94]:
b[2]

array([ 9, 10, 11, 12])

In [95]:
b[1:,1:]

array([[ 6,  7,  8],
       [10, 11, 12]])

In [96]:
b[b < 5] # boolean-mask filtering, similar to the filtering technique in pandas

array([1, 2, 3, 4])

In [97]:
b[b < 2]

array([1])

In [98]:
b[b <= 10]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [99]:
b[b%2 == 0] # filter even numbers

array([ 2,  4,  6,  8, 10, 12])


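**Boolean Operators: AND, OR, NOT**

For element-wise conditions NumPy uses `&` (AND), `|` (OR) and `~` (NOT); wrap each condition in parentheses because these operators bind more tightly than comparisons.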

In [100]:
# use two conditions
b[(b > 2) & (b < 11)]

array([ 3,  4,  5,  6,  7,  8,  9, 10])

In [101]:
# use three conditions and filter the arrays
b[(b > 2) & (b < 11) & (b%2 == 0)]

array([ 4,  6,  8, 10])

In [102]:
# OR keeps elements that satisfy at least one condition; every element of b is
# either > 2 or < 11, so all elements are returned (as a flat 1-D array)
b[(b > 2) | (b < 11)]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

**Making an array from existing data or another array**

In [104]:
a = np.arange(0, 222, 5)
a

array([  0,   5,  10,  15,  20,  25,  30,  35,  40,  45,  50,  55,  60,
        65,  70,  75,  80,  85,  90,  95, 100, 105, 110, 115, 120, 125,
       130, 135, 140, 145, 150, 155, 160, 165, 170, 175, 180, 185, 190,
       195, 200, 205, 210, 215, 220])

In [105]:
len(a)

45

In [106]:
array1 = a[40:45]
array1

array([200, 205, 210, 215, 220])

In [107]:
a = np.array([1,2,3,4,5,6,7])
b = np.array([8,9,10,11,12,13,14])

In [108]:
c = np.vstack((a,b))
c

array([[ 1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14]])

In [109]:
d = np.hstack((a,b))
d

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [110]:
d = d.reshape(2,7)
d

array([[ 1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14]])

### **Matrices**

In [111]:
# create a 2x3 matrix
a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [112]:
# create a larger matrix: 50 even numbers arranged as 5 rows x 10 columns
b = np.arange(0,100,2).reshape(5,10)
b

array([[ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28, 30, 32, 34, 36, 38],
       [40, 42, 44, 46, 48, 50, 52, 54, 56, 58],
       [60, 62, 64, 66, 68, 70, 72, 74, 76, 78],
       [80, 82, 84, 86, 88, 90, 92, 94, 96, 98]])

In [113]:
# create an even larger matrix: 500 even numbers arranged as 5 rows x 100 columns
b = np.arange(0, 1000, 2).reshape(5,100)
b

array([[  0,   2,   4,   6,   8,  10,  12,  14,  16,  18,  20,  22,  24,
         26,  28,  30,  32,  34,  36,  38,  40,  42,  44,  46,  48,  50,
         52,  54,  56,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
         78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102,
        104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128,
        130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154,
        156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180,
        182, 184, 186, 188, 190, 192, 194, 196, 198],
       [200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224,
        226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250,
        252, 254, 256, 258, 260, 262, 264, 266, 268, 270, 272, 274, 276,
        278, 280, 282, 284, 286, 288, 290, 292, 294, 296, 298, 300, 302,
        304, 306, 308, 310, 312, 314, 316, 318, 320, 322, 324, 326, 328,
        330, 332, 334, 336, 338, 340, 342, 344, 346, 348, 350, 352, 35

In [114]:
# create a matrix
b = np.arange(0,100,2).reshape(5,10)
b

array([[ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28, 30, 32, 34, 36, 38],
       [40, 42, 44, 46, 48, 50, 52, 54, 56, 58],
       [60, 62, 64, 66, 68, 70, 72, 74, 76, 78],
       [80, 82, 84, 86, 88, 90, 92, 94, 96, 98]])

In [115]:
# slice out a 3x2 sub-matrix: rows 0-2 and columns 3-4 of b
c = b[0:3, 3:5]
c

array([[ 6,  8],
       [26, 28],
       [46, 48]])

In [116]:
b.max()

98

In [117]:
b.min()

0

In [118]:
b.sum()

2450

In [119]:
b.min(axis=0) # minimum of each column (one value per column)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [121]:
b.max(axis=0)

array([80, 82, 84, 86, 88, 90, 92, 94, 96, 98])

In [120]:
b.min(axis=1) # minimum of each row (one value per row)

array([ 0, 20, 40, 60, 80])

In [122]:
b.max(axis=1)

array([18, 38, 58, 78, 98])

In [123]:
m = np.arange(0,20,2).reshape(5,2)
m

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10],
       [12, 14],
       [16, 18]])

In [124]:
m + np.array([20,22]) # broadcasting: [20, 22] is added to every row of m

array([[20, 24],
       [24, 28],
       [28, 32],
       [32, 36],
       [36, 40]])

In [125]:
np.vstack((m,np.array([42,44])))

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10],
       [12, 14],
       [16, 18],
       [42, 44]])

In [126]:
m * np.array([20, 22])

array([[  0,  44],
       [ 80, 132],
       [160, 220],
       [240, 308],
       [320, 396]])

In [127]:
m / np.array([20, 22])

array([[0.        , 0.09090909],
       [0.2       , 0.27272727],
       [0.4       , 0.45454545],
       [0.6       , 0.63636364],
       [0.8       , 0.81818182]])

In [128]:
m % np.array([20, 22])

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10],
       [12, 14],
       [16, 18]])

In [129]:
m - np.array([20,22])

array([[-20, -20],
       [-16, -16],
       [-12, -12],
       [ -8,  -8],
       [ -4,  -4]])

In [130]:
b = np.arange(0,100,2).reshape(5,10)
b

array([[ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28, 30, 32, 34, 36, 38],
       [40, 42, 44, 46, 48, 50, 52, 54, 56, 58],
       [60, 62, 64, 66, 68, 70, 72, 74, 76, 78],
       [80, 82, 84, 86, 88, 90, 92, 94, 96, 98]])

In [131]:
np.flip(b)

array([[98, 96, 94, 92, 90, 88, 86, 84, 82, 80],
       [78, 76, 74, 72, 70, 68, 66, 64, 62, 60],
       [58, 56, 54, 52, 50, 48, 46, 44, 42, 40],
       [38, 36, 34, 32, 30, 28, 26, 24, 22, 20],
       [18, 16, 14, 12, 10,  8,  6,  4,  2,  0]])

# **Conclusion**

In this introduction to NumPy, we explored its essential features and functionalities that make it a cornerstone of scientific computing in Python. Here's a summary of what we covered:

- **Array Creation and Initialization**
  - 1D, 2D, and 3D arrays
  - Initialization with zeros, ones, and specific values

- **Data Types**
  - Different integer and floating-point types (int32, float64, etc.)

- **Array Operations**
  - Basic arithmetic operations
  - Sorting and concatenation

- **Array Attributes**
  - Shape, size, and dimension of arrays

- **Indexing and Slicing**
  - Accessing and filtering array elements

- **Matrix Operations**
  - Creating and manipulating matrices

By mastering these fundamental concepts, you'll be well-equipped to handle more complex tasks in data science and machine learning. NumPy's efficiency and versatility make it an indispensable library for any Python programmer working with numerical data.