# WORKING WITH NUMPY

- NumPy stands for *numerical python*
- It is used for creating and working with n-dimensional arrays
- This makes it well suited to linear algebra and numerical computation in data analytics
- Install numpy: `pip install numpy`
- Before we use a library we need to import it: `import numpy` or `import numpy as np`

## NUMPY BASICS

### Importing NumPy

In [1]:
# importing numpy
import numpy as np

### Creating Arrays

In [2]:
# creating 1D array using Python list
arr1 = np.array([1, 2, 3])
print(type(arr1))
print(arr1)

<class 'numpy.ndarray'>
[1 2 3]


In [3]:
# creating 2D array using Python list of lists
arr2 = np.array([[1, 2], [3, 4]])
print(arr2)

[[1 2]
 [3 4]]


In [4]:
# Creating 1D array using Python tuple object
arr3 = np.array((1, 2, 3, 4))
print(arr3)

[1 2 3 4]


### Array Attributes

In [5]:
arr = np.array([
    [1, 2, 3], 
    [4, 5, 6]
    ])

In [6]:
# shape attribute
arr.shape       # (2, 3)    -> 2 rows and 3 columns

(2, 3)

In [7]:
# ndim (number of dimensions) attribute
arr.ndim        # 2 -> number of dimensions

2

In [8]:
# size attribute - total elements
arr.size        # 6 -> total elements in the array

6

In [9]:
# dtype attribute - returns datatype of the elements
arr.dtype       # int64 -> data type

dtype('int64')

### Creating Special Arrays

In [10]:
# Zeros array: zeros((rows, columns))
np.zeros((2, 3))        # [[0., 0., 0.], [0., 0., 0.]]

array([[0., 0., 0.],
       [0., 0., 0.]])

In [11]:
# Ones array: ones((rows, columns))
np.ones((3, 5))     # creating an array with 3 rows and 5 columns, data type is float

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [12]:
# An array filled with a constant: full((rows, columns), element)
np.full((2, 8), 4)

array([[4, 4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4, 4]])

In [13]:
# Identity matrix: eye(dimension)
np.eye(3)       # 3x3 identity

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### Creating Number Sequences

- Using *arange* (like *range* but returns NumPy array)

In [14]:
np.arange(5)        # [0, 1, 2, 3, 4]

array([0, 1, 2, 3, 4])

In [15]:
np.arange(1, 10, 2) # [1, 3, 5, 7, 9]

array([1, 3, 5, 7, 9])

- Using *linspace* (equally spaced numbers)

In [16]:
np.linspace(0, 1, 5)    # [0., 0.25, 0.5, 0.75, 1.]

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

### Random Arrays

In [17]:
np.random.rand(2, 3)        # 2-rows and 3-columns: each element is Uniform [0, 1)

array([[0.56026871, 0.13934449, 0.13786226],
       [0.76779862, 0.68270025, 0.18950119]])

In [18]:
np.random.randn(2, 3)       # Normal distribution

array([[-1.79225657, -0.78201165,  1.10045611],
       [ 0.13092053, -0.16686159, -0.01313205]])

In [19]:
np.random.randint(0, 10, (2, 3))    # 2-rows, 3-columns, and Random ints 0-9

array([[1, 5, 8],
       [5, 0, 0]], dtype=int32)

### Changing Data Type

In [20]:
# astype(type) function is used to change the data type of an array
arr = np.array([1.2, 3.4, 5.6])
arr_int = arr.astype(int)   # [1, 3, 5]
arr_int

array([1, 3, 5])

## Indexing, Slicing & Reshaping

### Indexing (Accessing Elements)

- 1D Array:

In [21]:
arr = np.array([10, 20, 30, 40, 50])
print(arr[0])       # 10
print(arr[-1])      # 50 (last element)

10
50


- 2D Array:

In [22]:
arr2d = np.array([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]])
print(arr2d[0, 1])      # 2 -> Row 1 and Column 2
print(arr2d[2, -1])     # 9 -> Row 3 and last Column

2
9


### Slicing (Extracting Ranges)

- 1D Array:

In [23]:
arr

array([10, 20, 30, 40, 50])

In [24]:
arr[1:4]        # [20, 30, 40]

array([20, 30, 40])

In [25]:
arr[:3]         # [10, 20, 30]

array([10, 20, 30])

In [26]:
arr[::2]        # [10, 30, 50]

array([10, 30, 50])

- 2D Array:

In [27]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [28]:
arr2d[0:2, 1:3]     # Rows 1-2, Cols 2-3    -> [[2, 3], [5, 6]]

array([[2, 3],
       [5, 6]])

In [29]:
arr2d[:, 0]     # [1, 4, 7] -> First column

array([1, 4, 7])

### Boolean Indexing

In [30]:
arr = np.array([1, 2, 3, 4, 5])
mask = arr > 3
print(mask)           # [False, False, False, True, True]
print(arr[mask])

[False False False  True  True]
[4 5]


### Fancy Indexing (Index Arrays)

In [31]:
arr = np.array([10, 20, 30, 40, 50])
indices = [0, 3, 4]
print(arr[indices])     # [10, 40, 50]

[10 40 50]


In [32]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [33]:
arr2d[[0, 2], [1, 2]]       # picks (0, 1) -> 2 and (2, 2) -> 9

array([2, 9])

### Reshaping Arrays

In [34]:
arr = np.arange(1, 7)       # [1 2 3 4 5 6]
reshaped = arr.reshape((2, 3))
print(reshaped)

[[1 2 3]
 [4 5 6]]


### Flattening Arrays

In [35]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [36]:
arr2d.flatten()         # [1, 2, 3, 4, 5, 6, 7, 8, 9]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

### Adding / Removing Dimensions

In [37]:
arr = np.array([1, 2, 3])
arr

array([1, 2, 3])

In [38]:
arr_2d = arr[np.newaxis, :]      # Shape: (1, 3)
arr_2d

array([[1, 2, 3]])

In [39]:
arr_2d_col = arr[:, np.newaxis] # Shape: (3, 1)
arr_2d_col

array([[1],
       [2],
       [3]])

## Array Operations & Broadcasting

### Element-wise Arithmetic

- Between arrays:

In [40]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

In [41]:
print(a + b)        # [5 7 9]
print(a - b)        # [-3 -3 -3]
print(a * b)        # [4 10 18]
print(a / b)        # [0.25 0.4 0.5]
print(a ** 2)       # [1 4 9]

[5 7 9]
[-3 -3 -3]
[ 4 10 18]
[0.25 0.4  0.5 ]
[1 4 9]


- With scalars:

In [42]:
print(a + 10)       # [11, 12, 13]

[11 12 13]


In [43]:
print(a * 2)        # [2 4 6]

[2 4 6]


### Universal Functions (ufuncs)

- NumPy has vectorized math functions that apply element-wise

In [44]:
arr = np.array([1, 4, 9, 16])
arr

array([ 1,  4,  9, 16])

In [45]:
np.sqrt(arr)

array([1., 2., 3., 4.])

In [46]:
np.exp(arr)     # e^arr

array([2.71828183e+00, 5.45981500e+01, 8.10308393e+03, 8.88611052e+06])

In [47]:
np.log(arr)

array([0.        , 1.38629436, 2.19722458, 2.77258872])

In [48]:
np.sin(arr)

array([ 0.84147098, -0.7568025 ,  0.41211849, -0.28790332])

In [49]:
np.cos(arr)

array([ 0.54030231, -0.65364362, -0.91113026, -0.95765948])

### Aggregate Functions

In [50]:
arr = np.array([[1, 2, 3], [4, 5, 6]])
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [52]:
print(arr.sum())        # 21 -> sum of all elements

21


In [53]:
arr.sum(axis=0)  # [5 7 9] -> sum of each column

array([5, 7, 9])

In [54]:
arr.sum(axis=1) # [6 15] -> sum of each row

array([ 6, 15])

In [55]:
print(arr.mean())
print(arr.min())
print(arr.max())
print(arr.std())

3.5
1
6
1.707825127659933


### Broadcasting (Different Shapes)
- Broadcasting is NumPy's mechanism for performing arithmetic operations on arrays of different shapes

In [56]:
arr = np.array([[1, 2, 3], [4, 5, 6]])
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [58]:
add_vector = np.array([10, 20, 30])
add_vector

array([10, 20, 30])

In [59]:
arr + add_vector

array([[11, 22, 33],
       [14, 25, 36]])

### Comparison Operators

In [60]:
arr = np.array([1, 2, 3, 4])

In [62]:
print(arr > 2)

[False False  True  True]


In [64]:
print(arr == 3)

[False False  True False]


In [65]:
arr

array([1, 2, 3, 4])

In [68]:
print(np.any(arr > 3))     # True - returns True if any element satisfies the condition
print(np.all(arr > 0))     # True - returns True if all elements satisfy the condition

True
True


In [69]:
### Clipping Values
arr = np.array([1, 5, 8, 10])
arr

array([ 1,  5,  8, 10])

In [70]:
np.clip(arr, 3, 8)

array([3, 5, 8, 8])

## Advanced Indexing, Sorting and Stacking

### Conditional Selection (`where`)

In [72]:
arr = np.array([10, 20, 30, 40])
arr


array([10, 20, 30, 40])

In [73]:
result = np.where(arr > 25, 1, 0)
print(result)

[0 0 1 1]


### Extracting Non-Zero Elements

In [74]:
arr = np.array([0, 2, 0, 4, 5])
arr

array([0, 2, 0, 4, 5])

In [76]:
indices = np.nonzero(arr)

In [77]:
print(arr[indices])

[2 4 5]


### Sorting Arrays

In [78]:
arr = np.array([3, 1, 2])
print(np.sort(arr))

[1 2 3]


In [79]:
arr2d = np.array([[3, 1, 2], [9, 8, 7]])
np.sort(arr2d, axis=1)

array([[1, 2, 3],
       [7, 8, 9]])

#### Argsort (indices of sorted order)

In [80]:
arr

array([3, 1, 2])

In [81]:
np.argsort(arr)     # array of indices that would sort the array

array([1, 2, 0])

### Stacking Arrays

In [82]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

#### Vertical Stacking

In [83]:
np.vstack((a, b))

array([[1, 2, 3],
       [4, 5, 6]])

#### Horizontal Stacking

In [84]:
np.hstack((a, b))

array([1, 2, 3, 4, 5, 6])

#### Column Stack (2D columns)

In [85]:
print(a)
print(b)

[1 2 3]
[4 5 6]


In [86]:
np.column_stack((a, b))

array([[1, 4],
       [2, 5],
       [3, 6]])

### Splitting Arrays

#### Split into equal parts

In [87]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [88]:
np.split(arr, 5)

[array([0, 1]), array([2, 3]), array([4, 5]), array([6, 7]), array([8, 9])]

#### Vertical Split

In [89]:
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
arr2d

array([[1, 2, 3],
       [4, 5, 6]])

In [90]:
np.vsplit(arr2d, 2)

[array([[1, 2, 3]]), array([[4, 5, 6]])]

### Unique and Set Operations

In [91]:
arr = np.array([1, 2, 2, 3, 4, 4])
arr

array([1, 2, 2, 3, 4, 4])

In [92]:
np.unique(arr)

array([1, 2, 3, 4])

In [93]:
np.intersect1d([1, 2], [2, 3])

array([2])

In [94]:
np.union1d([1, 2], [2, 3])

array([1, 2, 3])

In [95]:
np.setdiff1d([1, 2, 3], [2, 3, 4])

array([1])

## Linear Algebra, Statistics & Saving/Loading Data

### Matrix Multiplication

In [96]:
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
print(A)
print(B)

[[1 2]
 [3 4]]
[[5 6]
 [7 8]]


In [98]:
print(np.dot(A, B))

[[19 22]
 [43 50]]


In [99]:
# or
print(A @ B)

[[19 22]
 [43 50]]


### Transpose & Inverse

In [100]:
A = np.array([[1, 2, 3], [4, 5, 6]])
print(A)

[[1 2 3]
 [4 5 6]]


In [102]:
print(A.T)      # The T here stands for transpose

[[1 4]
 [2 5]
 [3 6]]


In [104]:
square_matrix = np.array([[1, 2], [3, 4]])
inv_matrix = np.linalg.inv(square_matrix)
print(square_matrix)
print(inv_matrix)

[[1 2]
 [3 4]]
[[-2.   1. ]
 [ 1.5 -0.5]]


In [108]:
print(np.dot(square_matrix, inv_matrix))   # should give the identity matrix (up to floating-point rounding error)

[[1.0000000e+00 0.0000000e+00]
 [8.8817842e-16 1.0000000e+00]]


### Determinant

In [106]:
np.linalg.det(square_matrix)

np.float64(-2.0000000000000004)