- This notebook covers NumPy array manipulation: accessing elements, slicing subarrays, reshaping, joining and splitting arrays.

# Defining Numpy arrays

In [4]:
import numpy as np
# Seed NumPy's global random generator so the values below are the same on every run.
np.random.seed(0)

# 1. Random integer array: five draws from the half-open range [0, 10).
x_random = np.random.randint(low=0, high=10, size=5)

x_random

array([5, 0, 3, 3, 7])

In [5]:
# 2. Array from a given range: the integers 10 through 19 (the stop value 20 is excluded).
x_range = np.arange(10, 20)
x_range

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [3]:
# Ten random floats drawn from the half-open interval [0.0, 1.0).
x_float01 = np.random.random(size=10)
x_float01

array([0.4236548 , 0.64589411, 0.43758721, 0.891773  , 0.96366276,
       0.38344152, 0.79172504, 0.52889492, 0.56804456, 0.92559664])

# 1. Numpy array attributes

- NumPy arrays expose attributes that describe the array itself.
- Examples: number of dimensions, shape, total size, data type, and the size in bytes of a single element or of the whole array.

## (a) Define multidimensional numpy arrays
Let's define multidimensional arrays of random integers in range(0, 10)

In [24]:
import numpy as np
np.random.seed(0)  # fixed seed so the arrays below are reproducible

# 1-D array: 6 integers drawn from [0, 10), i.e. values 0..9
x1 = np.random.randint(0, 10, size=6)

# 2-D array of shape (3, 4)
x2 = np.random.randint(0, 10, size=(3, 4))

# 3-D array of shape (3, 4, 5)
x3 = np.random.randint(0, 10, size=(3, 4, 5))

In [25]:
# Show each array as "name=repr"; the `=` specifier in an f-string prints both parts.
print(f"{x1=}", f"{x2=}", f"{x3=}", sep="\n")

x1=array([5, 0, 3, 3, 7, 9])
x2=array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])
x3=array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])


## (b) Access Array Attributes

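1. .ndim  (number of dimensions of the array)
2. .shape (tuple giving the size of the array along each dimension, like the shape of a tensor)
3. .size  (total number of elements in the array)
4. .dtype (data type of the elements)
5. .itemsize (size of a single element in bytes)
6. .nbytes (total size of the array in bytes)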

In [26]:
# Attributes of x1, printed one per line as "expression=value".
print(
    f"{x1.ndim=}",      # number of dimensions
    f"{x1.shape=}",     # tuple holding the length along each dimension
    f"{x1.size=}",      # total number of elements
    f"{x1.dtype=}",     # data type of the elements
    f"{x1.itemsize=}",  # size of one element in bytes
    f"{x1.nbytes=}",    # size of the whole array in bytes (size * itemsize)
    sep="\n",
)


x1.ndim=1
x1.shape=(6,)
x1.size=6
x1.dtype=dtype('int64')
x1.itemsize=8
x1.nbytes=48


In [27]:
# Attribute checks for the 2-D array x2 created with size=(3, 4).
assert x2.ndim == 2
assert x2.shape == (3, 4)
assert x2.size == 12
# The default integer dtype returned by randint depends on the platform
# (64-bit on most systems, 32-bit on Windows with NumPy < 2), so check the
# kind of dtype and derive the byte counts instead of hard-coding int64/8/96.
assert np.issubdtype(x2.dtype, np.integer)
assert x2.itemsize == x2.dtype.itemsize
assert x2.nbytes == x2.size * x2.itemsize

In [28]:
# Display the full array x3.
x3

array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

In [29]:
# Attribute checks for the 3-D array x3 created with size=(3, 4, 5).
assert x3.ndim == 3
assert x3.shape == (3, 4, 5)
assert x3.size == 60
# The default integer dtype returned by randint depends on the platform
# (64-bit on most systems, 32-bit on Windows with NumPy < 2), so check the
# kind of dtype and derive the byte counts instead of hard-coding int64/8/480.
assert np.issubdtype(x3.dtype, np.integer)
assert x3.itemsize == x3.dtype.itemsize
assert x3.nbytes == x3.size * x3.itemsize

# 2. Access Array Elements
- As with a Python list, use `[i]` to access the i-th element of a NumPy array (indices start at 0; negative indices count from the end).


In [30]:
# Indexing a 1-D array: index 0 is the first element, negative indices count from the end.

x1 = np.array([5, 0, 3, 3, 7, 9])
# first element, last element, second-to-last element
assert (x1[0], x1[-1], x1[-2]) == (5, 9, 7)

In [31]:
# Multidimensional indexing: one index per axis, separated by commas -> x2[row, column].
x2 = np.array([
    [3, 5, 2, 4],
    [7, 6, 8, 8],
    [1, 6, 7, 7],
])

assert x2[-1, 0] == 1  # last row, first column
assert x2[-2, 3] == 8  # second-to-last row, fourth column


- We can modify values of array using index.

In [32]:
# Overwrite one element in place using its full index (one index per axis).
x3[2,2,3] = 1000
x3

array([[[   8,    1,    5,    9,    8],
        [   9,    4,    3,    0,    3],
        [   5,    0,    2,    3,    8],
        [   1,    3,    3,    3,    7]],

       [[   0,    1,    9,    9,    0],
        [   4,    7,    3,    2,    7],
        [   2,    0,    0,    4,    5],
        [   5,    6,    8,    4,    1]],

       [[   4,    9,    8,    1,    1],
        [   7,    9,    9,    3,    6],
        [   7,    2,    0, 1000,    5],
        [   9,    4,    4,    6,    4]]])

**NOTE: Unlike a Python list, a NumPy array has a fixed datatype. If we assign a float to an element of an int64 array, the value is silently truncated to an integer; no warning is raised.** See the example:

In [33]:
# Assigning a float into the integer array x3 drops the fractional part.
x3[2, 2, 3] = 1000.123
# Make the truncation explicit: the stored value is the integer 1000, not 1000.123.
assert x3[2, 2, 3] == 1000
x3

array([[[   8,    1,    5,    9,    8],
        [   9,    4,    3,    0,    3],
        [   5,    0,    2,    3,    8],
        [   1,    3,    3,    3,    7]],

       [[   0,    1,    9,    9,    0],
        [   4,    7,    3,    2,    7],
        [   2,    0,    0,    4,    5],
        [   5,    6,    8,    4,    1]],

       [[   4,    9,    8,    1,    1],
        [   7,    9,    9,    3,    6],
        [   7,    2,    0, 1000,    5],
        [   9,    4,    4,    6,    4]]])

# 3. Array Slicing
- Slicing works as it does for Python lists, with the syntax:
```python
x[start:stop:step]
```

In [34]:
# The integers 0..9: arange counts from 0 up to (but not including) 10 in steps of 1.
x = np.arange(0, 10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [35]:
# 1. 1-D subarrays with x[start:stop:step] (stop is exclusive)

assert x[:5].tolist() == [0, 1, 2, 3, 4]   # first five elements
assert x[5:].tolist() == [5, 6, 7, 8, 9]   # everything from index 5 on
assert x[4:7].tolist() == [4, 5, 6]        # indices 4, 5 and 6
assert x[1:9:2].tolist() == [1, 3, 5, 7]   # every 2nd element from index 1 up to index 9
assert x[1::3].tolist() == [1, 4, 7]       # every 3rd element from index 1 to the end


In [36]:
# 2. Negative indices and negative steps

# A negative stop counts from the end: everything except the last element.
assert x[:-1].tolist() == [0, 1, 2, 3, 4, 5, 6, 7, 8]
# A negative step walks backwards: from index -3 down to (but excluding) index 1, in steps of 2.
assert x[-3:1:-2].tolist() == [7, 5, 3]

# Same start and stop, step -1 versus step +1: walking forwards from index -3
# never reaches index 1, so the second slice is empty.
print(x[-3:1:-1], x[-3:1:1], sep="\n")


[7 6 5 4 3 2]
[]


In [37]:
# 3. Multidimensional subarrays: same slice syntax, one slice per axis, separated by commas

np.random.seed(0)
x2 = np.random.randint(0, 12, size=(3, 4))
x2


array([[ 5,  0,  3, 11],
       [ 3,  7,  9,  3],
       [ 5,  2,  4,  7]])

In [38]:
assert x2[0].tolist() == [5, 0, 3, 11]   # first row
assert x2[:, 2].tolist() == [3, 9, 4]     # third column
# Rows 0-1 crossed with columns 1-2; array_equal is True only if shape and all elements match.
assert np.array_equal(x2[:2, 1:3], [[0, 3], [7, 9]])

### SubArray copies - Important
- As with 1D arrays, a slice of a multidimensional array is a view: modifying the subarray also modifies the original array.
- To change a subarray without touching the original array, create an independent copy:

  ```python
  array[i:j, m:n].copy()
  ```

In [39]:
# Display x2 as it is before the view example modifies it.
x2

array([[ 5,  0,  3, 11],
       [ 3,  7,  9,  3],
       [ 5,  2,  4,  7]])

In [43]:
# No-copy view: a slice of x2 refers to the same data as x2 itself.
x2_sub = x2[0:2, 0:2]
x2_sub[0, 0] = 1000
x2  # the original array now shows 1000 at position [0, 0]

array([[1000,    0,    3,   11],
       [   3,    7,    9,    3],
       [   5,    2,    4,    7]])

In [48]:
# .copy() makes the subarray independent of the array it was sliced from.
np.random.seed(0)
x3 = np.random.randint(0, 12, size=(3, 4))  # same seed and call as for x2, so the same values
x3_sub = x3[0:2, 0:2].copy()
x3_sub[0, 0] = 1000
x3  # unchanged: the write went to the copy only

array([[ 5,  0,  3, 11],
       [ 3,  7,  9,  3],
       [ 5,  2,  4,  7]])

# 4. Reshaping of Arrays

## (a) .reshape()
- We can change dimension of an array in numpy using

``` python
                    .reshape()
```

- The new shape must contain exactly as many elements as the original array; otherwise `reshape` raises a `ValueError`.



In [59]:
# Start from a 1-D array holding the integers 1..9.
grid = np.arange(start=1, stop=10)
assert grid.ndim == 1

grid

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [65]:
# Reshape to 2-D: 3 rows x 3 columns (the shape is passed as a tuple here).
grid = grid.reshape((3, 3))
assert grid.ndim == 2

grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [66]:
# Flatten back to 1-D with 9 elements.
grid = grid.reshape((9,))
assert grid.ndim == 1

grid

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

## (b) newaxis keyword

In [68]:
# 1. Row vector via reshape: 3 elements arranged as 1 row x 3 columns

x = np.array([1, 2, 3])
x.reshape((1, 3))

array([[1, 2, 3]])

In [69]:
# Using newaxis: putting np.newaxis first inserts a new leading axis,
# giving a single row (same result as x.reshape(1,3)).
x[np.newaxis, :]

array([[1, 2, 3]])

In [70]:
# 2. Create a column vector using reshape: 3 rows x 1 column
x.reshape(3,1)

array([[1],
       [2],
       [3]])

In [71]:
# Using newaxis: putting np.newaxis last inserts a new trailing axis,
# giving a single column (same result as x.reshape(3,1)).
x[:, np.newaxis]

array([[1],
       [2],
       [3]])

# 5. Array Concatenation
``` python
- np.concatenate
- np.vstack
- np.hstack
```

- For 1D array, concatenation will result in 1D array only
- But For multidimensional array, concatenation can be performed in two ways:
  1. row wise (i.e. vertical concatenation)
  2. column  wise (i.e. horizontal concatenation)

## (a) 1D Array 
``` python
- np.concatenate
```

In [78]:
# Joining two 1-D arrays gives one longer 1-D array.
x = np.array([1, 2, 3, 4])
np.concatenate((x, x))

array([1, 2, 3, 4, 1, 2, 3, 4])

## (b) 2D Array
- Use concatenate
- Use vstack (same as concatenate with axis 0)
- Use hstack (same as concatenate with axis 1)

In [84]:
x1 = np.array([[1, 2],
               [3, 4]])
np.concatenate((x1, x1), axis=0)  # axis=0 is the default: vertical stitching (rows are appended)


array([[1, 2],
       [3, 4],
       [1, 2],
       [3, 4]])

In [83]:
np.concatenate([x1,x1], axis=1)  # Horizontal stitching: axis=1 appends columns

array([[1, 2, 1, 2],
       [3, 4, 3, 4]])

In [87]:
# Vertical stack: the second copy of x1 is placed below the first.
np.vstack([x1,x1])

array([[1, 2],
       [3, 4],
       [1, 2],
       [3, 4]])

In [88]:
# Horizontal stack: the second copy of x1 is placed to the right of the first.
np.hstack([x1,x1])

array([[1, 2, 1, 2],
       [3, 4, 3, 4]])

# 6. Array Splitting

``` python
- np.split
- np.vsplit
- np.hsplit
- np.dsplit
```

## (a) 1D array
``` python
- np.split
```

In [104]:
# 1. np.split(ary, indices_or_sections, axis=0)

np.random.seed(0)  # no random draw happens in this cell; x below is a literal array
x = np.array([5, 0, 3, 3, 7, 9, 3, 5, 2, 4])
# Cutting at indices 2 and 6 yields three pieces: x[:2], x[2:6] and x[6:].
x1, x2, x3 = np.split(x, [2, 6])
assert x1.tolist() == [5, 0]
assert x2.tolist() == [3, 3, 7, 9]
assert x3.tolist() == [3, 5, 2, 4]

## (b) Multidimensional Array

- vsplit is split with axis=0.
- hsplit is split with axis=1.
- dsplit splits an array with three or more dimensions along the third axis (axis=2).
``` python
- np.vsplit
- np.hsplit
- np.dsplit
```

In [113]:
# 4x4 grid holding the integers 5..20, filled row by row.
grid = np.arange(5, 21).reshape((4, 4))
grid


array([[ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20]])

In [114]:
np.vsplit(grid, [3])  # Split at row index 3: rows 0-2 in the first piece, row 3 in the second

[array([[ 5,  6,  7,  8],
        [ 9, 10, 11, 12],
        [13, 14, 15, 16]]),
 array([[17, 18, 19, 20]])]

In [118]:
np.hsplit(grid,[3])  # Split at column index 3: columns 0-2 in the first piece, column 3 in the second

[array([[ 5,  6,  7],
        [ 9, 10, 11],
        [13, 14, 15],
        [17, 18, 19]]),
 array([[ 8],
        [12],
        [16],
        [20]])]