### NUMPY: Arrays and Matrices

**NumPy** is an extension to the Python programming language, adding support for large, multi-dimensional (numerical) arrays and matrices, along with a large library of high-level mathematical functions to operate on these arrays. NumPy is having the full form **"Numeric Python"**.

In [None]:
import numpy as np

### Create arrays

In [None]:
# create ndarrays from lists
# note: every element must be the same data type (will be converted is possible)
data1 = [1, 2, 3, 4, 5]        # list (all are interger data)
arr1 = np.array(data1)         # 1-D array
print (data1, type(data1), arr1, type(arr1))
data1 = [1, 2.5, 3, 4, 5]      # list (mixed data types)
arr1 = np.array(data1)         # 1-D array (converted to all float data)
print (data1, type(data1), arr1, type(arr1))

In [None]:
print (list(range(1, 5)))             # range function creates a list
data2 = [range(1, 5), range(5, 9)]    # list of lists
arr2 = np.array(data2)                # 2-D array
print (data2, type(data2))
print (arr2, type(arr2))
for x in np.nditer(arr2):             # iterating over the ndarray
    print (x, end = ", ")

In [None]:
list2 = arr2.tolist()                  # converting ndarray back to a list
print (list2, type(list2))

In [None]:
# examining arrays
print (arr1, type(arr1), id(arr1))
print (arr2, type(arr2), id(arr2))
print (arr1.dtype, arr2.dtype)         # data type of the array
print (arr1.shape, type(arr1.shape))   # shape of the array (5,) => Singleton notation
print (arr2.shape, type(arr2.shape))   # shape of the array (2, 4)
print (arr1.ndim, arr2.ndim)           # number of dimensions
print (arr1.size, arr2.size)           # total number of elements
print (len(arr1), len(arr2))           # size of the first dimension (aka axis)

In [None]:
# create special arrays
print (np.zeros(10))
print (np.zeros(10, dtype = np.int8))
print (np.zeros(10).astype(int))
print (np.zeros((3, 6)))
print (np.zeros((3, 6), int))
print (np.zeros((3, 6)).astype(int))
print (np.ones(10))
print (np.ones((3, 6)))
print (np.linspace(0, 1, 5))  # 0 to 1 (inclusive) with 5 points
print (np.logspace(0, 3, 4))  # 10^0 to 10^3 (inclusive) with 4 points

In [None]:
# arange is like rage, except it returns an array (but not a list)
int_list = list(range(5))
print (int_list, type(int_list[0]), type(int_list))
int_array = np.arange(5)
print (int_array, int_array.dtype, type(int_array))
float_array = int_array.astype(float)
print (float_array, float_array.dtype, type(float_array))

### Reshaping

In [None]:
matrix = np.arange(10, dtype = float)
print (matrix, matrix.dtype, type(matrix), matrix.shape)
matrix = matrix.reshape((2, 5))
print (matrix, matrix.dtype, type(matrix), matrix.shape)
matrix = matrix.reshape((2, 5)).astype(int)
print (matrix, matrix.dtype, type(matrix), matrix.shape)
matrix = matrix.astype(int).reshape((2, 5))
print (matrix, matrix.dtype, type(matrix), matrix.shape)
# transpose of a matrix
matrix = matrix.T
print (matrix, matrix.dtype, type(matrix), matrix.shape)
matrix = matrix.flatten()
print (matrix, matrix.dtype, type(matrix), matrix.shape)

In [11]:
matrix = np.arange(10, dtype = float).reshape((2, 5))
print (matrix)

print ('\nSorted in C-style order (Row Major):')
cmatrix = matrix.copy(order = 'C')
print (cmatrix)
for x in np.nditer(cmatrix):
    print (x, end = ", ")
print ()
print ('\nSorted in F-style order (Column Major):')
fmatrix = matrix.copy(order = 'F')
print (fmatrix)
for x in np.nditer(fmatrix):
    print (x, end = ", ")

[[0. 1. 2. 3. 4.]
 [5. 6. 7. 8. 9.]]

Sorted in C-style order (Row Major):
[[0. 1. 2. 3. 4.]
 [5. 6. 7. 8. 9.]]
0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 

Sorted in F-style order (Column Major):
[[0. 1. 2. 3. 4.]
 [5. 6. 7. 8. 9.]]
0.0, 5.0, 1.0, 6.0, 2.0, 7.0, 3.0, 8.0, 4.0, 9.0, 

### Append, Insert, Delete and Sort

In [12]:
matrix = np.arange(10, dtype = np.int8)
print (matrix, matrix.dtype, type(matrix), matrix.shape)
matrix = np.append(matrix, [10, 11, 12])
print (matrix, matrix.dtype, type(matrix), matrix.shape)
matrix = np.insert(matrix, 3, [13, 14, 15])
print (matrix, matrix.dtype, type(matrix), matrix.shape)
matrix = np.delete(matrix, [5, 6, 7])
print (matrix, matrix.dtype, type(matrix), matrix.shape)
matrix = np.delete(matrix, range(5, 8))
print (matrix, matrix.dtype, type(matrix), matrix.shape)
matrix = np.sort(matrix)    # sort in the ascending order
print (matrix, matrix.dtype, type(matrix), matrix.shape)
matrix = -np.sort(-matrix)    # sort in the descending order
print (matrix, matrix.dtype, type(matrix), matrix.shape)

[0 1 2 3 4 5 6 7 8 9] int8 <class 'numpy.ndarray'> (10,)
[ 0  1  2  3  4  5  6  7  8  9 10 11 12] int32 <class 'numpy.ndarray'> (13,)
[ 0  1  2 13 14 15  3  4  5  6  7  8  9 10 11 12] int32 <class 'numpy.ndarray'> (16,)
[ 0  1  2 13 14  5  6  7  8  9 10 11 12] int32 <class 'numpy.ndarray'> (13,)
[ 0  1  2 13 14  8  9 10 11 12] int32 <class 'numpy.ndarray'> (10,)
[ 0  1  2  8  9 10 11 12 13 14] int32 <class 'numpy.ndarray'> (10,)
[14 13 12 11 10  9  8  2  1  0] int32 <class 'numpy.ndarray'> (10,)


### Concatenation and Stack of 2 Arrays

In [13]:
a=np.array([[1, 2], [3, 4]])
print ('First array:')
print (a)
print ('\n')

b = np.array([[5, 6], [7, 8]])
print ('Second array:')
print (b)
print ('\n')

# both the arrays are of same dimensions
print ('Joining the two arrays along axis 0:')
print (np.concatenate((a, b)))
print ('\n')

print ('Joining the two arrays along axis 1:')
print (np.concatenate((a, b), axis=1))

First array:
[[1 2]
 [3 4]]


Second array:
[[5 6]
 [7 8]]


Joining the two arrays along axis 0:
[[1 2]
 [3 4]
 [5 6]
 [7 8]]


Joining the two arrays along axis 1:
[[1 2 5 6]
 [3 4 7 8]]


In [14]:
print ('Stack the two arrays along axis 0:')
print (np.stack((a, b), 0))
print ('\n')

print ('Stack the two arrays along axis 1:')
print (np.stack((a, b), 1))

Stack the two arrays along axis 0:
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


Stack the two arrays along axis 1:
[[[1 2]
  [5 6]]

 [[3 4]
  [7 8]]]


In [15]:
matrix = np.array([[100, 200, 300, 400]])
print (matrix, type(matrix), matrix.ndim)
arr = matrix.reshape(-1)
print (arr, type(arr), matrix.ndim)
arr = matrix.reshape(-1, 1, 4)
print (arr, type(arr), matrix.ndim)
arr = matrix.reshape(1, 4, -1)
print (arr, type(arr), matrix.ndim)

[[100 200 300 400]] <class 'numpy.ndarray'> 2
[100 200 300 400] <class 'numpy.ndarray'> 2
[[[100 200 300 400]]] <class 'numpy.ndarray'> 2
[[[100]
  [200]
  [300]
  [400]]] <class 'numpy.ndarray'> 2


### Selection

In [None]:
data1 = [1, 2, 3, 4, 5]        # list
arr1 = np.array(data1)         # 1-D array
print (arr1, type(arr1), arr1.dtype, arr1.shape)
print (arr1[0], arr1[-5])      # 0-th element (indexing like a list)
print (arr1[4], arr1[-1])
print (arr1[2:], arr1[:3])
print (arr1[::-1])

In [None]:
data2 = [range(1, 5), range(5, 9)]    # list of lists
arr2 = np.array(data2)                # 2-D array
print (arr2, type(arr2), arr2.dtype, arr2.shape)
print (arr2[1][2])                    # indexing
print (arr2[0, :], arr2[1, :])        # slicing
print (arr2[:, 2])
print (arr2[:, 1:4])                  # all rows and columns from 1 (inclusive) to 4 (exclusive)

### Views and copies

In [None]:
arr = np.arange(10)
print (arr, type(arr), arr.dtype, arr.shape)
print (arr[5:8])
arr[5:8] = 12       # all the pre-existing values at those index places will be over-written
print (arr)
arr_view = arr[5:8]   # creates a view of arr, not a copy
print (arr_view, type(arr_view), arr_view.dtype, arr_view.shape)
arr_view[:] = 13
print (arr_view)
print (arr)
arr_copy = arr[5:8].copy()   # creates a copy of arr
print (arr_copy, type(arr_copy), arr_copy.dtype, arr_copy.shape)
arr_copy[:] = 14
print (arr_copy)
print (arr)

In [None]:
# on the other hand in case of list
list1 = [11, 22, 33, 44]
list2 = list1
print (list1, id(list1), list2, id(list2)) # ids are same
list1[0] = 100
print (list1, list2)
list3 = []
list3[:] = list1
print (list1, id(list1), list3, id(list3)) # ids are different
list1[0] = 500
print (list1, list3)

### Using boolean arrays

In [None]:
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
print (arr, arr > 5)
print (arr[arr > 5], arr[~(arr > 5)])  # ~ denotes not
print (arr % 2 == 0)
print (arr[arr % 2 == 0])
print (arr[(arr > 3) & (arr < 8)])
arr = np.arange(0, 61)
print (arr[(arr % 2 == 0) & (arr % 3 == 0)])

In [None]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob'])
print (names)
print (names == 'Bob')
print (names[names == 'Bob'])
print (names[names != 'Bob'])
print (names[(names == 'Bob') | (names == 'Will')])  # don't use or but use |
names[names != 'Bob'] = 'Tom'   # assign based on a logical selection
print (names)
print (np.unique(names))        # getting all unique names

### Vectorized operations

In [None]:
nums = np.arange(5)
print (nums)
print (nums * 5)         # multiply each element by 5
nums = np.sqrt(nums)     # square root of each element
print (nums)       
print (np.ceil(nums))    # finding ceiling of each element
print (np.floor(nums))   # finding floor of each element
nums = np.arange(5)
print (nums + np.arange(5)) # add element wise
print (np.isnan(nums))   # checks for NaN
nums = 2 * nums
print (nums)

In [None]:
nums = np.array([100, 200, np.nan, 400, np.nan, 500.5, 600])
print (nums)
print (np.isnan(nums))

In [None]:
nums = np.arange(5)
print (nums)
print (np.maximum(nums, np.array([10, -2, 13, -4, 5]))) # compare element wise
print (np.minimum(nums, np.array([10, -2, 13, -4, 5])))

In [None]:
# calculating Euclidean distance between 2 vectors
vect1 = np.random.randn(10)   # normalized random numbers, average and std near to 0 and 1 respectively
print (vect1, np.average(vect1), np.std(vect1))
vect2 = np.random.randn(10)
print (vect2, np.average(vect2), np.std(vect2))
total = 0
for i in range(len(vect1)):
    total = total + (vect1[i] - vect2[i]) ** 2
print (np.sqrt(total))   # Calculation of Euclidean distance - 1
print (np.sqrt(np.sum((vect1 - vect2) ** 2)))   # Calculation of Euclidean distance - 2
print (np.sqrt(np.sum(pow(np.subtract(vect1, vect2), 2))))   # Calculation of Euclidean distance - 3
print (np.linalg.norm(vect1 - vect2))   # Calculation of Euclidean distance - 4
print (np.linalg.norm(np.subtract(vect1, vect2)))   # Calculation of Euclidean distance - 5

In [None]:
vec1 = np.random.rand(10)   # random numbers ranging from 0 to 1
print (vec1)

In [None]:
# math and stats
rnd = np.random.randn(4, 2)
print (rnd)
print (rnd.mean())
print (rnd.std())
print (rnd.argmin(), rnd.argmax())   # index of the minimum and maximum element
print (rnd.sum(axis = 0))   # sum of columns
print (rnd.sum(axis = 1))   # sum of rows

In [None]:
# methods for boolean arrays
rnd = np.random.randn(4, 2)
print (rnd)
print ((rnd > 0).sum())      # counts the number of positive values
print ((rnd > 0).any())      # checks if any value is True
print ((rnd > 0).all())      # checks if all values are True

In [None]:
# random numbers
np.random.seed(1001)            # set the seed value for the random number generation
nums = np.random.rand(2, 3)     # 2 x 3 matrix
print (nums)
nums = np.random.randn(10)      # random normals (mean with 0 and std with 1)
print (nums)
nums = np.random.randint(0, 2, 10)    # 10 randomly picked 0 or 1
print (nums)

In [None]:
nums = np.random.rand(2, 3)     # 2 x 3 matrix, outside of the cell seed has no effect
print (nums)

### Broadcasting

Implicit conversion to allow operations on arrays of different sizes.
Here the smaller array is stretched or "broadcasted" across the larger array so that they become compatible in shapes.

**Rules:**
staring with the trailing axis and working backwoard, Numpy compares arrays for dimensions -
* If two dimensions are equal then continue.
* If one of the operand has dimension 1 stretches it to match the largest one.

![Numpy%20Broadcast-3.png](attachment:Numpy%20Broadcast-3.png)

In [None]:
a1 = np.array([[ 0,  0,  0],
             [10, 10, 10],
             [20, 20, 20],
             [30, 30, 30]])
print (a1, type(a1))
a2 = np.array([[0], [10], [20], [30]])
print (a2, type(a2))

b1 = np.array([[0, 1, 2],
               [0, 1, 2],
               [0, 1, 2],               
               [0, 1, 2]])
print (b1, type(b1))
b2 = np.array([0, 1, 2])
print (b2, type(b2))

result = a1 + b1
print (result)

result = a1 + b2
print (result)

result = a2 + b1
print (result)

result = a2 + b2
print (result)

In [None]:
result = np.multiply(a2, b2)   # matrix multiplication
print (result)

### Writing and Reading from a Data File

In [None]:
a=np.array([1, 2, 3, 4, 5])
np.save('outfile', a)   # will save the array content to 'outfile.npy' file
b = np.load('outfile.npy')   # reading the content from the file again
print (b)
np.savetxt('outfile.txt', a)  # will save the array content to 'outfile.txt' file
b = np.loadtxt('outfile.txt')   # reading the content from the file again
print (b)