### Install and Import

In [47]:
# If you are using a conda environment, make sure the kernel that belongs to that
# environment is selected while running this notebook.

# Install numpy. The %pip magic (rather than !pip) targets the environment of the running kernel.
%pip install numpy

Note: you may need to restart the kernel to use updated packages.


In [48]:
# importing the numpy package with an alias np
import numpy as np

### Introduction

In [49]:
# A simple one-dimensional array built from a Python list
array_basic = np.array([1, 2, 3])
# Show the array itself together with the type of the array object
print(array_basic, type(array_basic))

[1 2 3] <class 'numpy.ndarray'>


In [50]:
# A two-dimensional array: a list of rows, each row being a list of values
array_2dims = np.array([[1, 2, 3], [3, 4, 5]])
print(array_2dims, type(array_2dims), sep="\n")

[[1 2 3]
 [3 4 5]]
<class 'numpy.ndarray'>


In [51]:
# An array whose entries are floating point values
array_float = np.array([1.0, 2.0])
print(array_float, type(array_float), sep="\n")

[1. 2.]
<class 'numpy.ndarray'>


In [52]:
# Getting to know the shape, size and number of dimensions of an array:
#   shape - tuple with the length of the array along each dimension
#   size  - attribute holding the total number of entries in the array
#   ndim  - number of dimensions
print(array_basic.shape, array_basic.size, array_basic.ndim, sep="\n")
print(array_2dims.shape, array_2dims.size, array_2dims.ndim, sep="\n")

(3,)
3
1
(2, 3)
6
2


In [53]:
# The dtype attribute reports the type of the data stored inside a numpy array
# (as opposed to type(), used above, which reports the type of the array object itself).
print(array_basic.dtype, array_2dims.dtype, array_float.dtype, sep="\n")

# Note: no dtype was given when these arrays were created, so numpy chose one by looking at the entries.

int64
int64
float64


In [54]:
# The datatype can also be chosen explicitly while initializing the array
array_int16 = np.array([1, 2, 3], dtype=np.int16)

# Note: int16 can hold values from -32768 to +32767. If we know an array will never
# contain values outside that range, initializing it as int16 makes every entry take
# 2 bytes instead of the 8 bytes of int64 (the integer type numpy picked by default above;
# that default can differ between platforms and numpy versions). Values outside the
# range do not fit in int16, so only use it when the range is guaranteed.

In [55]:
# itemsize gives the number of bytes of memory consumed by each entry of a numpy array
print(array_basic.itemsize, array_int16.itemsize, sep="\n")

# Note: int16 means every value takes 16 bits, i.e. 2 bytes (8 bits make up a byte)

8
2


### Accessing array elements

In [56]:
# A specific element is accessed with the syntax array[row, col].
# Indexing starts from 0, so the first row/col has index 0.

array_2dims[0, 0]  # first row, first column

1

In [57]:
# A whole row or column can be accessed by putting a colon in place of the row/col index.

array_2dims[1, :] # returns the row with index 1: the colon selects all of its columns

array([3, 4, 5])

In [58]:
# Elements can also be selected at a regular gap (step): array[row, col_start:col_end:step]
# col_end is exclusive. With a step of 1, as used here, consecutive columns are taken,
# so this selects columns 0 and 1 of row 0; a step of 2 would take every second column.

array_2dims[0, 0:2:1]

array([1, 2])

### Modifying array elements and array type

In [59]:
# Elements are modified by assigning to an index:
print(array_2dims)  # how the array looks before the update
array_2dims[1, 2] = 9
print(array_2dims)  # how it looks after the update

[[1 2 3]
 [3 4 5]]
[[1 2 3]
 [3 4 9]]


In [60]:
# The same row/col selection syntax used for accessing can also be assigned to
array_2dims[:, 1] = 2 # sets every entry of the second column (index 1) to 2
array_2dims

array([[1, 2, 3],
       [3, 2, 9]])

In [119]:
# The astype method converts an array to another data type.
# It returns a new array and leaves the original untouched, so to keep the result
# assign it back, e.g.: array_2dims = array_2dims.astype(np.float64)

array_2dims.astype(np.float64)

array([[1., 2., 3.],
       [3., 2., 9.]])

### Initializing different types of arrays

In [63]:
# An array composed of zeros (very useful to know): here a 4x5 matrix.
# Notice the double brackets: the shape (4, 5) is passed to np.zeros as one single argument.
array_zeros = np.zeros((4, 5))
print(array_zeros, array_zeros.shape)

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]] (4, 5)


In [64]:
# A matrix of ones is created the same way as the zeros matrix
array_ones = np.ones((4, 5))
print(array_ones)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


In [66]:
# For any other constant use np.full: the first argument is the shape,
# fill_value is the number every entry of the array is set to.
array_twos = np.full((4, 5), fill_value=2)
print(array_twos)

[[2 2 2 2 2]
 [2 2 2 2 2]
 [2 2 2 2 2]
 [2 2 2 2 2]]


In [69]:
# An array of random float64 values between 0 and 1.
# Notice that np.random.rand takes each dimension as a separate argument rather than
# one shape tuple, so no double brackets are used here.
# No seed is set, so the values differ on every run.
array_rands = np.random.rand(4, 5)
print(array_rands)

[[0.45764008 0.93459526 0.04193301 0.40508028 0.3684482 ]
 [0.84374379 0.06678458 0.63007489 0.70730883 0.52941534]
 [0.13807703 0.89165111 0.24612214 0.63372279 0.7400107 ]
 [0.71170016 0.20484818 0.27605595 0.31823257 0.04926966]]


In [71]:
# Random integers come from a different function (the function name changes, not an argument):
#   np.random.randint(lower_limit, upper_limit, size=(dim1, dim2...))
# The lower limit is included, the upper limit is excluded, so the values here lie in 1..8.
array_randints = np.random.randint(low=1, high=9, size=(4, 5))
print(array_randints)

[[7 2 1 4 7]
 [4 1 3 3 3]
 [8 7 8 2 6]
 [5 3 8 2 3]]


In [73]:
# All of the above works for arrays with other numbers of dimensions too.
# Example: a random float array with 3 dimensions (4 blocks of 5 rows and 4 columns)
array_rand3d = np.random.rand(4, 5, 4)
print(array_rand3d)

[[[0.95206785 0.94189088 0.29877279 0.85804352]
  [0.64618784 0.81853388 0.15324921 0.09418391]
  [0.21764902 0.18522765 0.19624737 0.62479186]
  [0.97175958 0.23054867 0.52667101 0.46501316]
  [0.77575746 0.4547242  0.01225287 0.47439717]]

 [[0.2939129  0.81599695 0.86223291 0.47087348]
  [0.91131478 0.6833063  0.63955889 0.6704524 ]
  [0.50497981 0.9990075  0.38540782 0.7040454 ]
  [0.23654643 0.21690236 0.3305521  0.59575488]
  [0.04361893 0.12154295 0.13777441 0.04815248]]

 [[0.10236607 0.01866216 0.13918772 0.7479766 ]
  [0.57167764 0.26234913 0.12462935 0.87322879]
  [0.79875133 0.37180727 0.18087981 0.42420728]
  [0.39861963 0.63168838 0.39693706 0.70186783]
  [0.64126111 0.89423964 0.07465024 0.60200509]]

 [[0.57598951 0.38658123 0.9337926  0.83469331]
  [0.58921521 0.61873622 0.65337625 0.79988191]
  [0.40562724 0.51559881 0.27417031 0.78793732]
  [0.51306    0.37188484 0.11730508 0.16000257]
  [0.34829083 0.17597351 0.00134373 0.58554749]]]


In [75]:
# An identity matrix (remember linear algebra!): ones on the main diagonal, zeros elsewhere.
# The single argument is the number of rows (and columns) of the square matrix.
array_ident = np.identity(4)
print(array_ident)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


### Precautions while copying arrays

In [76]:
# Never "copy" an array with a plain assignment (array2 = array1): both names then refer
# to the very same array, so modifying it through one name also changes what the other sees.

array_temp1 = np.zeros(4)
array_temp2 = array_temp1  # not a copy, just a second name for the same array

array_temp2[2] = 5
# Ideally array_temp1 (printed second) would still be all zeros, but it is not!
print(array_temp2, array_temp1, sep="\n")

[0. 0. 5. 0.]
[0. 0. 5. 0.]


In [78]:
# Instead, copy arrays with the .copy() method: array_temp2 becomes an actual copy
# of array_temp1 rather than another name for the same thing.
array_temp1 = np.zeros(4)
array_temp2 = array_temp1.copy()

array_temp2[2] = 5
# This time array_temp1 (printed second) is unchanged: it is still all zeros
print(array_temp2, array_temp1, sep="\n")

[0. 0. 5. 0.]
[0. 0. 0. 0.]


### Maths on arrays

In [83]:
# Elementwise operations: an arithmetic operation between an array and a number
# is applied to each element of the array.
# Printed in order: the array itself, times 2, plus 2, divided by 2.
print(array_2dims, array_2dims * 2, array_2dims + 2, array_2dims / 2, sep="\n")

[[1 2 3]
 [3 2 9]]
[[ 2  4  6]
 [ 6  4 18]]
[[ 3  4  5]
 [ 5  4 11]]
[[0.5 1.  1.5]
 [1.5 1.  4.5]]


In [90]:
# Operations among arrays: the operation is applied between the matching elements
array_2dims2 = np.array([[4, 5, 6], [7, 6, 5]])
array_2dims3 = array_2dims + array_2dims2
# Printed in order: first operand, second operand, their elementwise sum
print(array_2dims, array_2dims2, array_2dims3, sep="\n")

# Note: for such operations the arrays need to have the same shape (or shapes that numpy can broadcast together)

[[1 2 3]
 [3 2 9]]
[[4 5 6]
 [7 6 5]]
[[ 5  7  9]
 [10  8 14]]


### Linear Algebra

In [97]:
# Matrix multiplication. Remember that array1 * array2 gives elementwise multiplication,
# which is not what we want here.

# The number of columns of the first matrix must equal the number of rows of the second
array_linalg1 = np.ones((3, 4))
array_linalg2 = np.full((4, 3), 2)

print(array_linalg1, array_linalg2, sep="\n")

# np.matmul is the numpy function that performs matrix multiplication
array_mul_result = np.matmul(array_linalg1, array_linalg2)
print(array_mul_result)

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[2 2 2]
 [2 2 2]
 [2 2 2]
 [2 2 2]]
[[8. 8. 8.]
 [8. 8. 8.]
 [8. 8. 8.]]


In [100]:
# numpy also has a function to calculate the determinant of a matrix.
# The matrix needs to be square for its determinant to be calculated.
# array_mul_result is a 3x3 matrix with identical rows, hence the determinant of 0.
np.linalg.det(array_mul_result)

0.0

### Statistics

In [101]:
# Some simple statistics computed over the whole array.
# Printed in order: minimum, maximum, sum.
print(np.min(array_2dims), np.max(array_2dims), np.sum(array_2dims), sep="\n")

1
9
20


In [104]:
# Statistics can also be computed per row/col using the axis argument of the stats functions:
#   axis=0 works vertically (down the rows), giving one result per column
#   axis=1 works horizontally (across the columns), giving one result per row

# Printed in order: the array, its column sums (axis=0), its row sums (axis=1)
print(array_2dims, np.sum(array_2dims, axis=0), np.sum(array_2dims, axis=1), sep="\n")

# The min and max functions accept the axis argument in the same way

[[1 2 3]
 [3 2 9]]
[ 4  4 12]
[ 6 14]


### Combining Arrays

In [110]:
# Vertically stack two arrays (they need to have the same number of columns)
print(array_2dims, array_2dims2, sep="\n")

# The arrays to stack are passed together as one list, hence the square brackets
np.vstack([array_2dims, array_2dims2])

[[1 2 3]
 [3 2 9]]
[[4 5 6]
 [7 6 5]]


array([[1, 2, 3],
       [3, 2, 9],
       [4, 5, 6],
       [7, 6, 5]])

In [111]:
# Horizontally stack two arrays (they need to have the same number of rows)

np.hstack([array_2dims, array_2dims2]) # the arrays are passed together as one list, hence the square brackets

array([[1, 2, 3, 4, 5, 6],
       [3, 2, 9, 7, 6, 5]])

### Loading data from file

In [114]:
# Read the data from the file data.txt located in the current folder

array_read = np.genfromtxt("data.txt", delimiter=",") # the delimiter is what separates the columns in the data file: a comma in our case
print(array_read)

[[  1.  13.  21.  11. 196.  75.   4.   3.  34.   6.   7.   8.   0.   1.
    2.   3.   4.   5.]
 [  3.  42.  12.  33. 766.  75.   4.  55.   6.   4.   3.   4.   5.   6.
    7.   0.  11.  12.]
 [  1.  22.  33.  11. 999.  11.   2.   1.  78.   0.   1.   2.   9.   8.
    7.   1.  76.  88.]]


### Boolean Masking

In [146]:
# Arrays can be indexed in more interesting and flexible ways
print(array_read)

# Indexing with lists: the first list holds the row indices and the second the column
# indices of the wanted elements, so this picks element (0, 1) twice.
print(array_read[[0, 0], [1, 1]])

# A comparison on an array creates a boolean mask with the same shape as the array
bool_mask = array_read > 30
print(bool_mask)

# Indexing with the mask filters the array: only the elements where the mask is True are returned
array_read[bool_mask]

[[  1.  13.  21.  11. 196.  75.   4.   3.  34.   6.   7.   8.   0.   1.
    2.   3.   4.   5.]
 [  3.  42.  12.  33. 766.  75.   4.  55.   6.   4.   3.   4.   5.   6.
    7.   0.  11.  12.]
 [  1.  22.  33.  11. 999.  11.   2.   1.  78.   0.   1.   2.   9.   8.
    7.   1.  76.  88.]]
[13. 13.]
[[False False False False  True  True False False  True False False False
  False False False False False False]
 [False  True False  True  True  True False  True False False False False
  False False False False False False]
 [False False  True False  True False False False  True False False False
  False False False False  True  True]]


array([196.,  75.,  34.,  42.,  33., 766.,  75.,  55.,  33., 999.,  78.,
        76.,  88.])