# Notebook Tips

- Press Shift + Tab with the cursor on a method to show its docstring.  
- Press Tab after typing `object.` to see the available fields/methods.


# NumPy Arrays

In [1]:
import numpy as np

In [2]:
list = [1, 2, 3]

In [4]:
list # python list

[1, 2, 3]

In [3]:
np.array(list) # numpy array; notice the difference between the two outputs

array([1, 2, 3])

In [5]:
nested_list = [[1, 2], [3, 4], [5, 6]] # two dimensional arrays

In [6]:
nested_list

[[1, 2], [3, 4], [5, 6]]

In [7]:
np.array(nested_list) # a matrix; the number of consecutive opening brackets [[ tells you how many dimensions the array has

array([[1, 2],
       [3, 4],
       [5, 6]])

In [9]:
# Typically we don't convert Python objects into NumPy arrays; we generate arrays with NumPy functions instead

In [10]:
np.arange(0, 10) # press Shift + Tab to see Docstring

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [11]:
np.arange(0, 10, 2)

array([0, 2, 4, 6, 8])

In [12]:
np.zeros(3) # the zeros are floating-point numbers by default

array([0., 0., 0.])

In [13]:
np.zeros((4,4)) # 2D matrix of shape 4x4

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [14]:
np.ones(3)

array([1., 1., 1.])

In [15]:
np.ones((4,10)) # the shape (4, 10) is passed as a Python tuple

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [21]:
np.linspace(0, 10, 3) # Returns `num` evenly spaced samples, calculated over the interval [`start`, `stop`].

array([ 0.,  5., 10.])

In [22]:
np.linspace(0, 10, 10) # the stop value is included


array([ 0.        ,  1.11111111,  2.22222222,  3.33333333,  4.44444444,
        5.55555556,  6.66666667,  7.77777778,  8.88888889, 10.        ])

In [23]:
np.eye(5) # Identity matrix. Return a 2-D array with ones on the diagonal and zeros elsewhere.

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [24]:
# UNIFORM DISTRIBUTION
# Create an array of the given shape and populate it with
# random samples from a uniform distribution
# over ``[0, 1)``.
np.random.rand(2)

array([0.51999516, 0.81605092])

In [27]:
np.random.rand(3, 4) # 2D matrix with 3 rows and 4 columns of values in [0, 1). Notice that the dimensions are passed as separate arguments, not as a tuple

array([[0.39620171, 0.48368702, 0.26720291, 0.45227539],
       [0.07504903, 0.04790027, 0.79328187, 0.03327744],
       [0.51283889, 0.87332591, 0.75553595, 0.34740715]])

In [28]:
# STANDARD NORMAL DISTRIBUTION
np.random.randn(5)

array([-1.58007616,  0.33993744,  1.92292156,  0.63746778,  0.42784779])

In [29]:
np.random.randint(0, 100)

7

In [31]:
# Seeding guarantees that the same random numbers are generated
# whenever the same seed and parameters are used, across different calls/computers
np.random.seed(42)
np.random.rand(4)

array([0.37454012, 0.95071431, 0.73199394, 0.59865848])

In [44]:
reshape_me = np.arange(0, 25)

In [45]:
reshape_me

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [49]:
reshape_me.shape # 1D array: only one dimension is defined

(25,)

In [54]:
reshaped = reshape_me.reshape(5, 5) # Returns an array containing the same data with a new shape.

In [56]:
reshape_me

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [55]:
reshaped.shape

(5, 5)

In [38]:
rand_arr = np.random.randint(0, 100, 10)

In [39]:
rand_arr

array([82, 86, 74, 74, 87, 99, 23,  2, 21, 52])

In [41]:
rand_arr.reshape(2, 5)

array([[82, 86, 74, 74, 87],
       [99, 23,  2, 21, 52]])

In [58]:
rand_arr.max()

99

In [59]:
rand_arr.argmax() # Return indices of the maximum values along the given axis.

5

In [60]:
rand_arr.min()

2

In [61]:
rand_arr.argmin()

7

In [63]:
rand_arr.dtype # 64-bit integer data type

dtype('int64')

# NumPy Index Selection

In [2]:
import numpy as np

In [3]:
arr = np.arange(0, 11)

In [4]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [5]:
arr[8]

8

In [7]:
arr[1:5] # the stop index is exclusive: returns indices 1 through 4

array([1, 2, 3, 4])

In [8]:
arr[:5] # start from the beginning

array([0, 1, 2, 3, 4])

In [9]:
arr[5:] # starting from 5 all the way to the end

array([ 5,  6,  7,  8,  9, 10])

## Broadcasting

In [10]:
arr[1:5] = 100 # broadcasting: assigns 100 to indices 1-4 (a plain Python list does not allow assigning a scalar to a slice)

In [11]:
arr

array([  0, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [12]:
slice_of_arr = arr[:5] # a slice is only a view of the original array arr, not a copy; see below

In [13]:
slice_of_arr[:] = 99

In [14]:
slice_of_arr

array([99, 99, 99, 99, 99])

In [15]:
arr # original values got changed

array([99, 99, 99, 99, 99,  5,  6,  7,  8,  9, 10])

In [16]:
# Use array copy 
arr_copy = arr.copy()

In [17]:
arr_copy

array([99, 99, 99, 99, 99,  5,  6,  7,  8,  9, 10])

In [18]:
arr_copy[:] = 100

In [19]:
arr_copy

array([100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100])

In [20]:
arr

array([99, 99, 99, 99, 99,  5,  6,  7,  8,  9, 10])

In [21]:
# 2d array
arr_2d = np.array([[1, 2, 3], [99, 100, 101], [102, 103, 104]])

In [22]:
arr_2d

array([[  1,   2,   3],
       [ 99, 100, 101],
       [102, 103, 104]])

In [23]:
arr_2d.shape

(3, 3)

In [24]:
arr_2d[2]

array([102, 103, 104])

In [25]:
arr_2d[2][1]

103

In [26]:
arr_2d[2, 1] # different syntax

103

In [27]:
arr_2d[1:]

array([[ 99, 100, 101],
       [102, 103, 104]])

In [28]:
arr_2d[1:, 1:]

array([[100, 101],
       [103, 104]])

## Conditional Selection

In [29]:
arr = np.arange(1, 11)

In [30]:
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [32]:
# broadcast a condition/comparison across arr
arr > 4 # where is my array greater than 4? -> returns a numpy array of Boolean values

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [33]:
bool_arr = arr > 4

In [34]:
bool_arr

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [36]:
arr[bool_arr] # returns only the ELEMENTS at the positions where bool_arr is True

array([ 5,  6,  7,  8,  9, 10])

In [37]:
arr[arr > 4] # equivalent to the above

array([ 5,  6,  7,  8,  9, 10])

# NumPy Operations

In [1]:
import numpy as np

In [39]:
arr = np.arange(1, 11)

In [40]:
arr + 5 # addition on an element-by-element basis

array([ 6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [41]:
arr - 2 # NOTE that the original array isn't mutated; this returns a new array

array([-1,  0,  1,  2,  3,  4,  5,  6,  7,  8])

In [42]:
arr + arr # the shapes must match (or be broadcast-compatible) for this to work; the same applies to other operations like *, /, -

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [45]:
# NOTE that 0 divided by 0 produces a warning and NaN (not a number)
arr_range = np.arange(0, 11)

In [46]:
arr_range

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [47]:
arr_range / arr_range # index 0 is value 0. 0/0 gets NaN

  """Entry point for launching an IPython kernel.


array([nan,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [48]:
# In plain Python, division by 0 raises a ZeroDivisionError at runtime
1 / 0

ZeroDivisionError: division by zero

In [49]:
# however, in NumPy it only emits a RuntimeWarning and results in inf (infinity)
1 / arr_range # any positive scalar divided by 0 gives inf (a negative scalar gives -inf)

  


array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ,
       0.2       , 0.16666667, 0.14285714, 0.125     , 0.11111111,
       0.1       ])

In [50]:
np.sqrt(arr_range)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ,
       3.16227766])

In [51]:
np.sin(arr_range)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849,
       -0.54402111])

In [52]:
np.log(arr_range) # log of 0 gives -inf (negative infinity)

  """Entry point for launching an IPython kernel.


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436,
       1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458,
       2.30258509])

In [53]:
# See docs.scipy.org/doc/numpy for the full list of universal functions (ufuncs)

In [54]:
# the following scientific function calls are the ones we commonly use

In [55]:
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [56]:
arr.sum()

55

In [57]:
arr.mean() # Mean (the simple average of the numbers)

5.5

In [58]:
arr.max()

10

In [59]:
# To calculate the variance follow these steps: 
# Work out the Mean (the simple average of the numbers) 
# Then for each number: subtract the Mean and square the result (the squared difference). 
# Then work out the average of those squared differences. 
arr.var() # variance

8.25

In [60]:
# In statistics, the standard deviation is a measure of the amount of variation or dispersion of a set of values. 
# A low standard deviation indicates that the values tend to be close to the mean of the set, 
# while a high standard deviation indicates that the values are spread out over a wider range.
arr.std() # standard deviation

2.8722813232690143

In [4]:
arr2d = np.arange(0, 25).reshape(5, 5) # reshape to create two dimensional arrays

In [5]:
arr2d

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [8]:
arr2d.shape # (5, 5) two dimensional array

(5, 5)

In [9]:
arr2d.sum() # sum up all numbers

300

In [10]:
arr2d.sum(axis=0) # in a (5, 5) array, the first 5 is the number of rows and the second 5 the number of columns. axis=0 sums across the rows, i.e. vertically, giving one total per column

array([50, 55, 60, 65, 70])

In [12]:
arr2d.sum(axis=1) # axis=1 sums across the columns, i.e. horizontally, giving one total per row

array([ 10,  35,  60,  85, 110])