In [1]:
import numpy as np
import scipy.special

In [2]:
# NumPy arrays are the central data structure of numpy and of scientific programming in
# Python in general. Let us make one and explore how it behaves!

# Unlike list(), tuple() and set(), np.array() cannot be called without an argument: it
# always needs an object to convert. (An array with no elements is still possible, for
# example np.array([]).) The error is caught and printed, so the demonstration shows the
# message without aborting a "Restart & Run All" of the notebook.
try:
    np.array()
except TypeError as error:
    print(f'{type(error).__name__}: {error}')

TypeError: array() missing required argument 'object' (pos 0)

In [4]:
# Build an array from an existing Python sequence: np.array converts the list it is given.
array = np.array([1, 2, 3, 4])

display(array) # the output starts with "array(", showing that the list has been converted

array([1, 2, 3, 4])

In [8]:
display(array.dtype) # the data type shared by every element of the array

dtype('int64')

In [9]:
display(array.shape) # tuple holding the length of each dimension of the array

(4,)

In [15]:
# astype returns a NEW array whose elements have been converted to the requested type
# (float here). The original array is left untouched, which is why its dtype, shown
# second, is still the integer type it was created with.
display(array.astype(float), array.dtype)

array([1., 2., 3., 4.])

dtype('int64')

In [17]:
# Many statistical functions are available directly as methods of a numpy array.
# display() shows each of its arguments in turn: maximum, minimum, sum, mean and
# standard deviation.
display(
    array.max(),
    array.min(),
    array.sum(),
    array.mean(),
    array.std(),
)

4

1

10

2.5

1.118033988749895

In [18]:
# Most importantly, numpy arrays can be passed as arguments to numpy functions. The
# same five statistics as above are computed here through the function interface.
display(
    np.max(array),
    np.min(array),
    np.sum(array),
    np.mean(array),
    np.std(array),
)

4

1

10

2.5

1.118033988749895

In [31]:
n = 10
# Besides converting existing data with np.array, numpy offers several other constructors.

zeros = np.zeros(n) # array of size n filled with zeros
display(zeros)

ones = np.ones(n) # array of size n filled with ones
display(ones)

empty = np.empty(n) # array of size n whose memory is NOT initialized: it holds whatever
                    # values happened to be there, so the output below is arbitrary and
                    # may differ from run to run
display(empty)

array = np.array([[1, 2], [3, 4]]) # a nested list gives a 2 dimensional (2 by 2) array
display(array.shape)
two_by_two_zeros = np.zeros_like(array) # array with the same shape and dtype as 'array',
                                        # with every entry set to zero
display(two_by_two_zeros)


np.random.seed(103) # fix the seed so that the random numbers below can be reproduced
rand_array = np.random.random(24) # 1 dimensional array of 24 random floats drawn
                                  # uniformly from the half-open interval [0, 1)

display(rand_array)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

(2, 2)

array([[0, 0],
       [0, 0]])

array([0.43211121, 0.17421526, 0.17094369, 0.82763225, 0.58717126,
       0.45935375, 0.82268476, 0.82154811, 0.30712025, 0.20089438,
       0.40323246, 0.94788933, 0.67754443, 0.6089751 , 0.67236337,
       0.00798261, 0.33655992, 0.35727233, 0.48605999, 0.87828242,
       0.75490869, 0.63046494, 0.37648369, 0.59016159])

In [32]:
# Slicing a numpy array uses the same start:stop:step notation as lists and tuples.
display(
    rand_array[::-1],   # step of -1: the whole array in reverse order
    rand_array[3::5],   # every 5th element, starting at index 3
    rand_array[10:21],  # indices 10 through 20 (the stop index 21 is excluded)
)

array([0.59016159, 0.37648369, 0.63046494, 0.75490869, 0.87828242,
       0.48605999, 0.35727233, 0.33655992, 0.00798261, 0.67236337,
       0.6089751 , 0.67754443, 0.94788933, 0.40323246, 0.20089438,
       0.30712025, 0.82154811, 0.82268476, 0.45935375, 0.58717126,
       0.82763225, 0.17094369, 0.17421526, 0.43211121])

array([0.82763225, 0.30712025, 0.6089751 , 0.48605999, 0.59016159])

array([0.40323246, 0.94788933, 0.67754443, 0.6089751 , 0.67236337,
       0.00798261, 0.33655992, 0.35727233, 0.48605999, 0.87828242,
       0.75490869])

In [41]:
# Slices of a numpy array are NOT copies, unlike slices of a tuple or a list.
# They are views: references to the data of the original array. Changing a slice therefore
# also changes the original array, and changing the original array is visible through
# every name that refers to one of its slices.

my_list = [1, 2, 3, 4, 5]
array = np.array(my_list)

array_slice = array[1:3] # view of the entries at indices 1 and 2 of array
print(f'The array {array}, and the slice {array_slice} before any modifications.')
array_slice[0] = 55 # index 0 of the slice is index 1 of array

print('Array after setting array_slice[0] = 55: ', array)
array[2] = -55 # index 2 of array is index 1 of the slice
print('Slice after setting array[2] = -55: ', array_slice)

The array [1 2 3 4 5], and the slice [2 3] before any modifications.
Array after setting array_slice[0] = 55:  [ 1 55  3  4  5]
Slice after setting array[2] = -55:  [ 55 -55]


In [43]:
# A slice only gives a view, so when an independent copy is needed it has to be requested
# explicitly, either with np.copy(array) or with the equivalent array method used here.

array = np.array((1, 2, 3))
new_array = array.copy() # owns its own data, so changing it does not affect 'array'

In [46]:
# Fancy indexing is allowed in numpy!

display(rand_array[[1, 19, 6]]) # indexing with a list of integers gives a numpy array made
                                # of the elements of rand_array at indexes 1, 19 and 6, in
                                # that order. A numpy array of integers can be used as the
                                # index instead of the list

array([0.17421526, 0.87828242, 0.82268476])

In [47]:
# Boolean indexing is also available, and it is an incredibly powerful tool in data
# analysis.

rand_array > 0.7 # The comparison is applied elementwise: the result is a Boolean array
                 # whose i-th entry is True if and only if rand_array[i] > 0.7, and
                 # False otherwise

array([False, False, False,  True, False, False,  True,  True, False,
       False, False,  True, False, False, False, False, False, False,
       False,  True,  True, False, False, False])

In [48]:
# To obtain the values of rand_array that are greater than 0.7 (or that satisfy any other
# Boolean condition), use the Boolean array itself as the index:

rand_array[rand_array > 0.7]

array([0.82763225, 0.82268476, 0.82154811, 0.94788933, 0.87828242,
       0.75490869])

In [54]:
# To get the indices of the True elements of a Boolean array, use the numpy function
# numpy.where. Called with just a Boolean array, it returns a tuple containing one array of
# indices per dimension, so for this 1 dimensional array the tuple has a single entry.

np.where(rand_array > 0.7)

(array([ 3,  6,  7, 11, 19, 20]),)

In [78]:
# Arithmetic between arrays is applied element by element. When one operand is a plain
# number, it is combined with every entry, as if it were an array of matching shape
# filled with that number. If the shapes of two arrays cannot be matched up, numpy
# raises an error.
display(
    np.array([5, 6, 7, 8]) / np.array([1, 2, 3, 4]),  # two arrays of the same shape
    -4 * rand_array,                                  # a scalar and an array
    rand_array ** 2,                                  # every entry squared
)

# Arrays can also be handed to numpy (and other) mathematical functions.
display(
    np.dot(np.array([1, 2, 3, 4]), np.random.random(4)),  # dot product with 4 random numbers
    np.exp(np.array([1, 2, 3, 4])),                       # e raised to each entry
)

array([5.        , 3.        , 2.33333333, 2.        ])

array([-1.72844482, -0.69686105, -0.68377474, -3.31052899, -2.34868504,
       -1.83741498, -3.29073903, -3.28619244, -1.22848101, -0.8035775 ,
       -1.61292983, -3.79155733, -2.71017772, -2.43590042, -2.68945347,
       -0.03193044, -1.34623966, -1.42908934, -1.94423995, -3.51312967,
       -3.01963476, -2.52185978, -1.50593474, -2.36064638])

array([1.86720094e-01, 3.03509579e-02, 2.92217435e-02, 6.84975137e-01,
       3.44770089e-01, 2.11005864e-01, 6.76810211e-01, 6.74941298e-01,
       9.43228489e-02, 4.03585502e-02, 1.62596415e-01, 8.98494186e-01,
       4.59066455e-01, 3.70850677e-01, 4.52072499e-01, 6.37220801e-05,
       1.13272577e-01, 1.27643521e-01, 2.36254311e-01, 7.71380007e-01,
       5.69887131e-01, 3.97486046e-01, 1.41739966e-01, 3.48290708e-01])

3.443080015503284

array([ 2.71828183,  7.3890561 , 20.08553692, 54.59815003])

In [61]:
# Multidimensional arrays offer richer indexing than nested Python lists. We look at the
# 2 dimensional case first; higher dimensions follow the same pattern.

# Rearrange the 24 entries of the 1 dimensional rand_array into 6 rows of 4 columns.
rand_array_2d = rand_array.reshape(6, 4)

rand_array_2d

array([[0.43211121, 0.17421526, 0.17094369, 0.82763225],
       [0.58717126, 0.45935375, 0.82268476, 0.82154811],
       [0.30712025, 0.20089438, 0.40323246, 0.94788933],
       [0.67754443, 0.6089751 , 0.67236337, 0.00798261],
       [0.33655992, 0.35727233, 0.48605999, 0.87828242],
       [0.75490869, 0.63046494, 0.37648369, 0.59016159]])

In [65]:
# Chained indexing, as used with nested lists, works here too, but for numpy the
# preferred form is a single index with one entry per dimension, that is, a tuple.
display(
    rand_array_2d[0, 1],    # == rand_array_2d[0][1] == rand_array[1]
    rand_array_2d[(0, 1)],  # the same index with the tuple written out explicitly
)

0.17421526319883063

0.17421526319883063

In [69]:
# Each dimension of a 2 dimensional array can be sliced independently.
display(
    rand_array_2d[2, :],       # the whole third row (row index 2)
    rand_array_2d[2, ::-1],    # the third row in reverse order
    rand_array_2d[::3, ::-1],  # every third row (indices 0 and 3), each one reversed
)

array([0.30712025, 0.20089438, 0.40323246, 0.94788933])

array([0.94788933, 0.40323246, 0.20089438, 0.30712025])

array([[0.82763225, 0.17094369, 0.17421526, 0.43211121],
       [0.00798261, 0.67236337, 0.6089751 , 0.67754443]])

In [74]:
# Boolean indexing with a mask of the same shape as the array picks the matching elements
# and returns them as a 1 dimensional array.
display(rand_array_2d[rand_array_2d > 0.7])

# np.where gives the positions instead: a tuple with one index array per dimension (row
# indices first, then column indices). Indexing with that tuple recovers the same elements.
display(np.where(rand_array_2d > 0.7))
display(rand_array_2d[np.where(rand_array_2d > 0.7)])

# A Boolean mask can also be used for a single dimension of the index. The mask is built
# from the very row that is being indexed, so it always matches the data it selects from
# and does not depend on any other variable.
display(rand_array_2d[0, rand_array_2d[0] > 0.6])

array([0.82763225, 0.82268476, 0.82154811, 0.94788933, 0.87828242,
       0.75490869])

(array([0, 1, 1, 2, 4, 5]), array([3, 2, 3, 3, 3, 0]))

array([0.82763225, 0.82268476, 0.82154811, 0.94788933, 0.87828242,
       0.75490869])

array([0.82763225])

In [75]:
# Arithmetic operators on numpy arrays act element by element, so + does not join arrays
# the way it joins lists. Joining is done with np.concatenate, which is given a tuple
# (A1, A2, ..., An) of arrays and joins them in the order given.

random_array1, random_array2 = np.random.random(10), np.random.random(10)
display(random_array1, random_array2)

combined = np.concatenate((random_array1, random_array2))
display(combined)

array([0.73732874, 0.59614645, 0.09247963, 0.40227096, 0.5896865 ,
       0.11541288, 0.38819426, 0.05976145, 0.58395785, 0.1667254 ])

array([0.34809538, 0.42189912, 0.24947387, 0.7699528 , 0.55131022,
       0.02629206, 0.77097582, 0.09321751, 0.7027962 , 0.57953883])

array([0.73732874, 0.59614645, 0.09247963, 0.40227096, 0.5896865 ,
       0.11541288, 0.38819426, 0.05976145, 0.58395785, 0.1667254 ,
       0.34809538, 0.42189912, 0.24947387, 0.7699528 , 0.55131022,
       0.02629206, 0.77097582, 0.09321751, 0.7027962 , 0.57953883])