In [233]:
import numpy as np

In [None]:
# 4 NumPy Basics: Arrays and Vectorized Computation

In [234]:
# declare a numpy array and a python list from 0 to 999,999

my_arr = np.arange(1000000)
my_list = list(range(1000000))

In [235]:
# compare the performance of each
# numpy arrays are generally 10 to 100 times faster than their python list counterparts

%timeit myarr2 = my_arr * 2
%timeit my_list2 = [x * 2 for x in my_list]

673 µs ± 47.2 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
22.7 ms ± 772 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


# ndarray (N-dimensional array object): A Multidimensional Array Object

In [230]:
# declare an array made up of two python lists

data = np.array([[1.5, -0.1, 3], [0, -3, 6.5]])

In [231]:
data

array([[ 1.5, -0.1,  3. ],
       [ 0. , -3. ,  6.5]])

In [232]:
# numpy allows you to perform computations against whole blocks of code

data * 10

array([[ 15.,  -1.,  30.],
       [  0., -30.,  65.]])

In [240]:
data + data

array([[ 3. , -0.2,  6. ],
       [ 0. , -6. , 13. ]])

In [241]:
# Every array has a shape, a tuple indicating the size of each dimension, in this case 2 rows, 3 columns

data.shape

(2, 3)

In [242]:
data.dtype

dtype('float64')

## Creating arrays using the array function

In [246]:
# This accepts any sequence-like object (including other arrays) and produces a new NumPy array 
# containing the passed data

data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)

In [247]:
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [248]:
# Nested sequences will be converted into a multidimensional array

In [249]:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]

In [250]:
arr2 = np.array(data2)

In [251]:
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [252]:
# returns the number of dimensions (axis) in the array

arr2.ndim

2

In [253]:
arr2.shape

(2, 4)

In [254]:
# np.array tries to infer a good data type for the array it creates

In [107]:
arr1.dtype

dtype('float64')

In [108]:
arr2.dtype

dtype('int64')

In [None]:
# create an array of 0s using np.zeroes

In [109]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [110]:
np.zeros((3, 6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [255]:
# create an array of 1s using np.ones

In [257]:
np.ones(5)

array([1., 1., 1., 1., 1.])

In [259]:
np.ones((2, 4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [260]:
# create an empty array using np.empty. It’s not safe to assume that numpy.empty will return an array of all zeros. This function returns uninitialized memory and thus may contain nonzero "garbage" values. You should use this function only if you intend to 
# populate the new array with data.

In [262]:
np.empty((1, 2, 3))

array([[[ 3. , -0.2,  6. ],
        [ 0. , -6. , 13. ]]])

In [263]:
# create an array using arange(n) from 0 to n-1
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

## Data types for ndarrays (datatype:bitsperelement)

In [264]:
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)

In [265]:
arr1.dtype

dtype('float64')

In [266]:
arr2.dtype

dtype('int32')

In [267]:
arr = np.array([1, 2, 3, 4, 5]) 

In [268]:
arr.dtype

dtype('int64')

In [269]:
float_arr = arr.astype(np.float64)

In [270]:
float_arr

array([1., 2., 3., 4., 5.])

In [271]:
float_arr.dtype

dtype('float64')

In [121]:
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])

In [122]:
arr

array([ 3.7, -1.2, -2.6,  0.5, 12.9, 10.1])

In [272]:
# You can explicitly convert or cast an array from one data type to another using ndarray’s astype method
# Calling astype always creates a new array (a copy of the data), 
# even if the new data type is the same as the old data type.

In [273]:
arr.astype(np.int32)

array([1, 2, 3, 4, 5], dtype=int32)

In [274]:
numeric_strings = np.array(["1.25", "-9.6", "42"], dtype=np.string_)

In [275]:
numeric_strings.astype(float)

array([ 1.25, -9.6 , 42.  ])

# Using Another Array'ss dtype to Change Array dtype

In [286]:
int_array = np.arange(10)

In [287]:
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)

In [288]:
int_array.astype(calibers.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [289]:
zeros_uint32 = np.zeros(8, dtype="u4")

In [290]:
zeros_uint32

array([0, 0, 0, 0, 0, 0, 0, 0], dtype=uint32)

## Arithmetic with Numpy Arrays

In [309]:
arr = np.array([[1., 2., 3.], [4., 5., 6]])

In [310]:
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [311]:
# Elements in the array will have arithmetic performed with the element at the matching index

In [312]:
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [313]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [314]:
1 - arr

array([[ 0., -1., -2.],
       [-3., -4., -5.]])

In [315]:
arr ** 2

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [316]:
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [317]:
arr ** 2

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [318]:
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])

In [319]:
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [320]:
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

# Basic Indexing and Slicing

In [343]:
# One-dimensional arrays are simple; on the surface they act similarly to Python lists

In [344]:
arr = np.arange(10)

In [345]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [346]:
arr[5]

5

In [347]:
arr[5:8]

array([5, 6, 7])

In [348]:
arr[5:8] = 12

In [349]:
# As you can see, if you assign a scalar value to a slice, as in arr[5:8] = 12, 
# the value is propagated (or broadcast henceforth) to the entire selection.

In [350]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [351]:
# An important first distinction from Python's built-in lists is that 
# array slices are views on the original array. This means that the 
# data is not copied, and any modifications to the view will be reflected in the source array.

## Create an array from an array slice

In [352]:
# Create an array populated by the values at the sliced indexes

In [353]:
arr_slice = arr[5:8]

In [375]:
arr_slice

array([64, 64, 64])

## Changes to the sliced array show up in the original array

In [376]:
arr_slice[1] = 12345

In [377]:
# Here arr_slice[1] correlates to the 7th index in the original array because it is made from the [5:8] slice

In [378]:
arr

array([    0,     1,     2,     3,     4,    64, 12345,    64,     8,
           9])

In [379]:
# The bare slice will assign to all values in an array. Again, arr_slice is created from arr[5:8]
# so it will overwrite the values at those indexes.
# If you want a copy of a slice of an ndarray instead of a view, you will need to 
# explicitly copy the array—for example, arr[5:8].copy()

In [380]:
arr_slice[:] = 64

In [381]:
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [382]:
# Individual elements in multidimensional array are indexed like one-dimensional arrays

In [383]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [384]:
arr2d[2]

array([7, 8, 9])

In [None]:
# In a two-dimensional array, the elements at each index are no 
# longer scalars but rather one-dimensional arrays
# array[index of element in array, index of value in element]

In [400]:
arr2d[0,2]

3

In [401]:
arr2d[2,1]

8

In [402]:
# arr3d is an array with 2 lists at each index

In [410]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [411]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [412]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [413]:
arr3d[1]

array([[ 7,  8,  9],
       [10, 11, 12]])

In [414]:
old_value = arr3d[0].copy()

In [415]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [419]:
# Since arr3d is an array made up of lists of lists, the slice [1, 0] 
# first references the element at index 1 [[7, 8, 9], [10, 11, 12]] and then the element at the 0 index within
# that element [7, 8, 9]

In [420]:
arr3d[1, 0]

array([7, 8, 9])

In [421]:
x = arr3d[1]

In [422]:
x

array([[ 7,  8,  9],
       [10, 11, 12]])

In [423]:
x[0]

array([7, 8, 9])

## Indexing with slices

In [424]:
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [425]:
arr[1:6]

array([ 1,  2,  3,  4, 64])

In [426]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [429]:
# Slicing for the first 2 elements in the array which are lists [1, 2, 3] and [4, 5, 6]

In [435]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [436]:
# Select the first 2 rows from arr2d, and from those rows
# give me the values from the the second column to the end

In [437]:
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [438]:
# Select the first row in the array, and give me the value from the first 2 columns 

In [442]:
lower_dim_slice = arr2d[1, :2]

In [443]:
# should return (2,)

In [444]:
arr2d[:2, 2]

array([3, 6])

## Using slicing to replace values in array

In [445]:
# In the first 2 rows, in the first 2 columns replace all values with 0

In [446]:
arr2d[:2, 1:] = 0

In [447]:
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

## Boolean Indexing

In [448]:
names = np.array(["Bob", "Joe", "Will", "Bob", "Will", "Joe", "Joe"])

In [449]:
data = np.array([[4, 7], [0, 2], [-5, 6], [0, 0], [1, 2],
   .....:                  [-12, -4], [3, 4]])


In [450]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [451]:
data

array([[  4,   7],
       [  0,   2],
       [ -5,   6],
       [  0,   0],
       [  1,   2],
       [-12,  -4],
       [  3,   4]])

In [180]:
names == "Bob"

array([ True, False, False,  True, False, False, False])

In [None]:
# If arrays are of equal length, the variable passed will return the indexes 
# that match its location in the original array

In [452]:
# Bob can be found at indexes 0 and 3 which is where [4, 7] and [0, 0] are in their array

In [453]:
data[names == "Bob"]

array([[4, 7],
       [0, 0]])

In [None]:
# ???

In [454]:
data[names == "Bob", 1:]

array([[7],
       [0]])

In [None]:
# ??? 

In [183]:
data[names == "Bob", 1]

array([7, 0])

In [455]:
# The ~ operator can be useful when you want to invert a Boolean array referenced by a variable

In [456]:
names != "Bob"

array([False,  True,  True, False,  True,  True,  True])

In [457]:
~(names == "Bob")

array([False,  True,  True, False,  True,  True,  True])

In [458]:
data[~(names == "Bob")]

array([[  0,   2],
       [ -5,   6],
       [  1,   2],
       [-12,  -4],
       [  3,   4]])

In [459]:
# mask is used to call the array and return True for all indexes that contain Bob or Will

In [460]:
mask = (names == "Bob") | (names == "Will")

In [461]:
mask

array([ True, False,  True,  True,  True, False, False])

In [462]:
# Passing mask to the data array to see what indexes correspond to the indexes that contain Bob or Will

In [463]:
data[mask]

array([[ 4,  7],
       [-5,  6],
       [ 0,  0],
       [ 1,  2]])

In [None]:
# Selecting data from an array by Boolean indexing and assigning 
# the result to a new variable always creates a copy of the data, even if the returned array is unchanged.

In [465]:
# Setting all negative values in the array to 0

In [466]:
data[data < 0] = 0

In [467]:
data

array([[4, 7],
       [0, 2],
       [0, 6],
       [0, 0],
       [1, 2],
       [0, 0],
       [3, 4]])

In [468]:
# You can also set whole rows or columns using a one-dimensional Boolean array

In [469]:
data[names != "Joe"] = 7

In [470]:
data

array([[7, 7],
       [0, 2],
       [7, 7],
       [7, 7],
       [7, 7],
       [0, 0],
       [3, 4]])

# Fancy Indexing

In [473]:
# Create an array of zeroes that is 8 rows by 4 columns

In [474]:
arr = np.zeros((8, 4))

In [475]:
arr

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [476]:
# Creating an array in the range of 0 to n - 1

In [482]:
for i in range(8):
    arr[i] = i

In [483]:
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [484]:
# To select a subset of the rows in a particular order, you can simply pass a 
# list or ndarray of integers specifying the desired order

In [485]:
arr[[4, 3, 0, 6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [486]:
# Using negative indices selects rows from the end

In [487]:
arr[[-3, -5, -7]]

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

In [None]:
# Selecting multiple index arrays selects a one-dimensional array 
# of elements corresponding to each tuple of indices

In [488]:
# you can convert an array from one shape to another without copying any data. 
# To do this, pass a tuple indicating the new shape to the reshape array instance method

In [489]:
arr = np.arange(32).reshape((8, 4))

In [491]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [493]:
# ???

In [494]:
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [495]:
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [496]:
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [497]:
arr[[1, 5, 7, 2], [0, 3, 1, 2]] = 0

In [498]:
arr

array([[ 0,  1,  2,  3],
       [ 0,  5,  6,  7],
       [ 8,  9,  0, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22,  0],
       [24, 25, 26, 27],
       [28,  0, 30, 31]])

## Transposing Arrays and Swapping Axes

In [499]:
arr = np.arange(15).reshape((3, 5))

In [500]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [501]:
# Using the T attribute to swap axes (3 x 5 -> 5 x 3)

In [502]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [503]:
arr = np.array([[0, 1, 0], [1, 2, -2], [6, 3, 2], [-1, 0, -1], [1, 0, 1
]])


In [504]:
arr

array([[ 0,  1,  0],
       [ 1,  2, -2],
       [ 6,  3,  2],
       [-1,  0, -1],
       [ 1,  0,  1]])

In [505]:
np.dot(arr.T, arr)

array([[39, 20, 12],
       [20, 14,  2],
       [12,  2, 10]])

In [None]:
# The @ infix operator is another way to do matrix multiplication

In [506]:
arr.T @ arr

array([[39, 20, 12],
       [20, 14,  2],
       [12,  2, 10]])

In [213]:
arr

array([[ 0,  1,  0],
       [ 1,  2, -2],
       [ 6,  3,  2],
       [-1,  0, -1],
       [ 1,  0,  1]])

In [None]:
# ndarray has the method swapaxes, which takes a pair of axis numbers and switches 
# the indicated axes to rearrange the data
# swapaxes similarly returns a view on the data without making a copy.

In [214]:
arr.swapaxes(0, 1)

array([[ 0,  1,  6, -1,  1],
       [ 1,  2,  3,  0,  0],
       [ 0, -2,  2, -1,  1]])

## Pseudorandom Number Generation

In [514]:
samples = np.random.standard_normal(size=(4, 4))

In [515]:
samples

array([[-0.20046544, -0.4674584 ,  0.72497836, -1.27543188],
       [-0.09925055, -1.26038605,  0.85465902, -0.28060231],
       [ 0.03294139, -1.66227616, -0.89267503,  1.24513724],
       [ 1.11396649,  1.35518496, -1.71909061, -0.4490223 ]])

In [526]:
from random import normalvariate

In [527]:
N = 1000000

In [528]:
timeit samples = [normalvariate(0, 1) for _ in range(N)]

280 ms ± 2.56 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [529]:
timeit np.random.standard_normal(N)

15.3 ms ± 248 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [530]:
data = rng.standard_normal((2, 3))

NameError: name 'rng' is not defined

In [None]:
type(rng)

In [None]:
# output should be numpy.random._generator.Generator

## Universal Functions: Fast Element-Wise Array Functions

In [558]:
arr = np.arange(10)

In [559]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [560]:
# get the square root of each value in the array

In [561]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [562]:
# get the exponential value of each value in the array

In [563]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [564]:
x = rng.standard_normal(8)

NameError: name 'rng' is not defined

In [565]:
y = rng.standard_normal(8)

NameError: name 'rng' is not defined

In [566]:
x

array([[ 7,  8,  9],
       [10, 11, 12]])

In [567]:
# should output:
# array([-1.3678,  0.6489,  0.3611, -1.9529,  2.3474,  0.9685, -0.7594,
#         0.9022])

In [568]:
y

NameError: name 'y' is not defined

In [569]:
# should output:
# array([-0.467 , -0.0607,  0.7888, -1.2567,  0.5759,  1.399 ,  1.3223,
#        -0.2997])

In [None]:
# numpy.maximum computes the element-wise maximum of the elements in x and y.

In [570]:
np.maximum(x, y)

NameError: name 'y' is not defined

In [571]:
# should output:
# array([-0.467 ,  0.6489,  0.7888, -1.2567,  2.3474,  1.399 ,  1.3223,
#         0.9022])

In [572]:
arr = rng.standard_normal(7) * 5

NameError: name 'rng' is not defined

In [574]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [575]:
# should output: 
# array([ 4.5146, -8.1079, -0.7909,  2.2474, -6.718 , -0.4084,  8.6237])

In [576]:
# math.modf, it returns the fractional and integral parts of a floating-point array:

In [577]:
remainder, whole_part = np.modf(arr)

In [578]:
remainder

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
# should return array([ 0.5146, -0.1079, -0.7909,  0.2474, -0.718 , -0.4084,  0.6237])

In [579]:
whole_part

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [None]:
# should return array([ 4., -8., -0.,  2., -6., -0.,  8.]) 

In [580]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# should return array([ 4.5146, -8.1079, -0.7909,  2.2474, -6.718 , -0.4084,  8.6237])

In [581]:
out = np.zeros_like(arr)

In [582]:
np.add(arr, 1)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [None]:
# should return array([ 5.5146, -7.1079,  0.2091,  3.2474, -5.718 ,  0.5916,  9.6237])

In [None]:
# out argument that allows them to assign their results into an existing array rather than create a new one

In [583]:
np.add(arr, 1, out=out)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [None]:
# should return array([ 5.5146, -7.1079,  0.2091,  3.2474, -5.718 ,  0.5916,  9.6237])

In [584]:
out

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

# Array-Oriented Programming with Arrays