In [3]:
import numpy as np

# 4 NumPy Basics: Arrays and Vectorized Computation

In [2]:
my_arr = np.arange(1_000_000)

In [3]:
my_list = list(range(1_000_000))

NumPy algorithms are generally faster and use less memory than their pure-Python equivalents.

In [4]:
%timeit my_arr2 = my_arr * 2

652 µs ± 6.08 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [5]:
%timeit my_list2 = [x * 2 for x in my_list]

23.5 ms ± 260 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
data = np.array([[1.5, -0.1, 3], [0, -3, 6.5]])

In [7]:
data

array([[ 1.5, -0.1,  3. ],
       [ 0. , -3. ,  6.5]])

In [8]:
data * 10

array([[ 15.,  -1.,  30.],
       [  0., -30.,  65.]])

In [9]:
data + data

array([[ 3. , -0.2,  6. ],
       [ 0. , -6. , 13. ]])

In [10]:
data.shape

(2, 3)

In [11]:
# ndarray has no `.type` attribute (the element type lives in `.dtype`).
# Catch the error and display it so the demonstration does not abort a
# Restart & Run All of the notebook.
try:
    data.type
except AttributeError as exc:
    print(f"{type(exc).__name__}: {exc}")

AttributeError: 'numpy.ndarray' object has no attribute 'type'

In [12]:
data.dtype

dtype('float64')

Remember to use dtype to see type for numpy objects

### Creating ndarrays

In [1]:
data1= [6, 7.5, 8, 0,1]

In [4]:
arr1 = np.array(data1)

In [5]:
arr1

array([6. , 7.5, 8. , 0. , 1. ])

#### multi-dimensional array from equal length lists

In [7]:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]

In [9]:
arr2 = np.array(data2)

In [10]:
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [11]:
arr2.ndim

2

In [12]:
arr2.shape

(2, 4)

In [13]:
arr1.dtype

dtype('float64')

In [14]:
arr2.dtype

dtype('int64')

In [17]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

Create array with a given size

In [18]:
np.ones((3, 6))

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [19]:
np.empty((2, 3, 2))

array([[[0.00000000e+000, 1.00937611e-320],
        [0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 1.61590357e+184]],

       [[5.58670628e-091, 1.52045380e-051],
        [6.54775654e-043, 1.45071711e+165],
        [3.99910963e+252, 1.46030983e-319]]])

### Important Array Functions

* array:	Convert input data (list, tuple, array, or other sequence type) to an ndarray either by inferring a data type or explicitly 
            specifying a data type; copies the input data by default
* asarray:	Convert input to ndarray, but do not copy if the input is already an ndarray
* arange:	Like the built-in range but returns an ndarray instead of a list
* ones, ones_like:	Produce an array of all 1s with the given shape and data type; ones_like takes another array and produces a ones array 
            of the same shape and data type
* zeros, zeros_like:	Like ones and ones_like but producing arrays of 0s instead
* empty, empty_like:	Create new arrays by allocating new memory, but do not populate with any values like ones and zeros
* full, full_like:  Produce an array of the given shape and data type with all values set to the indicated "fill value"; full_like takes 
                    another array and produces a filled array of the same shape and data type
* eye, identity:	Create a square N × N identity matrix (1s on the diagonal and 0s elsewhere)

### Data Types for ndarrays

In [25]:
# Same three integers, materialised once as 64-bit floats and once as 32-bit ints.
arr1, arr2 = (np.array([1, 2, 3], dtype=kind) for kind in (np.float64, np.int32))

In [26]:
arr1.dtype

dtype('float64')

In [27]:
arr2.dtype

dtype('int32')

In [29]:
arr = np.array([1, 2, 3, 4, 5])

In [30]:
arr.dtype

dtype('int64')

In [31]:
float_arr = arr.astype(np.float64)

In [32]:
float_arr

array([1., 2., 3., 4., 5.])

In [33]:
float_arr.dtype

dtype('float64')

In [34]:
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])

In [35]:
arr

array([ 3.7, -1.2, -2.6,  0.5, 12.9, 10.1])

In [37]:
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10], dtype=int32)

Cast strings that represent numbers to the appropriate numeric type

In [38]:
# np.bytes_ is the fixed-width byte-string type; the old alias np.string_ was
# removed in NumPy 2.0, while np.bytes_ works on both 1.x and 2.x.
numeric_strings = np.array(["1.25", "-9.6", "42"], dtype=np.bytes_)

In [39]:
numeric_strings

array([b'1.25', b'-9.6', b'42'], dtype='|S4')

In [40]:
numeric_strings.astype(float)

array([ 1.25, -9.6 , 42.  ])

### Arithmetic with Arrays

In [4]:
arr = np.array([[1., 2., 3.], [4., 5., 6.]])

In [5]:
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [6]:
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [7]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [11]:
arr = np.arange(10)

In [12]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [13]:
arr[5:8]

array([5, 6, 7])

In [14]:
arr[5:8] = 12

In [15]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

### Slices are views on the original array: changing a slice changes the original array

In [16]:
arr_slice = arr[5:8]

In [17]:
arr_slice[1] = 12345

In [18]:
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])

In [20]:
# arr_slice is a view of arr[5:8], so assigning through [:] overwrites those
# three elements of arr itself.
arr_slice[:] = 64
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [62]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [22]:
arr2d[2]

array([7, 8, 9])

In [25]:
arr2d[0][2]

3

In [26]:
arr2d[0, 2]

3

### 2D Array
Remember: use array_name[row][column] (or, equivalently, array_name[row, column]) for a 2-dimensional array. The element at each index of the first axis is a one-dimensional array.

### Multi-dimensional arrays
Omitting the later indices returns a lower-dimensional ndarray containing all the data along the remaining dimensions.  See the example below for a 2x2x3 3D array.

In [41]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [42]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [43]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [44]:
old_values = arr3d[0].copy()

In [45]:
# Assigning a scalar to arr3d[0] fills the entire first 2x3 sub-array with it.
arr3d[0] = 42
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [46]:
arr3d[0] = old_values

In [47]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

Giving two indices `[n, m]` to a 3D array returns the 1D array of all elements whose first two indices are `n` and `m`. Ex: `arr3d[1, 0]` holds the elements at (1, 0, 0), (1, 0, 1) and (1, 0, 2).

In [48]:
arr3d[1, 0]

array([7, 8, 9])

In [49]:
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [50]:
arr[1:6]

array([ 1,  2,  3,  4, 64])

In [51]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [52]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [53]:
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

Note on the above (this confused me at first): each axis gets its own slice. `:2` selects the first two rows (rows 0 and 1), and `1:` then keeps columns 1 onward within those rows, chopping off column 0. The result is the top-right 2x2 block.

In [54]:
lower_dim_slice = arr2d[1, :2]

In [55]:
lower_dim_slice

array([4, 5])

In [57]:
lower_dim_slice.shape

(2,)

In [58]:
arr2d[:2, 2]

array([3, 6])

In [59]:
arr2d[:, :1]

array([[1],
       [4],
       [7]])

In [63]:
arr2d[:2, 1:] = 0

In [64]:
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

![Slice Examples](images/Ch4_array_slicing_image.png)

In [66]:
names = np.array(["Bob", "Joe", "Will", "Bob", "Will", "Joe", "Joe"])

In [68]:
data = np.array([[4, 7], [0, 2], [-5, 6], [0, 0], [1, 2],[-12, -4], [3, 4]])

In [69]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [70]:
data

array([[  4,   7],
       [  0,   2],
       [ -5,   6],
       [  0,   0],
       [  1,   2],
       [-12,  -4],
       [  3,   4]])

Boolean comparison on array

In [71]:
names == "Bob"

array([ True, False, False,  True, False, False, False])

In [72]:
data[names == "Bob"]

array([[4, 7],
       [0, 0]])

In [73]:
data[names == "Bob", 1:]

array([[7],
       [0]])

In [75]:
names != "Bob"

array([False,  True,  True, False,  True,  True,  True])

In [76]:
cond = names == "Bob"

In [77]:
data[~cond]

array([[  0,   2],
       [ -5,   6],
       [  1,   2],
       [-12,  -4],
       [  3,   4]])

In [78]:
mask = (names == "Bob") | (names == "Will")

In [79]:
mask

array([ True, False,  True,  True,  True, False, False])

In [80]:
data[mask]

array([[ 4,  7],
       [-5,  6],
       [ 0,  0],
       [ 1,  2]])

The Python keywords "and" and "or" do not work with boolean NumPy arrays; use `&` (and) and `|` (or) instead.

In [81]:
data[data < 0] = 0

In [82]:
data

array([[4, 7],
       [0, 2],
       [0, 6],
       [0, 0],
       [1, 2],
       [0, 0],
       [3, 4]])

Set whole row or column using 1D boolean array

In [83]:
data[names != "Joe"] = 7

In [84]:
data

array([[7, 7],
       [0, 2],
       [7, 7],
       [7, 7],
       [7, 7],
       [0, 0],
       [3, 4]])

## Fancy Indexing - Multi-Dimensional Array Indexed By Integer Array

In [85]:
arr = np.zeros((8, 4))

In [87]:
# Walk the rows of arr (each one is a view into arr) and overwrite every
# element of a row with that row's index.
for row_idx, row in enumerate(arr):
    row[:] = row_idx

In [88]:
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [89]:
arr[[4, 3, 0 ,6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

This retrieved rows 4, 3, 0 and 6 in order

In [90]:
arr = np.arange(32).reshape((8, 4))

In [91]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [92]:
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [93]:
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

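Fancy indexing, unlike slicing, always copies the data into a new array when the result is assigned to a new variable. Assigning values *through* a fancy index (e.g. `arr[[1, 5]] = 0`) still modifies the original array!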

In [96]:
arr = np.arange(15).reshape((3, 5))

In [97]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [98]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

Use the transpose (`.T`) for matrix computations. Ex - when computing the inner matrix product using `numpy.dot`

In [101]:
arr = np.array([[0, 1, 0], [1, 2, -2], [6, 3, 2], [-1, 0, -1], [1, 0, 1]])


In [102]:
arr

array([[ 0,  1,  0],
       [ 1,  2, -2],
       [ 6,  3,  2],
       [-1,  0, -1],
       [ 1,  0,  1]])

In [100]:
np.dot(arr.T, arr)

array([[39, 20, 12],
       [20, 14,  2],
       [12,  2, 10]])

In [103]:
arr.swapaxes(0, 1)

array([[ 0,  1,  6, -1,  1],
       [ 1,  2,  3,  0,  0],
       [ 0, -2,  2, -1,  1]])

# Section 4.2 - More numpy stuff

In [104]:
samples = np.random.standard_normal(size=(4, 4))

In [105]:
samples

array([[ 0.8650028 ,  1.87691263,  0.56208018, -1.75893144],
       [ 1.12670465, -0.16079309,  0.96350598, -0.37698981],
       [-1.01514062,  0.54220141, -0.48222466,  0.10517373],
       [ 1.48548622, -1.48657561, -1.12895993,  0.97118359]])

In [106]:
from random import normalvariate

In [107]:
N = 1_000_000

In [108]:
%timeit samples = [normalvariate(0, 1) for _ in range(N)]

274 ms ± 1.25 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [109]:
%timeit np.random.standard_normal(N)

14.3 ms ± 46.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [110]:
rng = np.random.default_rng(seed=12345)

In [111]:
data = rng.standard_normal((2, 3))

In [112]:
type(rng)

numpy.random._generator.Generator

### Random Number Generator Methods

[random num gen](https://wesmckinney.com/book/numpy-basics#tbl-table_numpy_random)

## 4.3 Universal Functions - AKA Perform Operation on Each Element / Element Wise

In [113]:
arr = np.arange(10)

In [114]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [115]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [116]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

`np.maximum` - takes two arrays and returns a single array holding the element-wise maximum of the two

In [117]:
x = rng.standard_normal(8)
y = rng.standard_normal(8)

In [118]:
x

array([-1.3677927 ,  0.6488928 ,  0.36105811, -1.95286306,  2.34740965,
        0.96849691, -0.75938718,  0.90219827])

In [119]:
y

array([-0.46695317, -0.06068952,  0.78884434, -1.25666813,  0.57585751,
        1.39897899,  1.32229806, -0.29969852])

In [120]:
x + y

array([-1.83474588,  0.58820328,  1.14990246, -3.2095312 ,  2.92326717,
        2.3674759 ,  0.56291088,  0.60249976])

In [121]:
np.maximum(x, y)

array([-0.46695317,  0.6488928 ,  0.78884434, -1.25666813,  2.34740965,
        1.39897899,  1.32229806,  0.90219827])

In [122]:
arr = rng.standard_normal(7) * 5

In [123]:
arr

array([ 4.51459671, -8.10791367, -0.7909463 ,  2.24741966, -6.71800536,
       -0.40843795,  8.62369966])

In [124]:
# np.modf splits each element into its fractional part and its integral part,
# returned as two separate arrays (in that order).
remainder, whole_part = np.modf(arr)

In [125]:
remainder

array([ 0.51459671, -0.10791367, -0.7909463 ,  0.24741966, -0.71800536,
       -0.40843795,  0.62369966])

In [127]:
whole_part

array([ 4., -8., -0.,  2., -6., -0.,  8.])

Use out argument to assign results into an existing array instead of creating a new one

In [128]:
out = np.zeros_like(arr)

In [129]:
np.add(arr, 1)

array([ 5.51459671, -7.10791367,  0.2090537 ,  3.24741966, -5.71800536,
        0.59156205,  9.62369966])

In [130]:
np.add(arr, 1, out=out)

array([ 5.51459671, -7.10791367,  0.2090537 ,  3.24741966, -5.71800536,
        0.59156205,  9.62369966])

In [131]:
out

array([ 5.51459671, -7.10791367,  0.2090537 ,  3.24741966, -5.71800536,
        0.59156205,  9.62369966])

[Function reference (unary ufuncs)](https://wesmckinney.com/book/numpy-basics#tbl-table_unary_ufuncs)