# Numpy

In [124]:
import numpy as np

## Arrays and Vectorized Computation
- Vectorized operations on an ndarray are much faster than looping over a built-in Python data structure such as a list

In [125]:
# Using python
my_list = list(range(1000000)) # return 0 ~ 999,999
%timeit for _ in range(10): my_list2 = [2 * x for x in my_list]

505 ms ± 6.24 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [126]:
# Using numpy
my_arr = np.arange(1000000) # return ndarray 0 ~ 999,999
%timeit for _ in range(10): myarr2 = my_arr * 2

4.78 ms ± 184 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [127]:
my_arr?

[0;31mType:[0m        ndarray
[0;31mString form:[0m [     0      1      2 ... 999997 999998 999999]
[0;31mLength:[0m      1000000
[0;31mFile:[0m        /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/numpy/__init__.py
[0;31mDocstring:[0m  
ndarray(shape, dtype=float, buffer=None, offset=0,
        strides=None, order=None)

An array object represents a multidimensional, homogeneous array
of fixed-size items.  An associated data-type object describes the
format of each element in the array (its byte-order, how many bytes it
occupies in memory, whether it is an integer, a floating point number,
or something else, etc.)

Arrays should be constructed using `array`, `zeros` or `empty` (refer
to the See Also section below).  The parameters given here refer to
a low-level method (`ndarray(...)`) for instantiating an array.

For more information, refer to the `numpy` module and examine the
methods and attributes of an array.

Parameters
----------
(for t

## ndarray
- The N-dimensional array object, or ndarray, enables efficient computations in batch.

In [128]:
# Generate some random data
data = np.random.randn(2, 3)
data

array([[ 1.14894236, -0.69015639,  0.27988269],
       [ 1.05251635, -0.69653037,  0.39405466]])

In [129]:
data * 10

array([[11.48942364, -6.90156385,  2.79882695],
       [10.52516351, -6.96530369,  3.94054659]])

In [130]:
data + data

array([[ 2.29788473, -1.38031277,  0.55976539],
       [ 2.1050327 , -1.39306074,  0.78810932]])

In [131]:
data.shape

(2, 3)

In [132]:
data.dtype

dtype('float64')

In [133]:
data.size

6

In [134]:
data.ndim # Number of array dimensions.

2

## Creating ndarrays

- A. From list of values

In [135]:
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [136]:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

- B. Fill with predefined values (ones, zeros, empty)

In [137]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [138]:
np.zeros((3, 6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

- C. arange(): the ndarray counterpart of range

In [139]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

## Modifying a ndarray
- change the shape or size of an ndarray

In [140]:
np.reshape(arr2, (4, 2)) # return a view of the original array, doesn't copy data

array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])

In [141]:
np.reshape(arr2, (3, 2)) # incompatible size

ValueError: cannot reshape array of size 8 into shape (3,2)

In [142]:
# Unlike reshape, np.resize does not raise when the requested size differs from the original;
# it returns a new array (a copy of the data), here truncated to the first four elements.
np.resize(arr2, (2, 2))

array([[1, 2],
       [3, 4]])

## Data type of ndarray
- specify the data type

In [143]:
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)

In [144]:
arr1.dtype

dtype('float64')

In [145]:
arr2.dtype

dtype('int32')

- Or cast the data type from one to another using astype() method

In [146]:
arr = np.array([1, 2, 3, 4, 5])
arr.dtype

dtype('int64')

In [147]:
float_arr = arr.astype(np.float64)
float_arr.dtype

dtype('float64')

In [148]:
# np.bytes_ is the portable name of the fixed-width byte-string dtype;
# the older np.string_ alias was removed in NumPy 2.0.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
# astype parses each byte string as a number; a malformed entry would raise ValueError.
numeric_strings.astype(float)

array([ 1.25, -9.6 , 42.  ])

## Arithmetic with ndarrays
- Arithmetic operations between **equal-size** arrays are applied **element-wise**

In [149]:
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [150]:
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [151]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

## ndarray comparison
- Comparisons between arrays of the same size yield **boolean arrays**

In [152]:
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [153]:
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

## Indexing and Slicing like lists
- ndarrays

In [154]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [155]:
arr[5]

5

In [156]:
arr[5:8]

array([5, 6, 7])

- lists

In [157]:
mylist = list(range(10))
mylist

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [158]:
mylist[5]

5

In [159]:
mylist[5:8]

[5, 6, 7]

## Except..
- A slice of a list is a copy

In [160]:
mylist = list(range(10))
mylist

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [161]:
list_slice = mylist[5:8]
list_slice

[5, 6, 7]

In [162]:
list_slice[1] = 12
list_slice

[5, 12, 7]

In [163]:
mylist

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [164]:
list_slice[:] = 64

TypeError: can only assign an iterable

In [165]:
list_slice[:] = [64]
list_slice

[64]

- A slice of an ndarray is a view

In [166]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [167]:
arr_slice = arr[5:8]
arr_slice

array([5, 6, 7])

In [168]:
arr_slice[1] = 12
arr_slice

array([ 5, 12,  7])

In [169]:
arr

array([ 0,  1,  2,  3,  4,  5, 12,  7,  8,  9])

In [170]:
arr_slice[:] = 64 # assigning to a slice expression 
arr_slice

array([64, 64, 64])

In [171]:
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

### Why the difference?
- ndarrays don't like to copy!
- Efficient with handling large datasets
- A slice of a ndarray is a view. This means that the data is not copied, and any modifications to the view will be reflected in the source array
- How to make a copy of a slice of a ndarray instead of view:
  ```
  arr[5:8].copy()
  ```

## Indexing and Slicing High dimensional ndarrays

### Boolean Indexing

In [172]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [173]:
names == 'Bob' # operations, including comparison, on ndarrays are element-wise

array([ True, False, False,  True, False, False, False])

- We can use an ndarray of Booleans as an index to select elements

In [174]:
data_1d = np.random.randn(7)
data_1d

array([-0.35255939, -0.81109347,  1.04322993, -1.50245966,  1.04162339,
        0.54717172,  1.04806073])

In [175]:
data_1d[names=='Bob'] # The boolean array must be of the same length as the array axis it's indexing

array([-0.35255939, -1.50245966])

- Unlike slicing, selecting data from an array with boolean indexing and assigning it to a new variable creates a copy of the data

In [176]:
boolean_1d = data_1d[names=='Bob'] # create a new object
boolean_1d

array([-0.35255939, -1.50245966])

In [177]:
boolean_1d[:] = 0
boolean_1d # new object changed

array([0., 0.])

In [178]:
data_1d # original object unchanged

array([-0.35255939, -0.81109347,  1.04322993, -1.50245966,  1.04162339,
        0.54717172,  1.04806073])

- combine multiple boolean conditions, using boolean arithmetic operators like & and |, but not the keywords **and** and **or**

In [179]:
conds = (names=='Bob') | (names=='Will') # error if replacing | by or
conds

array([ True, False,  True,  True,  True, False, False])

In [180]:
data_1d[conds]

array([-0.35255939,  1.04322993, -1.50245966,  1.04162339])

- Use ~ to negate a condition (the keyword **not** does not work on boolean arrays)

In [181]:
~conds

array([False,  True, False, False, False,  True,  True])

In [182]:
data_1d[~conds]

array([-0.81109347,  0.54717172,  1.04806073])

- You just need to know which axis you are indexing

In [183]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)

In [184]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [185]:
data

array([[-1.70405666, -0.57704864,  1.31457715, -1.20168114],
       [ 0.6416106 ,  0.14179381,  0.9091641 ,  1.17623066],
       [ 0.39079397,  0.10049542,  0.99189026,  0.36890722],
       [ 1.8375841 , -0.74137591, -0.05145606,  0.20420393],
       [ 0.48153612, -0.23024981, -0.2356334 , -0.05616711],
       [ 0.13973461,  0.06295616,  0.06263944,  0.14210173],
       [ 0.0159376 , -0.67326761,  1.74360489, -2.72205008]])

In [187]:
data[names=='Bob'] # the Boolean index applies to the first axis (rows)

array([[-1.70405666, -0.57704864,  1.31457715, -1.20168114],
       [ 1.8375841 , -0.74137591, -0.05145606,  0.20420393]])

- Boolean indexing can be combined with the regular indexing with integers

In [191]:
data[(names=='Bob'), 2]

array([ 1.31457715, -0.05145606])

In [193]:
data[(names=='Bob'), :2]

array([[-1.70405666, -0.57704864],
       [ 1.8375841 , -0.74137591]])

In [199]:
second_boolean_index = [False, True, True, False]
rows = data[names=='Bob']

In [200]:
rows[:, second_boolean_index]

array([[-0.57704864,  1.31457715],
       [-0.74137591, -0.05145606]])

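- However, combining Boolean indexes on multiple axes in a single indexing expression does not select the rectangular sub-block one might expect: the selected row and column positions are paired element by element.
- The explanation is in the Fancy Indexing section below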

In [204]:
data[names=='Bob', second_boolean_index]

array([-0.57704864, -0.05145606])

### Setting values with Boolean indexing
- To set all of the negative values in data to 0

In [205]:
data

array([[-1.70405666, -0.57704864,  1.31457715, -1.20168114],
       [ 0.6416106 ,  0.14179381,  0.9091641 ,  1.17623066],
       [ 0.39079397,  0.10049542,  0.99189026,  0.36890722],
       [ 1.8375841 , -0.74137591, -0.05145606,  0.20420393],
       [ 0.48153612, -0.23024981, -0.2356334 , -0.05616711],
       [ 0.13973461,  0.06295616,  0.06263944,  0.14210173],
       [ 0.0159376 , -0.67326761,  1.74360489, -2.72205008]])

In [207]:
data[data<0] = 0
data

array([[0.        , 0.        , 1.31457715, 0.        ],
       [0.6416106 , 0.14179381, 0.9091641 , 1.17623066],
       [0.39079397, 0.10049542, 0.99189026, 0.36890722],
       [1.8375841 , 0.        , 0.        , 0.20420393],
       [0.48153612, 0.        , 0.        , 0.        ],
       [0.13973461, 0.06295616, 0.06263944, 0.14210173],
       [0.0159376 , 0.        , 1.74360489, 0.        ]])

- Setting values by rows

In [209]:
data

array([[0.        , 0.        , 1.31457715, 0.        ],
       [0.6416106 , 0.14179381, 0.9091641 , 1.17623066],
       [0.39079397, 0.10049542, 0.99189026, 0.36890722],
       [1.8375841 , 0.        , 0.        , 0.20420393],
       [0.48153612, 0.        , 0.        , 0.        ],
       [0.13973461, 0.06295616, 0.06263944, 0.14210173],
       [0.0159376 , 0.        , 1.74360489, 0.        ]])

In [210]:
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [212]:
data[names!='Joe'] = 7
data

array([[7.        , 7.        , 7.        , 7.        ],
       [0.6416106 , 0.14179381, 0.9091641 , 1.17623066],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.13973461, 0.06295616, 0.06263944, 0.14210173],
       [0.0159376 , 0.        , 1.74360489, 0.        ]])

In [213]:
x = np.array([[0, 1, 2],[3, 4, 5],[6, 7, 8],[9, 10, 11]])
x[x>5]

array([ 6,  7,  8,  9, 10, 11])

### Fancy indexing
- Fancy indexing is a term adopted by Numpy to describe indexing using **integer arrays(or lists)**
- To select out a subset of the rows in a particular order, you can simply pass a list or ndarray of integers specifying the desired order.

In [217]:
# Build an 8x4 float array in which every entry of row k equals k.
arr = np.empty((8, 4))
for row_no in range(len(arr)):
    arr[row_no] = row_no  # a scalar assigned to a row is broadcast across its columns
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [218]:
arr[[4, 3, 0, 6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

- Using negative indices selects rows from the end

In [219]:
arr[[-3, -5, -7]]

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

- Fancy indexing works for any other axis too

In [221]:
arr = np.arange(32).reshape((8, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [222]:
arr[:, [2, 1]]

array([[ 2,  1],
       [ 6,  5],
       [10,  9],
       [14, 13],
       [18, 17],
       [22, 21],
       [26, 25],
       [30, 29]])

- However, passing **multiple index arrays** does something slightly different; it selects a one-dimensional array of elements corresponding to **each tuple of indices**.

In [223]:
arr = np.arange(32).reshape((8, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [224]:
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

- Fancy indexing (Just like Boolean indexing), unlike slicing, always copies the data into a new array

## Transposing Arrays
- Transposing is a special form of reshaping that similarly returns a view on the underlying data without copying anything.
- Arrays have the transpose method and also the special T attribute

In [226]:
arr = np.arange(15).reshape((3, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [227]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [228]:
arr2 = arr.T

In [230]:
arr2[:, :] = 2
arr

array([[2, 2, 2, 2, 2],
       [2, 2, 2, 2, 2],
       [2, 2, 2, 2, 2]])

In [231]:
arr2

array([[2, 2, 2],
       [2, 2, 2],
       [2, 2, 2],
       [2, 2, 2],
       [2, 2, 2]])

- For higher dimensional arrays, **transpose()** will accept **a tuple of axis** numbers to permute the axes
- **swapaxes()**, which takes **a pair of axis numbers** and switches the indicated axes to rearrange the data
- These functions don't copy data

In [234]:
arr.transpose((1, 0))

array([[2, 2, 2],
       [2, 2, 2],
       [2, 2, 2],
       [2, 2, 2],
       [2, 2, 2]])

In [235]:
arr.swapaxes(1, 0)

array([[2, 2, 2],
       [2, 2, 2],
       [2, 2, 2],
       [2, 2, 2],
       [2, 2, 2]])

In [238]:
arr.swapaxes(0,1)

array([[2, 2, 2],
       [2, 2, 2],
       [2, 2, 2],
       [2, 2, 2],
       [2, 2, 2]])

In [239]:
arr3 = np.arange(1, 25).reshape((2, 3, 4))
arr3

array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]],

       [[13, 14, 15, 16],
        [17, 18, 19, 20],
        [21, 22, 23, 24]]])

In [241]:
arr3.transpose((2, 0, 1))

array([[[ 1,  5,  9],
        [13, 17, 21]],

       [[ 2,  6, 10],
        [14, 18, 22]],

       [[ 3,  7, 11],
        [15, 19, 23]],

       [[ 4,  8, 12],
        [16, 20, 24]]])

In [242]:
arr3.transpose((2, 1, 0))

array([[[ 1, 13],
        [ 5, 17],
        [ 9, 21]],

       [[ 2, 14],
        [ 6, 18],
        [10, 22]],

       [[ 3, 15],
        [ 7, 19],
        [11, 23]],

       [[ 4, 16],
        [ 8, 20],
        [12, 24]]])

## Universal Function
- A universal function, or ufunc, is a function that performs element-wise operations on data in ndarrays
- unary ufuncs take one array as input

In [243]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [244]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [245]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

### binary ufunc
- binary ufuncs take two arrays as input

In [261]:
x = np.random.randn(8)
y = np.random.randn(8)

In [262]:
x

array([ 0.68359974,  3.00061158,  1.11065556,  1.49798922,  1.22683124,
        0.42307862, -0.15824828, -0.05490988])

In [263]:
y

array([ 1.25179401, -0.87783343,  1.12537192,  0.70224596, -0.11105593,
       -1.08160546,  0.32838096, -0.0671155 ])

In [264]:
np.maximum(x, y)

array([ 1.25179401,  3.00061158,  1.12537192,  1.49798922,  1.22683124,
        0.42307862,  0.32838096, -0.05490988])

- Some ufuncs return more than one array.

In [265]:
arr = np.random.randn(7) * 5
arr

array([-4.48583083,  7.53956571, -0.21101998,  2.29797396, -3.33346141,
        0.39063993,  7.09020223])

In [266]:
remainder, whole_part = np.modf(arr)

In [267]:
remainder

array([-0.48583083,  0.53956571, -0.21101998,  0.29797396, -0.33346141,
        0.39063993,  0.09020223])

In [268]:
whole_part

array([-4.,  7., -0.,  2., -3.,  0.,  7.])

- Ufuncs accept an optional out argument that allows them to operate in-place on arrays

In [270]:
arr

array([-4.48583083,  7.53956571, -0.21101998,  2.29797396, -3.33346141,
        0.39063993,  7.09020223])

In [271]:
# Take the element-wise square root of arr and write the result back into arr itself
# through the ufunc's `out` argument; negative entries become nan.
np.sqrt(arr, out=arr)

  np.sqrt(arr, arr)


array([       nan, 2.74582696,        nan, 1.51590698,        nan,
       0.62501194, 2.66274336])

In [272]:
arr

array([       nan, 2.74582696,        nan, 1.51590698,        nan,
       0.62501194, 2.66274336])

## Mathematical and Statistical Methods
- Compute sum, mean, and std (standard deviation) either by calling the array's member method or by using the top-level NumPy function.

In [273]:
arr = np.random.randn(5, 4)
arr

array([[ 1.17411163,  0.28427222, -0.94195297,  1.99338262],
       [ 0.62718037,  0.04641632, -0.00890745,  0.27997538],
       [ 0.45484458, -0.61127019, -0.19914092,  1.19088003],
       [ 0.46488887,  0.55685366,  2.14643044,  1.30718092],
       [ 1.63993421,  0.19367403,  1.26224539, -0.29610009]])

In [274]:
arr.mean()

0.5782449530321887

In [275]:
np.mean(arr)

0.5782449530321887

In [276]:
arr.sum()

11.564899060643773

### Statistics of Arrays
- Statistics can be computed over a given axis

In [277]:
arr

array([[ 1.17411163,  0.28427222, -0.94195297,  1.99338262],
       [ 0.62718037,  0.04641632, -0.00890745,  0.27997538],
       [ 0.45484458, -0.61127019, -0.19914092,  1.19088003],
       [ 0.46488887,  0.55685366,  2.14643044,  1.30718092],
       [ 1.63993421,  0.19367403,  1.26224539, -0.29610009]])

In [280]:
# collapse axis 1
# same as arr.mean(1) or np.mean(arr, 1)
arr.mean(axis=1)

array([0.62745337, 0.23616616, 0.20882838, 1.11883847, 0.69993839])

In [281]:
arr.sum(axis=0) # collapse axis 0

array([4.36095966, 0.46994604, 2.2586745 , 4.47531887])

### Cumulative Statistics
- Cumulative functions like cumsum and cumprod do not aggregate; instead they produce an array of the intermediate results

In [282]:
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [283]:
arr.cumsum()

array([ 0,  1,  3,  6, 10, 15, 21, 28])

In [285]:
# Lay out the integers 0..8 as a 3x3 matrix (filled row by row).
arr = np.arange(9).reshape(3, 3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [286]:
arr.cumsum(axis=0)

array([[ 0,  1,  2],
       [ 3,  5,  7],
       [ 9, 12, 15]])

In [287]:
arr.cumprod(axis=1)

array([[  0,   0,   0],
       [  3,  12,  60],
       [  6,  42, 336]])

## Boolean arrays
- Boolean values are coerced to 1 (True) and 0 (False) in preceding methods.
- Thus, sum is often used as **a means of counting True values** in a boolean array.

In [288]:
arr = np.random.randn(100)
(arr > 0).sum() # Number of positive

48

- **"any"** tests whether one or more values in an array is True, while **"all"** checks if every value is True

In [289]:
bools = np.array([False, False, True, False])
bools.any()

True

In [290]:
bools.all()

False

- These methods also work with **non-boolean arrays**, where non-zero elements evaluate to True.

## Sorting in place
- Like Python's built-in list type, NumPy arrays can be **sorted in-place** with the sort **member method**.

In [291]:
arr = np.random.randn(6)
arr

array([-0.49414793, -0.25373879,  1.48097638, -0.08038384,  0.74261572,
        1.11861009])

In [293]:
arr.sort()
arr

array([-0.49414793, -0.25373879, -0.08038384,  0.74261572,  1.11861009,
        1.48097638])

- We can sort each one-dimensional section of values in a multidimensional array in-place **along an axis** by passing the axis number to sort

In [294]:
arr = np.random.randn(5, 3)
arr

array([[-1.57135074, -0.19007847,  1.67200352],
       [ 0.06872642, -0.51568108,  0.61749916],
       [ 0.21266512,  1.86053197, -0.99881178],
       [ 1.15999632, -1.16708733, -1.97362332],
       [ 1.65151351,  0.1533982 ,  0.3149267 ]])

In [296]:
arr.sort(1)
arr

array([[-1.57135074, -0.19007847,  1.67200352],
       [-0.51568108,  0.06872642,  0.61749916],
       [-0.99881178,  0.21266512,  1.86053197],
       [-1.97362332, -1.16708733,  1.15999632],
       [ 0.1533982 ,  0.3149267 ,  1.65151351]])

## Sorting and copy
- The top-level function **np.sort** returns a sorted **copy** of an array instead of modifying the array in-place.

In [299]:
np.sort(arr)

array([[-1.57135074, -0.19007847,  1.67200352],
       [-0.51568108,  0.06872642,  0.61749916],
       [-0.99881178,  0.21266512,  1.86053197],
       [-1.97362332, -1.16708733,  1.15999632],
       [ 0.1533982 ,  0.3149267 ,  1.65151351]])

In [300]:
np.sort(arr, 1)

array([[-1.57135074, -0.19007847,  1.67200352],
       [-0.51568108,  0.06872642,  0.61749916],
       [-0.99881178,  0.21266512,  1.86053197],
       [-1.97362332, -1.16708733,  1.15999632],
       [ 0.1533982 ,  0.3149267 ,  1.65151351]])

## Set logic
- NumPy has some basic set operations for one-dimensional ndarrays.
- **np.unique**

In [301]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)

array(['Bob', 'Joe', 'Will'], dtype='<U4')

In [302]:
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)

array([1, 2, 3, 4])

- **np.in1d** (superseded by **np.isin** in newer NumPy releases) tests whether each value of one array is a member of another

In [303]:
values = np.array([6, 0, 0, 3, 2, 5, 6])
# np.isin is the recommended replacement for np.in1d, which is deprecated as of NumPy 2.0;
# for 1-D input both return the same boolean mask (True where the value is in the test list).
np.isin(values, [2, 3, 6])

array([ True, False, False,  True,  True, False,  True])

## Save and Load Arrays
- **np.save** and **np.load** efficiently save and load array data on disk in NumPy's **built-in binary format**
- **np.save** writes a **single** array in an uncompressed raw binary format with file extension **.npy**

In [304]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [305]:
np.save('some_array', arr)

In [307]:
np.load('some_array.npy')

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

- **np.savez** saves **multiple** arrays in an uncompressed archive with file extension **.npz**
- When loading an .npz file, you get back a **dict-like** object

In [310]:
arr = np.arange(10)
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.savez('array_archive.npz', a=arr, b=names)
arch = np.load('array_archive.npz')

In [314]:
list(arch.keys())

['a', 'b']

In [315]:
arch.keys()

KeysView(<numpy.lib.npyio.NpzFile object at 0x7fe8f399ca60>)

In [313]:
arch['a']

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

## Linear algebra
- matrix multiplication, decompositions, determinants, and other square matrix math
- element-wise multiplication
- dot or @: Matrix multiplication (dot product), both member function and np.dot

In [317]:
x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])

In [318]:
x

array([[1., 2., 3.],
       [4., 5., 6.]])

In [319]:
y

array([[ 6., 23.],
       [-1.,  7.],
       [ 8.,  9.]])

In [320]:
x.dot(y) # same as np.dot(x, y) or x@y

array([[ 28.,  64.],
       [ 67., 181.]])