In [1]:
#A.1 ndarray Object Internals

#Imports
import numpy as np

In [2]:
#Creating a shape and printing it.
np.ones((10, 5)).shape

(10, 5)

In [3]:
#Creating strides.
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

In [4]:
#Creating dtypes and assigning them to variables.
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)

In [5]:
#Printing out the int dtype.
np.issubdtype(ints.dtype, np.integer)

True

In [6]:
#Printing out the float dtype.
np.issubdtype(floats.dtype, np.floating)

True

In [7]:
#Checking all of the parent classes of a specific dtype.
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [8]:
#Printing out the dtype.
np.issubdtype(ints.dtype, np.number)

True

In [9]:
#A.2 Advanced Array Manipulation
#Creating an array.
arr = np.arange(8)
#Printing out the array.
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [10]:
#Reshaping the array.
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [11]:
#Reshaping a multidimensional array.
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [12]:
#Passing a new array dimension to arr.
arr = np.arange(15)

In [13]:
#Reshaping the array and lessening the length of the array by 1.
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [14]:
#Creating a new array.
other_arr = np.ones((3, 5))
#Displaying the shape of the new array.
other_arr.shape

(3, 5)

In [15]:
#Reshaping the array.
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [16]:
#Reshaping the array.
arr = np.arange(15).reshape((5, 3))
#Printing out the array.
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [17]:
#Raveling the array.
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [18]:
#Flattening the array to return a copy of the data.
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [19]:
#C Versus Fortran Order
#Creating an array by reshaping the initial.
arr = np.arange(12).reshape((3, 4))
#Displaying the contents.
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [20]:
#Raveling the array.
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [21]:
#Raveling the array but passing F this time to order the data.
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

In [22]:
#Concatenating and Splitting Arrays
#Assigning an array to variable.
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
#Creating a second array and assigning it to a variable.
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
#Combining the arrays with an axis of 0.
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [23]:
#Combining the arrays with an axis of 1.
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [24]:
#Combining the arrays using vstack.
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [25]:
#Combining the arrays using hstack.
np.hstack((arr1, arr2))
#Creating an array of random values.
arr = np.random.randn(5, 2)

In [26]:
# Printing the values in the array.
arr

array([[ 0.75814987,  0.29520899],
       [-1.89063419, -0.39629719],
       [ 0.58219391, -0.0626208 ],
       [ 1.20964859, -1.22664994],
       [-0.68804703,  0.53143494]])

In [27]:
#Splitting the array into 3 arrays.
first, second, third = np.split(arr, [1, 3])
#Displaying the first array.
first

array([[0.75814987, 0.29520899]])

In [28]:
#Displaying the second array.
second

array([[-1.89063419, -0.39629719],
       [ 0.58219391, -0.0626208 ]])

In [29]:
#Displaying the third array.
third

array([[ 1.20964859, -1.22664994],
       [-0.68804703,  0.53143494]])

In [30]:
#Creating an array.
arr = np.arange(6)
#Creating another array and using the reshape function.
arr1 = arr.reshape((3, 2))
#Creating another array filled with random variables.
arr2 = np.random.randn(3, 2)
#Stacking the arrays concisely.
np.r_[arr1, arr2]

array([[ 0.00000000e+00,  1.00000000e+00],
       [ 2.00000000e+00,  3.00000000e+00],
       [ 4.00000000e+00,  5.00000000e+00],
       [ 6.44776147e-01,  9.26056398e-01],
       [-3.69543196e-01,  6.47016014e-02],
       [ 1.31854502e-01, -1.93818547e-03]])

In [31]:
#Stacking the arrays concisely.
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00],
       [ 2.00000000e+00,  3.00000000e+00,  1.00000000e+00],
       [ 4.00000000e+00,  5.00000000e+00,  2.00000000e+00],
       [ 6.44776147e-01,  9.26056398e-01,  3.00000000e+00],
       [-3.69543196e-01,  6.47016014e-02,  4.00000000e+00],
       [ 1.31854502e-01, -1.93818547e-03,  5.00000000e+00]])

In [32]:
#Translating slices into arrays.
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

In [33]:
#Creating a new array.
arr = np.arange(3)
#Printing out the array.
arr

array([0, 1, 2])

In [34]:
#Making the array bigger by multiplying the elements by 3, so it is 3 times larger.
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [35]:
#Repeating the elements in the following positions, 2, 3, 4.
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [36]:
#Creating an array of random values.
arr = np.random.randn(2, 2)
#Displaying the array.
arr

array([[-0.30469263, -1.91991973],
       [-0.35008467,  1.21585051]])

In [37]:
#Repeating the array under axis 0.
arr.repeat(2, axis=0)

array([[-0.30469263, -1.91991973],
       [-0.30469263, -1.91991973],
       [-0.35008467,  1.21585051],
       [-0.35008467,  1.21585051]])

In [38]:
#Repeating the array under axis 1.
arr.repeat([2, 3], axis=1)

array([[-0.30469263, -0.30469263, -1.91991973, -1.91991973, -1.91991973],
       [-0.35008467, -0.35008467,  1.21585051,  1.21585051,  1.21585051]])

In [39]:
#Displaying the contents of arr.
arr

array([[-0.30469263, -1.91991973],
       [-0.35008467,  1.21585051]])

In [40]:
#Stacking copies of an array along an axis
np.tile(arr, 2)

array([[-0.30469263, -1.91991973, -0.30469263, -1.91991973],
       [-0.35008467,  1.21585051, -0.35008467,  1.21585051]])

In [41]:
#Displaying the new contents of arr.
arr

array([[-0.30469263, -1.91991973],
       [-0.35008467,  1.21585051]])

In [42]:
#a tuple indicating the layout of the “tiling”.
np.tile(arr, (2, 1))

array([[-0.30469263, -1.91991973],
       [-0.35008467,  1.21585051],
       [-0.30469263, -1.91991973],
       [-0.35008467,  1.21585051]])

In [43]:
#Creating a larger tile.
np.tile(arr, (3, 2))

array([[-0.30469263, -1.91991973, -0.30469263, -1.91991973],
       [-0.35008467,  1.21585051, -0.35008467,  1.21585051],
       [-0.30469263, -1.91991973, -0.30469263, -1.91991973],
       [-0.35008467,  1.21585051, -0.35008467,  1.21585051],
       [-0.30469263, -1.91991973, -0.30469263, -1.91991973],
       [-0.35008467,  1.21585051, -0.35008467,  1.21585051]])

In [44]:
#Using fancy indexing to get and set subsets.
#First assining an array multiplied by 100 to a variable.
arr = np.arange(10) * 100
#Creating a list.
inds = [7, 1, 2, 6]
#Adding the list to the array and printing it.
arr[inds]

array([700, 100, 200, 600])

In [45]:
#Using the take function on the lsit.
arr.take(inds)
#Filling the array with inds and 42 by using the put function
arr.put(inds, 42)
#Displaying the array.
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [46]:
#Placing the following into the arr array.
arr.put(inds, [40, 41, 42, 43])
#Displaying the contents of arr.
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [47]:
#Creating a list.
inds = [2, 0, 2, 1]
#Creating an array of random values.
arr = np.random.randn(2, 4)
#Displaying the contents.
arr

array([[ 0.41312876,  0.27299702, -0.46818294,  1.9762905 ],
       [-0.17463565, -1.28198038,  0.63556241, -0.37832434]])

In [48]:
#Getting a subset using take.
arr.take(inds, axis=1)

array([[-0.46818294,  0.41312876, -0.46818294,  0.27299702],
       [ 0.63556241, -0.17463565,  0.63556241, -1.28198038]])

In [49]:
#A.3 Broadcasting
#Creating a new array.
arr = np.arange(5)
#Displaying the array.
arr

array([0, 1, 2, 3, 4])

In [50]:
#Multiplying each value by 4 and displaying it.
arr * 4

array([ 0,  4,  8, 12, 16])

In [51]:
#Creating an array of random variables.
arr = np.random.randn(4, 3)
#Printing the mean.
arr.mean(0)

array([ 0.55126449, -0.09907365, -0.0943584 ])

In [52]:
#Subtracting the mean(0) from the array and assigning the value to a new variable.
demeaned = arr - arr.mean(0)
#Displaying the contents of the new variable.
demeaned

array([[-1.1202044 ,  1.07423709,  0.90719906],
       [ 1.02365813, -1.04475351,  0.76977962],
       [-0.02002929,  0.4191172 , -1.43078198],
       [ 0.11657556, -0.44860079, -0.24619669]])

In [53]:
#Displaying the means of the values at position 0.
demeaned.mean(0)

array([5.55111512e-17, 1.38777878e-17, 4.16333634e-17])

In [54]:
#Displaying the array.
arr
#Assinging the means to a variable.
row_means = arr.mean(1)


In [55]:
#Displaying the shape of the row means.
row_means.shape

(4,)

In [56]:
#Reshaping the row means.
row_means.reshape((4, 1))

array([[ 0.40635473],
       [ 0.3688389 ],
       [-0.22462054],
       [-0.07346316]])

In [57]:
#Assigning a new value to demeaned.
demeaned = arr - row_means.reshape((4, 1))
#Displaying the values at positon 1. 
demeaned.mean(1)

array([ 0.00000000e+00, -3.70074342e-17,  0.00000000e+00, -1.85037171e-17])

In [58]:
#Broadcasting Over Other Axes.
#Throws an error.
arr - arr.mean(1)

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [None]:
#reshaping the row means to be shape (4, 1) instead of (4,)
arr - arr.mean(1).reshape((4, 1))

In [59]:
#Creating a new array filled with 0s.
arr = np.zeros((4, 4))
#Slicing an array and assigning it to a variable.
arr_3d = arr[:, np.newaxis, :]
#Displaying the shape of the array.
arr_3d.shape

(4, 1, 4)

In [60]:
#Creating a new array with random values.
arr_1d = np.random.normal(size=3)
#Using the new axis inside of the parameters to assign as an index.
arr_1d[:, np.newaxis]

array([[0.23240229],
       [0.2241557 ],
       [1.452266  ]])

In [61]:
#Using the new axis inside of the parameters to assign as an index.
arr_1d[np.newaxis, :]

array([[0.23240229, 0.2241557 , 1.452266  ]])

In [62]:
#Creating a new array.
arr = np.random.randn(3, 4, 5)
#Taking the means of the subarray.
depth_means = arr.mean(2)
#Displaying the means.
depth_means

array([[-0.24847287, -0.59057469, -0.14879918, -0.50123247],
       [ 0.26651863,  0.86853871, -0.04478102,  0.21787942],
       [ 0.51440991, -0.06948722,  0.06315815,  0.55603836]])

In [63]:
#displaying the shape of the array.
depth_means.shape

(3, 4)

In [64]:
#Creating a variable that is deprived of the depths.
demeaned = arr - depth_means[:, :, np.newaxis]
#Displaying the value of a certain array in demeaned.
demeaned.mean(2)

array([[-3.33066907e-17,  0.00000000e+00, -6.66133815e-17,
         3.33066907e-17],
       [ 2.22044605e-17, -1.77635684e-16,  2.22044605e-17,
        -8.32667268e-18],
       [ 2.22044605e-17,  0.00000000e+00,  2.22044605e-17,
         0.00000000e+00]])

In [65]:
#Creating a funciton.
def demean_axis(arr, axis=0):
    means = arr.mean(axis)

    # This generalizes things like [:, :, np.newaxis] to N dimensions
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    return arr - means[indexer]

In [66]:
#Filling a new array with zeros.
arr = np.zeros((4, 3))
#Assinging all values as 5.
arr[:] = 5
#Displaying the contents of arr.
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [67]:
#Creating a new set of columns.
col = np.array([1.28, -0.42, 0.44, 1.6])
#Reconstructing the shape.
arr[:] = col[:, np.newaxis]
#Displaying the array.
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [68]:
#Modifying the first 2 rows.
arr[:2] = [[-1.37], [0.509]]
#Displaying the array.
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

In [69]:
#A.4 Advanced ufunc Usage
#Creating an array from range 10.
arr = np.arange(10)
#Adding the elements and displaying on the screen.
np.add.reduce(arr)

45

In [70]:
#Summing up the array.
arr.sum()

45

In [71]:
#Initialize the random number.
np.random.seed(12346)  # for reproducibility
#Creating a random array.
arr = np.random.randn(5, 5)
#Sorting rows in array.
arr[::2].sort(1) # sort a few rows
#Determining which cells are less than the array range on the right.
arr[:, :-1] < arr[:, 1:]
#Checking whether each value in the row is sorted.
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

array([ True, False,  True, False,  True])

In [72]:
#Overloading the last array and creating a new array of a specific shape.
arr = np.arange(15).reshape((3, 5))
#Producing an array of the same size as the intermediate accumulated values.
np.add.accumulate(arr, axis=1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]], dtype=int32)

In [73]:
#Overloading the last array.
arr = np.arange(3).repeat([1, 2, 2])
#Displaying the contents of the array.
arr

array([0, 1, 1, 2, 2])

In [74]:
#Performing a pairwise cross-product between the two arrays using outer.
np.multiply.outer(arr, np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [75]:
#Creating dimensions.
x, y = np.random.randn(3, 4), np.random.randn(5)
#Concatenating the dimensions.
result = np.subtract.outer(x, y)
#Displaying the variable.
result.shape

(3, 4, 5)

In [76]:
#Overloading arr with a new array range.
arr = np.arange(10)
#Aggregating slices of the array together.
np.add.reduceat(arr, [0, 5, 8])

array([10, 18, 17], dtype=int32)

In [77]:
#Creating a new array using the outer method.
arr = np.multiply.outer(np.arange(4), np.arange(5))
#Displaying the contents of the array.
arr

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [78]:
#Aggregating slices of the array together.
np.add.reduceat(arr, [0, 2, 4], axis=1)

array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]], dtype=int32)

In [79]:
#Creating a new function to add elements together.
def add_elements(x, y):
    return x + y
#Assigning a variable to the sum of elements.
add_them = np.frompyfunc(add_elements, 2, 1)
#Utilizing the newly crated variable acting as a function on 2 ranges.
add_them(np.arange(8), np.arange(8))

array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

In [80]:
#Specifying the output type with numpy.vectorize and overloading the old variable acting as a function.
add_them = np.vectorize(add_elements, otypes=[np.float64])
#Applying the variable acting as a function to two ranges yet again.
add_them(np.arange(8), np.arange(8))


array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14.])

In [81]:
#Creating a new array.
arr = np.random.randn(10000)
#Adding the elements together following a certain time.
%timeit add_them(arr, arr)

1.07 ms ± 54.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [82]:
#Timing the addition of the arrays together.
%timeit np.add(arr, arr)

2.14 µs ± 50.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [83]:
#A.5 Structured and Record Arrays
#Creating a variable of a couple of sets, or structured array.
dtype = [('x', np.float64), ('y', np.int32)]
#Creating a structured array.
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
#Displaying the array.
sarr

array([(1.5       ,  6), (3.14159265, -2)],
      dtype=[('x', '<f8'), ('y', '<i4')])

In [84]:
#Displaying the first element, which is a set.
sarr[0]

(1.5, 6)

In [85]:
#Displaying a specific value to a key of the set specified.
sarr[0]['y']

6

In [86]:
#Displaying a specific value to a key.
sarr['x']

array([1.5       , 3.14159265])

In [87]:
#Nested dtypes and Multidimensional Fields
#Creating a nested dtype.
dtype = [('x', np.int64, 3), ('y', np.int32)]
#Creating an array filled with 0s.
arr = np.zeros(4, dtype=dtype)
#Displaying the array.
arr

array([([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0)],
      dtype=[('x', '<i8', (3,)), ('y', '<i4')])

In [88]:
#Displaying the array at index 0 and with a key of x.
arr[0]['x']

array([0, 0, 0], dtype=int64)

In [89]:
#Printing all of the arrays under key x.
arr['x']

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]], dtype=int64)

In [90]:
#Creating a nested structure.
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
#Forming the array
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)
#Displaying the value under key x.
data['x']

array([(1., 2.), (3., 4.)], dtype=[('a', '<f8'), ('b', '<f4')])

In [91]:
#Displaying the value under key y.
data['y']

array([5, 6])

In [92]:
#Displaying the values under key x, grouping a.
data['x']['a']

array([1., 3.])

In [93]:
#A.6 More About Sorting
#Creating an array.
arr = np.random.randn(6)
#Sorting the array.
arr.sort()
#Displaying the array.
arr

array([-1.08199644,  0.37588273,  0.80139193,  1.13969136,  1.28881614,
        1.84126094])

In [94]:
#Overidding the original array with an array of random values.
arr = np.random.randn(3, 5)
#Displaying the new array.
arr

array([[-0.33176812, -1.47108206,  0.87050269, -0.08468875, -1.13286962],
       [-1.01114869, -0.34357617,  2.17140268,  0.12337075, -0.01893118],
       [ 0.17731791,  0.7423957 ,  0.85475634,  1.03797268, -0.32899594]])

In [95]:
#Sorting the array.
arr[:, 0].sort()  # Sort first column values in-place
#Displaying the array.
arr

array([[-1.01114869, -1.47108206,  0.87050269, -0.08468875, -1.13286962],
       [-0.33176812, -0.34357617,  2.17140268,  0.12337075, -0.01893118],
       [ 0.17731791,  0.7423957 ,  0.85475634,  1.03797268, -0.32899594]])

In [96]:
#Creating an array of random values.
arr = np.random.randn(5)
#Displaying the contents of the array.
arr

array([-1.11807759, -0.24152521, -2.0051193 ,  0.73788753, -1.06137462])

In [97]:
#Sorting the array.
np.sort(arr)


array([-2.0051193 , -1.11807759, -1.06137462, -0.24152521,  0.73788753])

In [98]:
#Displaying the original array.
arr

array([-1.11807759, -0.24152521, -2.0051193 ,  0.73788753, -1.06137462])

In [99]:
#Creating a new array of random values.
arr = np.random.randn(3, 5)
#Displaying the contents of the array.
arr

array([[ 0.59545348, -0.26822958,  1.33885804, -0.18715572,  0.91108374],
       [-0.32150045,  1.00543901, -0.51683937,  1.19251887, -0.19893404],
       [ 0.39691349, -1.76381537,  0.60709023, -0.22215536, -0.21707838]])

In [100]:
#Sorting the array.
arr.sort(axis=1)
#Displaying the contents of the array.
arr

array([[-0.26822958, -0.18715572,  0.59545348,  0.91108374,  1.33885804],
       [-0.51683937, -0.32150045, -0.19893404,  1.00543901,  1.19251887],
       [-1.76381537, -0.22215536, -0.21707838,  0.39691349,  0.60709023]])

In [101]:
#Displaying specific values in the array.
arr[:, ::-1]

array([[ 1.33885804,  0.91108374,  0.59545348, -0.18715572, -0.26822958],
       [ 1.19251887,  1.00543901, -0.19893404, -0.32150045, -0.51683937],
       [ 0.60709023,  0.39691349, -0.21707838, -0.22215536, -1.76381537]])

In [102]:
#Assigning an array to a variable.
values = np.array([5, 0, 1, 3, 2])
#Creating a variables of reorder of the array above.
indexer = values.argsort()
#Displaying the contents of the array.
indexer

array([1, 2, 4, 3, 0], dtype=int64)

In [103]:
#Inserting the indexer values inside of values.
values[indexer]

array([0, 1, 2, 3, 5])

In [104]:
#Overridding the array with new random values.
arr = np.random.randn(3, 5)
#Assigning the first element the values of the values variable.
arr[0] = values
#Displaying the values of arr.
arr

array([[ 5.        ,  0.        ,  1.        ,  3.        ,  2.        ],
       [-0.36360302, -0.13775933,  2.17773731, -0.47280687,  0.8356152 ],
       [-0.20885016,  0.23159352,  0.72798172, -1.3918432 ,  1.99558262]])

In [105]:
#Displaying the sorted values of arr.
arr[:, arr[0].argsort()]

array([[ 0.        ,  1.        ,  2.        ,  3.        ,  5.        ],
       [-0.13775933,  2.17773731,  0.8356152 , -0.47280687, -0.36360302],
       [ 0.23159352,  0.72798172,  1.99558262, -1.3918432 , -0.20885016]])

In [106]:
#Creating an array of first names.
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
#Creating an array of last names.
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
#Sorting the names with lexsort.
sorter = np.lexsort((first_name, last_name))
#Calling the sorter function.
sorter

array([1, 2, 3, 0, 4], dtype=int64)

In [107]:
#Zipping up the first names and last names and placing it into a list.
list(zip(last_name[sorter], first_name[sorter]))

[('Arnold', 'Jane'),
 ('Arnold', 'Steve'),
 ('Jones', 'Bill'),
 ('Jones', 'Bob'),
 ('Walters', 'Barbara')]

In [108]:
#Creating an array called values.
values = np.array(['2:first', '2:second', '1:first', '1:second',
                   '1:third'])
#Creating an array called key.
key = np.array([2, 2, 1, 1, 1])
#Sorting the key variable and assigning it to indexer.
indexer = key.argsort(kind='mergesort')
#Displaying the contents of indexer.
indexer
#Mixing the values of indexer and values.
values.take(indexer)

array(['1:first', '1:second', '1:third', '2:first', '2:second'],
      dtype='<U8')

In [109]:
#Initializing the random number generator using seed.
np.random.seed(12345)
#Creating an array of random numbers.
arr = np.random.randn(20)
#Displaying the contents of the array.
arr

array([-0.20470766,  0.47894334, -0.51943872, -0.5557303 ,  1.96578057,
        1.39340583,  0.09290788,  0.28174615,  0.76902257,  1.24643474,
        1.00718936, -1.29622111,  0.27499163,  0.22891288,  1.35291684,
        0.88642934, -2.00163731, -0.37184254,  1.66902531, -0.43856974])

In [110]:
#Partinitioning the array.
np.partition(arr, 3)

array([-2.00163731, -1.29622111, -0.5557303 , -0.51943872, -0.37184254,
       -0.43856974, -0.20470766,  0.28174615,  0.76902257,  0.47894334,
        1.00718936,  0.09290788,  0.27499163,  0.22891288,  1.35291684,
        0.88642934,  1.39340583,  1.96578057,  1.66902531,  1.24643474])

In [111]:
#returns the indices that rearrange the data into the equivalent order
indices = np.argpartition(arr, 3)
#Displaying the contents of indices.
indices

array([16, 11,  3,  2, 17, 19,  0,  7,  8,  1, 10,  6, 12, 13, 14, 15,  5,
        4, 18,  9], dtype=int64)

In [112]:
#Performing the take function of the array.
arr.take(indices)

array([-2.00163731, -1.29622111, -0.5557303 , -0.51943872, -0.37184254,
       -0.43856974, -0.20470766,  0.28174615,  0.76902257,  0.47894334,
        1.00718936,  0.09290788,  0.27499163,  0.22891288,  1.35291684,
        0.88642934,  1.39340583,  1.96578057,  1.66902531,  1.24643474])

In [113]:
#Creating a new, sorted array.
arr = np.array([0, 1, 7, 12, 15])
#Performing a binary search on the sorted array.
arr.searchsorted(9)

3

In [114]:
#Performing a binary search on the sorted array.
arr.searchsorted([0, 8, 11, 16])

array([0, 3, 3, 5], dtype=int64)

In [115]:
#Creating a sorted array.
arr = np.array([0, 0, 0, 1, 1, 1, 1])
#Performing a binary search on the sorted array.
arr.searchsorted([0, 1])


array([0, 3], dtype=int64)

In [116]:
#Performing a binary search on the sorted array, with passed parameters.
arr.searchsorted([0, 1], side='right')

array([3, 7], dtype=int64)

In [117]:
#Creating a floor of the integer calculated and assigning it to a variable.
data = np.floor(np.random.uniform(0, 10000, size=50))
#Creating an array.
bins = np.array([0, 100, 1000, 5000, 10000])
#Displaying the contents of data.
data

array([9940., 6768., 7908., 1709.,  268., 8003., 9037.,  246., 4917.,
       5262., 5963.,  519., 8950., 7282., 8183., 5002., 8101.,  959.,
       2189., 2587., 4681., 4593., 7095., 1780., 5314., 1677., 7688.,
       9281., 6094., 1501., 4896., 3773., 8486., 9110., 3838., 3154.,
       5683., 1878., 1258., 6875., 7996., 5735., 9732., 6340., 8884.,
       4954., 3516., 7142., 5039., 2256.])

In [118]:
#Creating a variable of sorted items within bins, based on data.
labels = bins.searchsorted(data)
#Displaying labels.
labels

array([4, 4, 4, 3, 2, 4, 4, 2, 3, 4, 4, 2, 4, 4, 4, 4, 4, 2, 3, 3, 3, 3,
       4, 3, 4, 3, 4, 4, 4, 3, 3, 3, 4, 4, 3, 3, 4, 3, 3, 4, 4, 4, 4, 4,
       4, 3, 3, 4, 4, 3], dtype=int64)

In [119]:
#Importing pandas.
import pandas as pd
#Creating a series and taking it's mean.
pd.Series(data).groupby(labels).mean()

2     498.000000
3    3064.277778
4    7389.035714
dtype: float64

In [120]:
#Importing numpy.
import numpy as np
#Creating a function that finds the mean distance between points.
def mean_distance(x, y):
    nx = len(x)
    result = 0.0
    count = 0
    for i in range(nx):
        result += x[i] - y[i]
        count += 1
    return result / count

In [121]:
#Assigning a random value to x.
x = np.random.randn(10000000)
#Assinging a random value to y.
y = np.random.randn(10000000)
#Checking how long it takes to process the new function
%timeit mean_distance(x, y)

2.57 s ± 30.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [122]:
#Checking how long it takes to process the subtration of distances.
%timeit (x - y).mean()

36.2 ms ± 3.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [123]:
#Importing a library under an alias.
import numba as nb
#Taking the mean distance and performing the jit function on it and assigning the value to a variable.
numba_mean_distance = nb.jit(mean_distance)

In [124]:
#Creating another function.
@nb.jit
def mean_distance(x, y):
    nx = len(x)
    result = 0.0
    count = 0
    for i in range(nx):
        result += x[i] - y[i]
        count += 1
    return result / count

In [125]:
#Checking how long it takes to process the new function.
%timeit numba_mean_distance(x, y)

11.7 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [126]:
#Importing a library.
from numba import float64, njit
#Creating yet another function.
@njit(float64(float64[:], float64[:]))
def mean_distance(x, y):
    return (x - y).mean()

In [127]:
#Importing yet another library.
from numba import vectorize
#Creating a function.
@vectorize
def nb_add(x, y):
    return x + y

In [128]:
#Assigning a range to a variable.
x = np.arange(10)
#Performing the nb_add function on the values.
nb_add(x, x)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18], dtype=int64)

In [129]:
#Accumulating the values.
nb_add.accumulate(x, 0)

ValueError: could not find a matching type for nb_add.accumulate, requested type has type code 'l'

In [130]:
#A.8 Advanced Array Input and Output
#Creating a new memory map.
mmap = np.memmap('mymmap', dtype='float64', mode='w+',
                  shape=(10000, 10000))
#Displaying the map.
mmap

memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
#Slicing a section or mmap.
section = mmap[:5]
#Assigning random data to section.
section[:] = np.random.randn(5, 10000)
#Writing mmap to a disk.
mmap.flush()
#Displaying mmap.
mmap

In [None]:
#Deleting mmap.
del mmap
#Recreating mmap.
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))
#Displaying the contents of mmap.
mmap

In [131]:
#A.9 Performance Tips
#Creating 2 new ndarrays with attributes.
arr_c = np.ones((1000, 1000), order='C')
arr_f = np.ones((1000, 1000), order='F')
#Checking the values with flags.
arr_c.flags
arr_f.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [132]:
# a single, Fortran-style contiguous segment.
arr_f.flags.f_contiguous

True

In [133]:
#Using timeit on sum to see how fast it processes.
%timeit arr_c.sum(1)

401 µs ± 18.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [134]:
#Using timeit on sum to see how fast it processes.
%timeit arr_f.sum(1)

282 µs ± 5.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [135]:
#Grabbing an array without a spcific memory order.
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [136]:
#a Fortran-style contiguous segment.
arr_c[:50].flags.contiguous

True

In [None]:
#Checking for flags.
arr_c[:, :50].flags