# Appendix A: Advanced NumPy

In [5]:
import numpy as np
import pandas as pd

## A.1 ndarray Object Internals

In [6]:
# Integers 1 through 99 (the stop value 100 is excluded).
array = np.arange(1, 100)

In [7]:
# A step of -1 walks the array from its last element back to its first.
array[::-1]

array([99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83,
       82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66,
       65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
       48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
       31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15,
       14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1])

In [8]:
# Arrange the integers 0..11 into 3 rows of 4 columns, then display the result.
arr = np.arange(12).reshape((3, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [9]:
# Strides are in bytes. A float64 element is 8 bytes, so one step along the
# last axis is 8 bytes, along the middle axis 5 * 8 = 40, and along the first
# axis 4 * 40 = 160.
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

In [10]:
# One unsigned-integer array and one floating-point array, used by the
# dtype-hierarchy checks in the next two cells.
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)

In [11]:
# uint16 falls under the abstract np.integer type, so this is True.
np.issubdtype(ints.dtype, np.integer)

True

In [12]:
# float32 falls under the abstract np.floating type, so this is True.
np.issubdtype(floats.dtype, np.floating)

True

In [13]:
# List the parent classes of np.float64 in method-resolution order.
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

## A.2 Advanced Array Manipulation

In [14]:
# A one-dimensional array of the integers 0..7.
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [15]:
# Arrange the 8 values into 2 rows of 4 columns.
arr.reshape(2, 4)

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

One of the passed shape dimensions can be -1, in which case the value used for that
dimension will be inferred from the data:

In [16]:
# With 15 elements and 5 rows requested, the -1 is resolved to 3 columns.
arr = np.arange(15)
arr.reshape(5, -1)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

ravel does not produce a copy of the underlying values if the values in the result were contiguous in the original array. The flatten method behaves like ravel except it always returns a copy of the data:

In [17]:
# arr is still the one-dimensional arange(15): the reshape result in the
# previous cell was displayed but not assigned. ravel() returns the same 15 values.
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [18]:
# For this input flatten() yields the same 15 values as ravel() above.
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### C Versus Fortran Order
Functions like reshape and ravel accept an order argument indicating the order in which to read the data in the array. This is usually set to 'C' or 'F' (there are also less commonly used options 'A' and 'K'; see the NumPy documentation).


In [19]:
# A 3x4 array holding 0..11, used to compare the two flattening orders.
arr = np.arange(12).reshape((3, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [20]:
# Default order: the rows are read one after another (0, 1, 2, 3, then 4, ...).
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [21]:
# 'F' (Fortran order): the columns are read one after another (0, 4, 8, then 1, ...).
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

There are some convenience functions, like vstack and hstack, for common kinds of
concatenation. For example, two arrays can be stacked vertically (row-wise) or horizontally (column-wise):

In [22]:
# Two 2x3 arrays to stack.
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

# vstack places arr2's rows beneath arr1's, giving a 4x3 result.
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [23]:
# hstack places arr2's columns to the right of arr1's, giving a 2x6 result.
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [24]:
# arr is still the 3x4 array defined a few cells above. Splitting at row
# indices 1 and 3 produces rows [0:1], [1:3] and [3:]; because arr has only
# 3 rows, the last piece (third) is empty.
first, second, third = np.split(arr, [1, 3])

In [25]:
# Show the three pieces in order; the final empty piece prints as [].
print(first)
print(second)
print(third)

[[0 1 2 3]]
[[ 4  5  6  7]
 [ 8  9 10 11]]
[]


### Stacking Helpers: r_ and c_
There are two special objects in the NumPy namespace, r_ and c_, that make stacking arrays more concise:

In [26]:
# arr holds 0..5; arr1 is the same data arranged as 3 rows of 2 columns.
# arr2 is a 3x2 array of random draws. No seed is set in the cells visible
# here, so its values (and the outputs below) will differ from run to run.
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = np.random.randn(3, 2)

In [27]:
# r_ stacks the two 3x2 arrays along the first axis into a 6x2 array; the
# integer values of arr1 appear as floats in the combined result.
np.r_[arr1, arr2]

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [ 1.91780533,  1.08668383],
       [ 0.74433499, -0.53483292],
       [ 0.37413022, -0.62047975]])

In [28]:
# c_ appends the length-6 arr as a third column next to the 6x2 stack.
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.        ,  1.        ,  0.        ],
       [ 2.        ,  3.        ,  1.        ],
       [ 4.        ,  5.        ,  2.        ],
       [ 1.91780533,  1.08668383,  3.        ],
       [ 0.74433499, -0.53483292,  4.        ],
       [ 0.37413022, -0.62047975,  5.        ]])

In [29]:
# Slices inside c_ are expanded to ranges: 1:6 becomes 1..5 and -10:-5
# becomes -10..-6, and each range becomes one column.
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

### Repeating Arrays

In [30]:
# The values 0, 1, 2, used by the repeat and tile examples that follow.
arr = np.arange(3)

In [31]:
# Repeat every element 3 times, keeping the copies of each element adjacent.
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [32]:
# Per-element repeat counts: 0 twice, 1 three times, 2 four times.
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

tile, on the other hand, is a shortcut for stacking copies of an array along an axis.
Visually you can think of it as being akin to “laying down tiles”:

In [33]:
# Lay the whole array end to end twice.
np.tile(arr, 2)

array([0, 1, 2, 0, 1, 2])

In [34]:
# A tuple gives the number of copies per axis: 3 down the rows, 2 across the columns.
np.tile(arr, (3, 2))

array([[0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2]])

### Fancy Indexing Equivalents: take and put

In [35]:
# arr holds 0, 100, ..., 900. Indexing with a list of integers selects the
# elements at positions 7, 1, 3 and 9, in that order.
arr = np.arange(10) * 100
inds = [7, 1, 3, 9]
arr[inds]

array([700, 100, 300, 900])

There are alternative ndarray methods that are useful in the special case of only making a selection on a single axis:

In [36]:
# take with the same index list returns the same selection as arr[inds] above.
arr.take(inds)

array([700, 100, 300, 900])

In [37]:
# put writes 42 into every position listed in inds. It changes arr itself and
# displays nothing; the next cell shows the modified array.
arr.put(inds, 42)

In [38]:
# Positions 1, 3, 7 and 9 now hold 42; the other elements are unchanged.
arr

array([  0,  42, 200,  42, 400, 500, 600,  42, 800,  42])

## A.3 Broadcasting

### The Broadcasting Rule
Two arrays are compatible for broadcasting if, for each trailing dimension (i.e., starting from the end), the axis lengths match or either of the lengths is 1. Broadcasting is then performed over the missing or length-1 dimensions.
<div style="margin-top:20px;">
<img src="examples/broadcasting.png"/>
</div>

In [39]:
# A 4x3 array of random draws. No seed is set in the cells visible here, so
# the values differ from run to run.
arr = np.random.randn(4, 3)
# mean(0) averages down the rows, giving one mean per column (3 values).
arr.mean(0)

array([-0.3981219 ,  0.82306612, -0.24570856])

In [40]:
# The length-3 vector of column means is broadcast across all 4 rows of arr,
# so each column has its own mean subtracted.
demeaned = arr - arr.mean(0)
demeaned

array([[-0.47507337, -0.38178626, -0.56188449],
       [ 0.16142964,  0.96277993,  1.1240475 ],
       [-0.47310422, -1.01651092, -0.88515836],
       [ 0.78674796,  0.43551725,  0.32299535]])

<hr>

In [41]:
# arr itself was not modified by the subtraction above; these are the original values.
arr

array([[-0.87319527,  0.44127985, -0.80759305],
       [-0.23669226,  1.78584605,  0.87833894],
       [-0.87122613, -0.1934448 , -1.13086693],
       [ 0.38862606,  1.25858336,  0.07728678]])

In [44]:
# mean(1) averages across the columns, giving one mean per row; the result is
# one-dimensional with shape (4,).
row_means = arr.mean(1)
row_means.shape

(4,)

In [45]:
# Reshape the 4 row means into a 4x1 column so they line up with arr's rows.
row_means.reshape((4,1))

array([[-0.41316949],
       [ 0.80916424],
       [-0.73184595],
       [ 0.57483207]])

In [46]:
# The 4x1 column of row means is broadcast across arr's 3 columns. After the
# subtraction each row's mean is zero up to floating-point rounding.
demeaned = arr - row_means.reshape((4, 1))
demeaned.mean(1)

array([-3.70074342e-17,  1.11022302e-16,  7.40148683e-17, -3.70074342e-17])

<div style="margin-top:20px;">
<img src="examples/broadcasting_over_axis_1_of_a_2D_array.png"/>
</div>

<div style="margin-top:20px;">
<img src="examples/broadcasting_over_axis_0_of_a_3D_array.png"/>
</div>

<div style="margin-top:20px;">
<img src="examples/compatible_2d_array_shapes_for_broadcasting_over_a_3D_array.png"/>
</div>