In [None]:
# Import statements
import numpy as np

In [None]:
# Create an array; mixing ints with one float upcasts every element to float64
a = np.array([2, 3, 4, 5.1])
type(a)  # not displayed: a notebook cell only echoes its last expression
a

array([2. , 3. , 4. , 5.1])

In [None]:
# data type of the values
a.dtype

dtype('float64')

In [None]:
b = np.array([1.2, 3.5, 5.1])
b.dtype

dtype('float64')

In [None]:
# A nested sequence (one inner sequence per row) produces a 2-D array
twod_arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [3, 5, 7],
    [1, 3, 4],
])
twod_arr.ndim

2

In [None]:
twod_arr

array([[1, 2, 3],
       [4, 5, 6],
       [3, 5, 7],
       [1, 3, 4]])

In [None]:
# Create an array with all zeros
# np.zeros defaults to float; dtype=int overrides that here
# Shape (3, 4, 2): 3 blocks, each with 4 rows & 2 columns
zeros_ex = np.zeros((3, 4, 2), dtype=int)
zeros_ex.ndim
zeros_ex.dtype
zeros_ex

array([[[0, 0],
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        [0, 0]]])

In [None]:
# Array filled with ones: 2 blocks, each with 3 rows & 4 columns
# dtype=int replaces the default float element type
ones_ex = np.ones(shape=(2, 3, 4), dtype=int)
ones_ex.ndim

3

In [None]:
ones_ex

array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]])

In [None]:
float_arr = np.array([[1.3, 2], [3, 4]], dtype=float)
float_arr

array([[1.3, 2. ],
       [3. , 4. ]])

In [None]:
# Similar to Python range, but returns an array
range_ex = np.arange(10, 30, 5)
range_ex

array([10, 15, 20, 25])

In [None]:
# arange works also with floating point step values
range_fp = np.arange(0, 3, 0.3)
range_fp

array([0. , 0.3, 0.6, 0.9, 1.2, 1.5, 1.8, 2.1, 2.4, 2.7])

In [None]:
# linspace takes the number of samples wanted instead of a step size;
# both endpoints are included in the result
np.linspace(0, 2, 9)  # 9 evenly spaced numbers from 0 to 2

array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])

In [None]:
# Create a 2x3x4 array of random floats in the half-open interval [0, 1)
np.random.random((2, 3, 4))

array([[[0.84419832, 0.05031646, 0.79541218, 0.1229544 ],
        [0.44790303, 0.81772139, 0.72810437, 0.42425065],
        [0.74928159, 0.47266227, 0.67772439, 0.69904533]],

       [[0.28335259, 0.47749174, 0.48589972, 0.12751448],
        [0.09591743, 0.90749461, 0.24058292, 0.53748964],
        [0.22551513, 0.13021746, 0.77189436, 0.34520199]]])

In [None]:
# Create a 2x3 array of random integers in the interval [0, 10)
np.random.seed(3)
np.random.randint(0, 10, (2, 3))

array([[8, 9, 3],
       [8, 8, 0]])

### Attributes

In [None]:
np.random.seed(0)  # fix the generator state so the draws below are repeatable

# Integers drawn from [0, 10); only the requested shape differs between the three
x1 = np.random.randint(low=0, high=10, size=6)  # 1-D: six values
x2 = np.random.randint(low=0, high=10, size=(3, 4))  # 2-D: 3 rows & 4 columns
x3 = np.random.randint(low=0, high=10, size=(3, 4, 5))  # 3-D: 3 blocks of 4 x 5

In [None]:
print("x3 number of dimensions: ", x3.ndim)
print("x3 shape:", x3.shape)
print("x3 size: ", x3.size)
print("dtype:", x3.dtype)

x3 number of dimensions:  3
x3 shape: (3, 4, 5)
x3 size:  60
dtype: int64


In [None]:
print("itemsize:", x3.itemsize, "bytes") # size (in bytes) of each array element - 64 bits - 8bytes
print("nbytes:", x3.nbytes, "bytes") # lists the total size (in bytes) of the array

itemsize: 8 bytes
nbytes: 480 bytes


### Accessing elements

In [None]:
x1.dtype

dtype('int64')

In [None]:
print(x1[0])
print(x1[4])
print(x1[-2])

5
7
7


In a multi-dimensional array, items can be accessed using a comma-separated tuple of indices:

In [None]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [None]:
print(x2[0,0])
print(x2[2,3])
print(x2[2,-1])

3
7
7


Values can also be modified using any of the above index notation:

In [None]:
x2[0, 0] = 12
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

Unlike Python lists, NumPy arrays have a fixed type. This means, for example, that if you attempt to insert a floating-point value to an integer array, the value will be silently truncated. Don't be caught unaware by this behavior!

In [None]:
x1[0] = 3.14159  # this will be truncated!
x1

array([3, 0, 3, 3, 7, 9])

### Slicing

Just as we can use square brackets to access individual array elements, we can also use them to access subarrays with the slice notation, marked by the colon (:) character. The NumPy slicing syntax follows that of the standard Python list; to access a slice of an array x, use this:

x[start:stop:step]

If any of these are unspecified, they default to the values start=0, stop=size of dimension, step=1. We'll take a look at accessing sub-arrays in one dimension and in multiple dimensions.

In [None]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
print(x[:5])  # first five elements
print(x[5::2])  # every other element, starting at index 5
print(x[3:7])  # middle sub-array: indices 3 through 6

[0 1 2 3 4]
[5 7 9]
[3 4 5 6]


In [None]:
x[1::2]  # every other element, starting at index 1

array([1, 3, 5, 7, 9])

### Multi-dimensional subarrays

Multi-dimensional slices work in the same way, with multiple slices separated by commas.
For example:

In [None]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [None]:
# print(x2[:2,:3])  # first slice selects rows, second slice selects columns
print(x2[:3,::2])  # first three rows, every other column (step of 2)

[[3 2]
 [7 8]
 [1 7]]


#### Accessing array rows and columns

One commonly needed routine is accessing of single rows or columns of an array.
This can be done by combining indexing and slicing, using an empty slice marked by a single colon (``:``):

In [None]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [None]:
print(x2[:, 3])  # fourth (last) column of x2

[4 8 7]


In the case of row access, the empty slice can be omitted for a more compact syntax:

In [None]:
print(x2[0])  # equivalent to x2[0, :]
# print(x2[0, :])

[3 5 2 4]


### Subarrays as no-copy views

One important–and extremely useful–thing to know about array slices is that they return *views* rather than *copies* of the array data.
This is one area in which NumPy array slicing differs from Python list slicing: in lists, slices will be copies.
Consider our two-dimensional array from before:

Let's extract a $2 \times 2$ subarray from this:

In [None]:
x2_sub = x2[:2, :2]
print(x2_sub)

[[3 5]
 [7 6]]


Now if we modify this subarray, we'll see that the original array is changed! Observe:

In [None]:
x2_sub[0, 0] = 99
print(x2_sub)

[[99  5]
 [ 7  6]]


In [None]:
print(x2)

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


### Creating copies of arrays

Despite the nice features of array views, it is sometimes useful to instead explicitly copy the data within an array or a subarray. This can be most easily done with the ``copy()`` method:

In [None]:
x2_sub_copy = x2[:2, :2].copy()
print(x2_sub_copy)

[[99  5]
 [ 7  6]]


If we now modify this subarray, the original array is not touched:

In [None]:
x2_sub_copy[0, 0] = 42
print(x2_sub_copy)

[[42  5]
 [ 7  6]]


In [None]:
print(x2)

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


## Reshaping of Arrays

For example, if you want to put the numbers 1 through 9 in a $3 \times 3$ grid, you can do the following:

In [None]:
arr = np.arange(1, 10)
arr

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# 3 * 3 matches the 9 elements of arr; a shape with a different total
# (for example one using 7) will throw an error
grid = arr.reshape((3, 3))
grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

Note that for this to work, the size of the initial array must match the size of the reshaped array. 

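Another common reshaping pattern is the conversion of a one-dimensional array into a two-dimensional array. Here a six-element array is reshaped into a $2 \times 3$ and then a $3 \times 2$ matrix (a row or column matrix would use the shape ``(1, 6)`` or ``(6, 1)``).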

In [None]:
x = np.array([1, 2, 3, 4, 5, 6])

x.reshape((2, 3))

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
x.reshape((3, 2))

array([[1, 2],
       [3, 4],
       [5, 6]])

## Array Concatenation and Splitting

All of the preceding routines worked on single arrays. It's also possible to combine multiple arrays into one, and to conversely split a single array into multiple arrays. We'll take a look at those operations here.

### Concatenation of arrays

Concatenation, or joining of two arrays in NumPy, is primarily accomplished using the routines ``np.concatenate``, ``np.vstack``, and ``np.hstack``.
``np.concatenate`` takes a tuple or list of arrays as its first argument, as we can see here:

In [None]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

You can also concatenate more than two arrays at once:

In [None]:
z = [99, 99, 99]
print(np.concatenate([x, y, z]))

[ 1  2  3  3  2  1 99 99 99]


It can also be used for two-dimensional arrays:

In [None]:
# Two 3x3 integer grids to be joined along either axis
grid1 = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [2, 2, 1],
])
grid2 = np.array([
    [10, 20, 3],
    [40, 50, 60],
    [2, 3, 1],
])

In [None]:
# concatenate along the first axis
np.concatenate([grid1, grid2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 2,  2,  1],
       [10, 20,  3],
       [40, 50, 60],
       [ 2,  3,  1]])

In [None]:
# concatenate along the second axis (zero-indexed)
np.concatenate([grid1, grid2], axis=1)

array([[ 1,  2,  3, 10, 20,  3],
       [ 4,  5,  6, 40, 50, 60],
       [ 2,  2,  1,  2,  3,  1]])

For working with arrays of mixed dimensions, it can be cleaner to use the ``np.vstack`` (vertical stack) and ``np.hstack`` (horizontal stack) functions:

In [None]:
# A 1-D array and a 2-D array that share the same number of columns
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7], [6, 5, 4]])

# vstack places x as a single row on top of the rows of grid
np.vstack((x, grid))

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [None]:
# horizontally stack the arrays
y = np.array([[99, 90],
              [99, 90]])
np.hstack([grid, y])

array([[ 9,  8,  7, 99, 90],
       [ 6,  5,  4, 99, 90]])

### Splitting of arrays

The opposite of concatenation is splitting, which is implemented by the functions ``np.split``, ``np.hsplit``, and ``np.vsplit``.  For each of these, we can pass a list of indices giving the split points:

In [None]:
x = [1, 2, 3, 99, 99, 3, 2, 1]
# The split points are not in ascending order here, so the three pieces are
# x[:3], x[3:2] (empty) and x[2:]; ascending points such as [3, 5] would
# give [1 2 3] [99 99] [3 2 1] instead.
# NOTE: this rebinds the names x1, x2 and x3 created earlier in the notebook.
x1, x2, x3 = np.split(x, [3, 2])
print(x1, x2, x3)

[1 2 3] [] [ 3 99 99  3  2  1]


Notice that *N* split points lead to *N + 1* subarrays.
The related functions ``np.hsplit`` and ``np.vsplit`` are similar:

In [None]:
# The integers 0..15 laid out row by row in a 4x4 grid
grid = np.arange(16).reshape(4, 4)
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [None]:
upper, lower = np.vsplit(grid, [2])
print(upper)
print('---')
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
---
[[ 8  9 10 11]
 [12 13 14 15]]


In [None]:
left, right = np.hsplit(grid, [2])
print(left)
print('---')
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
---
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


## Exploring NumPy's UFuncs

Ufuncs exist in two flavors: *unary ufuncs*, which operate on a single input, and *binary ufuncs*, which operate on two inputs.
We'll see examples of both these types of functions here.

### Array arithmetic

NumPy's ufuncs feel very natural to use because they make use of Python's native arithmetic operators.
The standard addition, subtraction, multiplication, and division can all be used:

In [None]:
x = np.arange(4)
print("x     =", x)
print("x + 5 =", x + 5)
print("x - 5 =", x - 5)
print("x * 2 =", x * 2)
print("x / 2 =", x / 2)
print("-x     = ", -x)
print("x ** 2 = ", x ** 2)
print("x % 2  = ", x % 2)

x     = [0 1 2 3]
x + 5 = [5 6 7 8]
x - 5 = [-5 -4 -3 -2]
x * 2 = [0 2 4 6]
x / 2 = [0.  0.5 1.  1.5]
-x     =  [ 0 -1 -2 -3]
x ** 2 =  [0 1 4 9]
x % 2  =  [0 1 0 1]


In [None]:
np.add(x, 2)  # Binary UFunc

array([2, 3, 4, 5])

### Absolute value

Just as NumPy understands Python's built-in arithmetic operators, it also understands Python's built-in absolute value function:

In [None]:
x = np.array([-2, -1, 0, 1, 2])
abs(x)

array([2, 1, 0, 1, 2])

In [None]:
np.abs(x)

array([2, 1, 0, 1, 2])

This ufunc can also handle complex data, in which the absolute value returns the magnitude:

In [None]:
x = np.array([3 - 4j, 4 - 3j, 2 + 1j, 0 + 1j])
np.abs(x)

array([5.        , 5.        , 2.23606798, 1.        ])

### Trigonometric functions

NumPy provides a large number of useful ufuncs, and some of the most useful for the data scientist are the trigonometric functions.
We'll start by defining an array of angles:

In [None]:
theta = np.arange(5)
print("theta      = ", theta)
print("sin(theta) = ", np.sin(theta))
print("cos(theta) = ", np.cos(theta))
print("tan(theta) = ", np.tan(theta))

theta      =  [0 1 2 3 4]
sin(theta) =  [ 0.          0.84147098  0.90929743  0.14112001 -0.7568025 ]
cos(theta) =  [ 1.          0.54030231 -0.41614684 -0.9899925  -0.65364362]
tan(theta) =  [ 0.          1.55740772 -2.18503986 -0.14254654  1.15782128]


### Exponents and logarithms

Another common type of operation available as NumPy ufuncs is exponentiation:

In [None]:
x = [[1, 2, 3]]
print("x     =", x)
print("e^x   =", np.exp(x))
print("2^x   =", np.exp2(x))
print("3^x   =", np.power(3, x))

x     = [[1, 2, 3]]
e^x   = [[ 2.71828183  7.3890561  20.08553692]]
2^x   = [[2. 4. 8.]]
3^x   = [[ 3  9 27]]


In [None]:
x = [1, 2, 4, 10]
print("x        =", x)
print("ln(x)    =", np.log(x))
print("log2(x)  =", np.log2(x))
print("log10(x) =", np.log10(x))

x        = [1, 2, 4, 10]
ln(x)    = [0.         0.69314718 1.38629436 2.30258509]
log2(x)  = [0.         1.         2.         3.32192809]
log10(x) = [0.         0.30103    0.60205999 1.        ]


## Sorting

### Functions: ``np.sort`` and ``np.argsort``


In [None]:
x = np.array([2, 1, 4, 3, 5])
print(x)
x.sort()  # the array method sorts x in place; np.sort(x) would return a sorted copy instead
print(x)

[2 1 4 3 5]
[1 2 3 4 5]


A related function is ``argsort``, which instead returns the *indices* of the sorted elements:

In [None]:
# Indices that would put x in ascending order (x itself is left untouched)
x = np.array([2, 1, 4, 3, 5])
i = x.argsort()
print(i)

[1 0 3 2 4]


In [None]:
x[i]  # To print the sorted array

array([1, 2, 3, 4, 5])

## Aggregations

NumPy has fast built-in aggregation functions for working on arrays; we'll discuss and demonstrate some of them here.

### Summing the Values in an Array

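As a quick example, consider computing the sum of all values in an array.
Python itself can do this using the built-in ``sum`` function; NumPy's ``np.sum``, used below, gives the same result for a one-dimensional array but runs in compiled code: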

In [None]:
L = np.random.randint(100, size=100)
np.sum(L)

4928

### Minimum and Maximum

Similarly, Python has built-in ``min`` and ``max`` functions, used to find the minimum value and maximum value of any given array; NumPy provides the corresponding ``np.min`` and ``np.max``, used below:

In [None]:
# np.min(L)
np.max(L)
# np.mean(L)

99

In [None]:
%timeit np.min(L)
%timeit min(L)

9.43 µs ± 2.38 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)
10.5 µs ± 923 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
