In [1]:
import numpy as np

## Arrays

### Data types
NumPy arrays contain values of a single type.

When constructing an array, its data type can be specified using a string or the associated NumPy object.

In [19]:
np.zeros(10, dtype='int16')
np.zeros(10, dtype=np.int16)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

The following table lists the standard available data types

| Data type  | Description                                                                 |
|------------|-----------------------------------------------------------------------------|
| bool_      | Boolean (True or False) stored as a byte                                    |
| int_       | Default integer type (same as C long; normally either int64 or int32)       |
| intc       | Identical to C int (normally int32 or int64)                                |
| intp       | Integer used for indexing (same as C ssize_t; normally either int32 or int64)|
| int8       | Byte (-128 to 127)                                                          |
| int16      | Integer (-32768 to 32767)                                                   |
| int32      | Integer (-2147483648 to 2147483647)                                         |
| int64      | Integer (-9223372036854775808 to 9223372036854775807)                       |
| uint8      | Unsigned integer (0 to 255)                                                 |
| uint16     | Unsigned integer (0 to 65535)                                               |
| uint32     | Unsigned integer (0 to 4294967295)                                          |
| uint64     | Unsigned integer (0 to 18446744073709551615)                                |
| float_     | Shorthand for float64 (removed in NumPy 2.0; use float64 instead)           |
| float16    | Half precision float: sign bit, 5 bits exponent, 10 bits mantissa           |
| float32    | Single precision float: sign bit, 8 bits exponent, 23 bits mantissa         |
| float64    | Double precision float: sign bit, 11 bits exponent, 52 bits mantissa        |
| complex_   | Shorthand for complex128 (removed in NumPy 2.0; use complex128 instead)     |
| complex64  | Complex number, represented by two 32-bit floats                            |
| complex128 | Complex number, represented by two 64-bit floats                            |


### Generating Arrays with Data

There are numerous methods built into numpy for generating data from scratch.

Here are a few useful examples:

In [6]:
# Create a length-n array of zeros with a particular data type
np.zeros(4, dtype=int)

array([0, 0, 0, 0])

In [None]:
# Create an NxM floating-point array filled with ones
np.ones((3, 4), dtype=float)

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [8]:
# Create an NxM array filled with a particular value
# (no dtype is given here, so the integer fill value 5 yields an integer array)
np.full((3, 5),5)

array([[5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5]])

In [14]:
# Create an NxM array filled with random values between 0 and 1
np.random.rand(3, 4)

array([[0.24394445, 0.93279747, 0.2063483 , 0.76358987],
       [0.2662649 , 0.2185545 , 0.72974924, 0.63809146],
       [0.1553939 , 0.94900345, 0.48027702, 0.33224061]])

In [9]:
# Create an array filled with a linear sequence (start, stop, step; stop is excluded)
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [10]:
# Create a linearly spaced array of X elements between two values (both endpoints included)
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [13]:
# Create a NxM normally distributed array with mean 0 and standard deviation 1
np.random.normal(0, 1, size=(4, 4))

array([[-0.85303289,  0.493218  , -1.32895327, -0.51528394],
       [ 1.46708042, -1.56252265,  0.03486345,  0.5653369 ],
       [ 0.10080724, -0.53473314, -0.02646851, -1.21584473],
       [ 0.25632715, -0.56088758, -0.5120903 ,  2.34229673]])

In [15]:
# Create an n x n identity matrix
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [None]:
# Create an uninitialised array
# Odd behaviour: contents will be whatever values were already in that memory location
np.empty(5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

### Array Attributes

There are a number of useful array attributes.

Let's create some arrays to demonstrate

In [3]:

np.random.seed(0)  # seed for reproducibility

x1 = np.random.randint(10, size=6)  # One-dimensional array
x2 = np.random.randint(10, size=(3, 4))  # Two-dimensional array
x3 = np.random.randint(10, size=(3, 4, 5))  # Three-dimensional array

In [None]:
print("x3 ndim: ", x3.ndim) # number of dimensions
print("x3 shape:", x3.shape) # the size of each dimension
print("x3 size: ", x3.size)  # the total size of the array
print("dtype:", x3.dtype) # data type

print("itemsize:", x3.itemsize, "bytes") # size of each element in bytes
print("nbytes:", x3.nbytes, "bytes") # total size of the array in bytes

# nbytes is equivalent to itemsize * size

x3 ndim:  3
x3 shape: (3, 4, 5)
x3 size:  60
dtype: int32
itemsize: 4 bytes
nbytes: 240 bytes


### Accessing Array Data

#### Accessing Single elements

Indexes can be used to access individual elements, as in most other languages or data types.

In [5]:
x1[0]

np.int32(5)

In [6]:
# To access elements from the end of the array, you can use negative indexing
x1[-1]

np.int32(9)

In [None]:
# In multidimensional arrays, you can access elements using a comma-separated tuple of indices
x2[0, 0]  # First row, first column


np.int32(7)

In [None]:
x2[-1, -1] # Last row, last column

You can assign values using the same syntax.

x1[0] = 5.1234

x2[0, 0] = 12

Note: NumPy arrays have a fixed type, so assigning a decimal (floating-point) value to an integer array silently truncates it; here `x1[0]` becomes `5`.

#### Multidimensional arrays

In [29]:
print(x2[:, 0])  # first column of x2

[3 7 1]


In [30]:
print(x2[0, :])  # first row of x2

[3 5 2 4]


In [31]:
print(x2[0])  # equivalent to x2[0, :]

[3 5 2 4]


#### Array Slicing

We can use standard python list slicing syntax to access multiple elements at once.

x[start:stop:step]

If any of these are omitted they default to the values:
- start - 0
- stop - size of the dimension
- step - 1

Here are some examples:



In [9]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [10]:
x[:5]  # First five elements

array([0, 1, 2, 3, 4])

In [None]:
x[5:] # Elements from index 5 onwards (index 5 included)

In [11]:
x[4:7] # Elements from index 4 to 6

array([4, 5, 6])

In [12]:
x[::2]  # Every second element

array([0, 2, 4, 6, 8])

In [13]:
x[1::2]  # Every second element starting at index 1

array([1, 3, 5, 7, 9])

When a negative step is provided, the default values of start and stop are swapped.

In [14]:
x[::-1]  # all elements, reversed

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [15]:
x[5::-2]  # reversed every other from index 5

array([5, 3, 1])

#### Multidimensional arrays

In [16]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]], dtype=int32)

In [None]:
x2[:2, :3]  # first two rows, first three columns

array([[3, 5, 2],
       [7, 6, 8]], dtype=int32)

In [26]:
x2[::, ::2]  # all rows, every other column

array([[3, 2],
       [7, 8],
       [1, 7]], dtype=int32)

In [27]:
# Reverse the rows and columns
x2[::-1, ::-1]

array([[7, 7, 6, 1],
       [8, 8, 6, 7],
       [4, 2, 5, 3]], dtype=int32)

### Copying Arrays
NumPy array slices return a **view** of the array, which means that they don't create a copy. Any modifications to the view will be reflected in the original array.


In [32]:
print(x2)

x2_sub = x2[:2, :2]
print(x2_sub)

x2_sub[0, 0] = 99
print(x2_sub)

print(x2)

[[3 5 2 4]
 [7 6 8 8]
 [1 6 7 7]]
[[3 5]
 [7 6]]
[[99  5]
 [ 7  6]]
[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


Creating copies is easy, and can be done simply with the .copy() method.

This means you can modify a copy of a subarray without affecting the original data.

In [33]:
x2_sub_copy = x2[:2, :2].copy()

### Reshaping arrays

You can reshape an array with the reshape method.

For this to work, the size of the initial array must match the size of the reshaped array.

In [34]:
# Create a one dimensional array and then reshape it into a 3x3 2D array
grid = np.arange(1, 10).reshape((3, 3))
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


Another common approach is to use `np.newaxis` within a slice operation to insert a new axis of length one.

In [38]:
x = np.array([1, 2, 3])

# row vector via reshape
x.reshape((1, 3))

array([[1, 2, 3]])

In [39]:
# row vector via newaxis
x[np.newaxis, :]

array([[1, 2, 3]])

In [40]:
# column vector via reshape
x.reshape((3, 1))

array([[1],
       [2],
       [3]])

In [41]:
# column vector via newaxis
x[:, np.newaxis]

array([[1],
       [2],
       [3]])

In [None]:
### Array Concatenation and Splitting

In [None]:
# concatenate joins two arrays
# (x is defined in this cell so that it does not depend on state left over from earlier cells)
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [43]:
# it can be used for multidimensional arrays as well
grid = np.array([[1, 2, 3],
                 [4, 5, 6]])
print(np.concatenate([grid, grid]))


[[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]


In [44]:
# concatenate along the second axis (zero-indexed)
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [45]:
# vstack is great for stacking arrays vertically (row-wise) where you have mixed dimensions

x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7],
                 [6, 5, 4]])

np.vstack([x, grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

`hstack` is the equivalent for horizontal stacking

### Splitting Arrays
The opposite of joining is splitting: we might want to split an array into sub-arrays.

In [59]:
# We pass the indices at which we want to split the array
# (note: this rebinds the names x, x1, x2 and x3 used earlier in the notebook)
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3,5])
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


Similar to concatenation we have `hsplit` and `vsplit` for splitting along rows and columns

In [60]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [61]:
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [62]:
left, right = np.hsplit(grid, [2])
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


## Computation of Arrays: Universal Functions
Python's default implementation (CPython) can be very slow for some operations, due in part to the fact that Python is an interpreted language rather than a compiled one.

There have been recent attempts to address this with projects like `PyPy`, which is a JIT-compiled implementation of Python.

`Cython`, which converts python code into compilable C code.

`Numba` which converts python code to fast LLVM bytecode.

All have strengths and weaknesses, and none is anywhere near as popular as the standard CPython engine.

One example of poor performance is element-wise operations on arrays. Take this for-loop example:

In [66]:
import numpy as np
np.random.seed(0)

def compute_reciprocals(values):
    """Return the reciprocal (1 / v) of every element of ``values``.

    This is intentionally written as an explicit Python-level loop: it is the
    slow baseline against which the vectorized ``1.0 / values`` is timed.

    Parameters
    ----------
    values : sequence or 1-D array of numbers
        The inputs to invert.

    Returns
    -------
    numpy.ndarray
        A new floating-point array with the same length as ``values``.
    """
    count = len(values)
    reciprocals = np.empty(count)
    # One element at a time on purpose -- do not replace with a ufunc here.
    for position, value in enumerate(values):
        reciprocals[position] = 1.0 / value
    return reciprocals
        
values = np.random.randint(1, 10, size=5)
compute_reciprocals(values)

array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ])

In [67]:
big_array = np.random.randint(1, 100, size=1000000)
%timeit compute_reciprocals(big_array)

1.38 s ± 4.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


NumPy provides universal functions (`ufuncs`), which act as an interface to statically typed, compiled routines. Using them is known as a **vectorized** operation. It is accomplished by performing an operation on the array as a whole, which is then applied to each element.

In [68]:
print(compute_reciprocals(values))
print(1.0 / values)

[0.16666667 1.         0.25       0.25       0.125     ]
[0.16666667 1.         0.25       0.25       0.125     ]


We can see the performance improvement here:

In [69]:
%timeit (1.0 / big_array)

505 μs ± 2.46 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


Here are some other examples:

In [70]:
x = np.arange(4)
print("x     =", x)
print("x + 5 =", x + 5)
print("x - 5 =", x - 5)
print("x * 2 =", x * 2)
print("x / 2 =", x / 2)
print("x // 2 =", x // 2)  # floor division
print("-x     = ", -x)
print("x ** 2 = ", x ** 2)
print("x % 2  = ", x % 2)

x     = [0 1 2 3]
x + 5 = [5 6 7 8]
x - 5 = [-5 -4 -3 -2]
x * 2 = [0 2 4 6]
x / 2 = [0.  0.5 1.  1.5]
x // 2 = [0 0 1 1]
-x     =  [ 0 -1 -2 -3]
x ** 2 =  [0 1 4 9]
x % 2  =  [0 1 0 1]


In [71]:
x = np.array([-2, -1, 0, 1, 2])
abs(x)

array([2, 1, 0, 1, 2])

There are also trigonometric ufuncs:

In [73]:
theta = np.linspace(0, np.pi, 3)

In [74]:
print("theta      = ", theta)
print("sin(theta) = ", np.sin(theta))
print("cos(theta) = ", np.cos(theta))
print("tan(theta) = ", np.tan(theta))

theta      =  [0.         1.57079633 3.14159265]
sin(theta) =  [0.0000000e+00 1.0000000e+00 1.2246468e-16]
cos(theta) =  [ 1.000000e+00  6.123234e-17 -1.000000e+00]
tan(theta) =  [ 0.00000000e+00  1.63312394e+16 -1.22464680e-16]


In [75]:
x = [-1, 0, 1]
print("x         = ", x)
print("arcsin(x) = ", np.arcsin(x))
print("arccos(x) = ", np.arccos(x))
print("arctan(x) = ", np.arctan(x))

x         =  [-1, 0, 1]
arcsin(x) =  [-1.57079633  0.          1.57079633]
arccos(x) =  [3.14159265 1.57079633 0.        ]
arctan(x) =  [-0.78539816  0.          0.78539816]


As well as exponents and logarithms. 

Full details can be found at the following:
- NumPy - http://www.numpy.org/
- SciPy - http://www.scipy.org/

Or use the built-in IPython help system: type `help(np)` or `np?` to read the documentation, or `np.` followed by the Tab key to see a list of all available functions.



In [None]:
## Aggregate Functions

In [76]:
L = np.random.random(100)
sum(L)

np.float64(50.461758453195614)

In [77]:
big_array = np.random.rand(1000000)
%timeit sum(big_array)
%timeit np.sum(big_array)

50 ms ± 290 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
273 μs ± 1.79 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [79]:
np.min(big_array), np.max(big_array)

(np.float64(7.071203171893359e-07), np.float64(0.9999997207656334))