### Source: Python Data Science Handbook, Jake VanderPlas, O'Reilly
### Chapter 2: Introduction to NumPy

### Fixed-Type Arrays in Python

In [1]:
import array
L = list(range(10))
A = array.array('i', L) # 'i' is the type code for integer
A

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

Python's built-in `array` module provides fixed-type arrays, which store array-based data more efficiently than a list.

NumPy's `ndarray` adds to this efficient *operations* on that data.

### Creating NumPy arrays from Python lists

In [2]:
import numpy as np

np.array([1, 3, 4, 5, 9, 7])

array([1, 3, 4, 5, 9, 7])

In [3]:
# Explicitly set data type
np.array([1, 2, 3, 4], dtype='float32')

array([1., 2., 3., 4.], dtype=float32)

### NumPy arrays can be multidimensional (list of lists)

In [27]:
[[i, j] for i in [4,5,6] for j in [7,8,9]]

[[4, 7], [4, 8], [4, 9], [5, 7], [5, 8], [5, 9], [6, 7], [6, 8], [6, 9]]

In [4]:
np.array([range(i, i+3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

### Creating arrays from scratch

In [5]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [6]:
# 3x5 array of floating point numbers
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [7]:
# 3x5 array of approx value for pi
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [8]:
# array of numbers, increments of 2
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [9]:
# Identity matrix
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### NumPy Array Attributes

In [31]:
import numpy as np  # also imported in an earlier cell; harmless repeat
np.random.seed(0)  # seed for reproducibility

# The three draws below consume the seeded global random stream in order,
# so reordering or removing any of them changes the values of the others.
# randint(10, ...) draws integers from 0 up to (but not including) 10.
x1 = np.random.randint(10, size=6) # one-dimensional array
x2 = np.random.randint(10, size=(3,4)) # two-dimensional array
x3 = np.random.randint(10, size=(3,4,5)) # three-dimensional array

print(x3)


[[[8 1 5 9 8]
  [9 4 3 0 3]
  [5 0 2 3 8]
  [1 3 3 3 7]]

 [[0 1 9 9 0]
  [4 7 3 2 7]
  [2 0 0 4 5]
  [5 6 8 4 1]]

 [[4 9 8 1 1]
  [7 9 9 3 6]
  [7 2 0 3 5]
  [9 4 4 6 4]]]


In [32]:
print(x3.ndim, x3.shape, x3.size)

3 (3, 4, 5) 60


### Accessing individual elements

In [12]:
x1

array([5, 0, 3, 3, 7, 9])

In [13]:
x1[0]

5

In [14]:
x1[-2] # second element from end

7

In [15]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [16]:
x2[0]

array([3, 5, 2, 4])

In [17]:
x2[0,0]

3

In [18]:
# modify a value
x2[0,0] = 12
x2[0,1] = 20
x2

array([[12, 20,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [19]:
# operate on all elements, array arithmetic
x2 * 2

array([[24, 40,  4,  8],
       [14, 12, 16, 16],
       [ 2, 12, 14, 14]])

### Computations on NumPy Arrays: Universal Functions

In [20]:
np.random.seed(0)

def compute_reciprocals(values):
    """Return the reciprocal of each element of ``values``, one at a time.

    This is intentionally an explicit Python-level loop: it serves as the
    slow baseline against which the vectorized expression ``1 / values``
    is timed.

    Parameters
    ----------
    values : indexable sequence of numbers
        Anything supporting ``len(values)`` and ``values[i]`` for
        ``i`` in ``0 .. len(values) - 1`` (e.g. a 1-D NumPy array).

    Returns
    -------
    numpy.ndarray
        New array of length ``len(values)`` (NumPy's default float dtype)
        whose ``i``-th entry is ``1.0 / values[i]``.
    """
    count = len(values)
    # np.empty does not initialise its buffer; every slot is written below.
    reciprocals = np.empty(count)
    for idx in range(count):
        reciprocals[idx] = 1.0 / values[idx]
    return reciprocals

values = np.random.randint(1, 10, size=5)
compute_reciprocals(values)

array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ])

In [21]:
big_array = np.random.randint(1, 100, size=1000000)
%timeit compute_reciprocals(big_array) # 2 seconds, really slow!

2.33 s ± 42.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Introducing UFuncs

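UFuncs (universal functions) quickly execute repeated operations on the elements of NumPy arrays by applying the operation to the whole array at once (vectorization) instead of looping in Python.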

In [22]:
# these two statements produce identical results
print(compute_reciprocals(values))
print(1/values)

[0.16666667 1.         0.25       0.25       0.125     ]
[0.16666667 1.         0.25       0.25       0.125     ]


In [23]:
%timeit 1/big_array # ~1.4 ms per loop, much faster than the ~2.3 s Python loop!

1.43 ms ± 29.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
