### Source: Python Data Science Handbook, Jake VanderPlas, O'Reilly
### Chapter 2: Introduction to NumPy

Complete list of examples: https://github.com/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/02.02-The-Basics-Of-NumPy-Arrays.ipynb

### Fixed-Type Arrays in Python

In [2]:
from sys import getsizeof
l = list(range(10))
print(l)
getsizeof(l)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


208

In [4]:
import array
l = list(range(10))
a = array.array('i', l) # 'i' is the type code for integer
getsizeof(a)

104

In [5]:
l2 = [1, 2, 'three']
l2

[1, 2, 'three']

The Python array object provides more efficient storage of array-based data.

NumPy builds on this efficient storage by adding efficient operations on that data.

### Creating NumPy arrays from Python lists

In [6]:
import numpy as np

a_np = np.array(l)
print(a_np)
getsizeof(a_np)

[0 1 2 3 4 5 6 7 8 9]


176

In [10]:
a = np.array([1, 3, 4, 5, 9, 7])
print(a)
a.nbytes

[1 3 4 5 9 7]


48

In [11]:
# Explicitly set data type
b = np.array([1, 3, 4, 5, 9, 7], dtype='int32')
print(b)
b.nbytes

[1 3 4 5 9 7]


24

### NumPy arrays can be multidimensional (list of lists)

In [6]:
[[i, j] for i in [4,5,6] for j in [7,8,9]]

[[4, 7], [4, 8], [4, 9], [5, 7], [5, 8], [5, 9], [6, 7], [6, 8], [6, 9]]

In [13]:
# 3x3 array
np.array([range(i, i+3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

### Creating arrays from scratch

In [14]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [15]:
# 3x5 array of floating point numbers
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [16]:
# 3x5 array of approx value for pi
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [17]:
# array of numbers, increments of 2
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [12]:
# Identity matrix
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### NumPy Array Attributes

In [20]:
import numpy as np
np.random.seed(0)  # seed for reproducibility

x1 = np.random.randint(10, size=6) # one-dimensional array
x2 = np.random.randint(10, size=(3,4)) # two-dimensional array
x3 = np.random.randint(10, size=(3,4,5)) # three-dimensional array

print(x3)


[[[8 1 5 9 8]
  [9 4 3 0 3]
  [5 0 2 3 8]
  [1 3 3 3 7]]

 [[0 1 9 9 0]
  [4 7 3 2 7]
  [2 0 0 4 5]
  [5 6 8 4 1]]

 [[4 9 8 1 1]
  [7 9 9 3 6]
  [7 2 0 3 5]
  [9 4 4 6 4]]]


In [14]:
print(x3.ndim, x3.shape, x3.size)

3 (3, 4, 5) 60


### Accessing individual elements

In [21]:
x1

array([5, 0, 3, 3, 7, 9])

In [22]:
x1[0]

5

In [23]:
x1[-2] # second element from end

7

In [24]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [19]:
x2[0]

array([3, 5, 2, 4])

In [20]:
x2[0,0]

3

In [21]:
# modify a value
x2[0,0] = 12
x2[0,1] = 20
x2

array([[12, 20,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [26]:
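# operate on all elements at once (array arithmetic)
# NOTE: the output below reflects x2 before the in-place edits in the previous cell (cells were run out of order)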
x2 * 10

array([[30, 50, 20, 40],
       [70, 60, 80, 80],
       [10, 60, 70, 70]])

### Computations on NumPy Arrays: Universal Functions (UFuncs)

In [28]:
np.random.seed(0)

def compute_reciprocals(values):
    """Return the reciprocal of every entry of ``values`` as a float array.

    The work is done one element at a time in a plain Python loop on purpose:
    this is the slow baseline that the vectorized ``1 / values`` expression
    is timed against.
    """
    # np.empty only allocates; every slot is overwritten by the loop below
    reciprocals = np.empty(len(values))
    for position, value in enumerate(values):
        reciprocals[position] = 1.0 / value
    return reciprocals

values = np.random.randint(1, 10, size=5)
print(values)

compute_reciprocals(values)

[6 1 4 4 8]


array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ])

In [31]:
big_array = np.random.randint(1, 100, size=1000000)
%timeit compute_reciprocals(big_array) # 2 seconds, really slow!

2.5 s ± 49.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Introducing UFuncs

UFuncs: quickly execute repeated operations on elements in NumPy arrays

In [32]:
# these two statements produce identical results
print(compute_reciprocals(values))
print(1/values)

[0.16666667 1.         0.25       0.25       0.125     ]
[0.16666667 1.         0.25       0.25       0.125     ]


In [33]:
%timeit 1/big_array # ~2 ms, much faster!

1.89 ms ± 63.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Array slicing

In [34]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [35]:
# first five elements
x[:5]

array([0, 1, 2, 3, 4])

In [36]:
# elements from index 5 onward
x[5:]

array([5, 6, 7, 8, 9])

In [37]:
# middle of array
x[4:7]

array([4, 5, 6])

In [38]:
# every other element
x[::2]

array([0, 2, 4, 6, 8])

In [39]:
# reverse elements
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [40]:
x2 = np.random.randint(10, size=(3,4))
x2

array([[7, 1, 8, 8],
       [0, 5, 2, 7],
       [6, 4, 9, 8]])

In [47]:
# first two rows of the first column
x2[0:2, 0]

array([7, 0])

In [44]:
# second column
x2[:, 1]

array([1, 5, 4])

In [50]:
# modifying a slice changes the original array: the slice is just a view!
x2[:, 1] = -5

In [51]:
x2

array([[ 7, -5,  8,  8],
       [ 0, -5,  2,  7],
       [ 6, -5,  9,  8]])

In [52]:
# copy of an array
first_column = x2[:, 0].copy()
first_column

array([7, 0, 6])

In [53]:
first_column = first_column * 2
first_column

array([14,  0, 12])

In [55]:
# however the first column in the original array remains unchanged
x2

array([[ 7, -5,  8,  8],
       [ 0, -5,  2,  7],
       [ 6, -5,  9,  8]])

In [60]:
# without .copy(), the slice is a view onto x2's data
first_column_orig = x2[:, 0]
# use the in-place operator: `first_column_orig = first_column_orig * 2`
# would build a new array and leave x2 untouched
first_column_orig *= 2
# display the original array to show that its first column was doubled
x2

array([[14, -5,  8,  8],
       [ 0, -5,  2,  7],
       [12, -5,  9,  8]])

### Reshaping arrays

In [51]:
array1 = np.arange(1,10)
array1

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [52]:
grid = array1.reshape((3,3))
grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### Concatenating arrays

In [54]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

### Splitting arrays

In [62]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [64]:
x1, x2 = np.split(x, [3])
print(x1)
print(x2)

[0 1 2]
[3 4 5 6 7 8 9]


In [65]:
x1, x2, x3 = np.split(x, [3, 5])
print(x1)
print(x2)
print(x3)

[0 1 2]
[3 4]
[5 6 7 8 9]


In [56]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [59]:
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [70]:
type(grid)

numpy.ndarray