# Numpy demo session


- Creating arrays: zeros, ones, full, empty, arange, linspace
- Multiplying lists versus multiplying arrays
- Matrix multiplying
- Transposing
- Reshaping
- Slicing
- Broadcasting
- Adding dimensions (expand_dims, None)
- Concatenating versus stacking
- Computing operations along an axis (e.g., mean)
 - keepdims=True, keepdims=False
- logical_and, logical_or, etc.
- Getting indices (np.where)
 - conditional replacement
- Indexing with Boolean matrices
- See if two arrays are equal

In [1]:
import numpy as np

In [2]:
# Create numpy arrays

x = np.ones((3, 3), dtype=int)
print(x)
print('')

x = np.zeros((3, 3), dtype=int)
print(x)
print('')

x = np.empty((3, 3), dtype=int)
print(x)
print('')

fill_value = 14
x = np.full((3, 3),fill_value, dtype=int)
print(x)

[[1 1 1]
 [1 1 1]
 [1 1 1]]

[[0 0 0]
 [0 0 0]
 [0 0 0]]

[[1 1 1]
 [1 1 1]
 [1 1 1]]

[[14 14 14]
 [14 14 14]
 [14 14 14]]


In [3]:
# Create an array of sequential values.

# The python "range" does not return a set of values.
# It is an object that returns one value at a time
# when "called", e.g., in a for loop.
r = range(10)
print(type(r))
print(r)
for rr in r:
    print(rr)
print('')

# Versus numpy arange, linspace, and logspace, which
# immediately create a set (array) of values when called.

# Use arange. You can define the step size, or let it default to 0.
first = 10
most = 100
step_size = 20
arng = np.arange(first, most, step_size)
print(arng)
print('')

# Use linspace. Don't give step size. Tell it how many evenly-spaced
# points you want, instead.
first = 0
last = 100
how_many = 5
linspc = np.linspace(first, last, how_many)
print(linspc)
print('')

<class 'range'>
range(0, 10)
0
1
2
3
4
5
6
7
8
9

[10 30 50 70 90]

[  0.  25.  50.  75. 100.]



In [4]:
# Multiplying, addition, ...


# Multiplying, a list. It "repeats" the list
x = [1, 2, 3]
print(3 * x)
print('')

# Multiplying, a numpy array by a scalar.
# Each element in the array is multiplied by that scalar.
x = np.array(x)
print(3 * x)
print('')

# Addition, a list. The lists are concatenated.
x = [1,2,3]
y = [4,5,6]
print(x + y)
print('')

# Addition of numpy arrays, of equal shape.
# Elementwise-addition. Elements at the same location
# in x and y are added.
x = np.array(x)
y = np.array(y)
print(x + y)

[1, 2, 3, 1, 2, 3, 1, 2, 3]

[3 6 9]

[1, 2, 3, 4, 5, 6]

[5 7 9]


In [5]:
# Element-wise versus Matrix multiplication

x = np.random.randint(0, 5, size=(3, 3), dtype=int)
y = np.random.randint(0, 5, size=(3, 3), dtype=int)

print(x)
print('')
print(y)
print('')

print(x * y)  # Element-wise. x and y must have the same shape
print('')
print(x @ y)  # Matrix multiply. Shapes must allow it: x.shape=(m, n), y.shape=(n, p)

[[4 1 3]
 [4 0 2]
 [3 2 4]]

[[2 1 3]
 [2 1 1]
 [2 2 2]]

[[8 1 9]
 [8 0 2]
 [6 4 8]]

[[16 11 19]
 [12  8 16]
 [18 13 19]]


In [6]:
# Matrix multiplication

# For matrix times vector, the vector
# can be shape (n,) or (n,1)...
x = np.ones((3,3))
y = np.ones((3,))

print((x @ y).shape)
print('')

# or can be shape (n,1)...
y = np.ones((3,1))

print((x @ y).shape)

(3,)

(3, 1)


In [7]:
## Transposing -- swapping dimensions

x = np.zeros((2,3,4))
print(x.shape)
print('')

y = np.transpose(x, (2, 0, 1))
print(y.shape)

(2, 3, 4)

(4, 2, 3)


In [8]:
## Reshaping. E.g., flattening two of three dimensions...

x = np.zeros((2,3,4))
print(x.shape)

y = np.reshape(x, (2*3, 4))
print(y.shape)

y = np.reshape(x, (2, 3*4))
print(y.shape)

y = np.reshape(x, (2, -1))
print(y.shape)

(2, 3, 4)
(6, 4)
(2, 12)
(2, 12)


In [9]:
## Slicing - Extracting subarrays from the full array
# Syntax for each dimension is... first:last, where
# the "last" is exclusive, e.g., 3:7 will get values
# at indices 3, 4, 5, and 6.

x = np.random.randint(0, 10, size=(5, 6), dtype=int)
print(x)
print('')

y = x[2:4, 2:5]  # row indices 2, 3, and columns indices 2, 3, 4.
print(y)
print('')

# If "first" is not included, it implies first is 0.
# If "first" is not included, it implies last is length of that dimension.

y = x[:3, 4:]   # row indices 0, 1, 2, and columns indices 4 and 5
print(y)
print('')


# You can't slice lists...
try:
    x_list = list(x)
    y_list = x_list[1:, 1:3]
except Exception as e:
    print('\nHad an error...\n')
    print(e)

[[5 1 6 4 8 7]
 [6 8 5 9 4 9]
 [3 4 9 8 5 5]
 [6 2 6 3 8 6]
 [8 4 6 7 0 7]]

[[9 8 5]
 [6 3 8]]

[[8 7]
 [4 9]
 [5 5]]


Had an error...

list indices must be integers or slices, not tuple


In [10]:
# Broadcasting...

x = np.random.randint(0, 10, size=(4, 3), dtype=int)
y = np.random.randint(0, 10, size=(4, 1), dtype=int)

print(x)
print('')
print(y)
print('')

# GOAL: Multiply each column of x by y

# Without broadcasting, we'd have to repeat y three times
# to get a new array of size equal to x, then do the
# multiply
y_rep = np.repeat(y, 3, axis=1)  # Note that we tell it which axis to repeat along
print(y_rep)
print('')

print(x * y_rep)
print('')

# But numpy uses broadcasting--that is, it determines which dimensions of
# length one is should repeat. You don't need to do it explicitly.
print(x*y)

# An array cannot be broadcast along a dimension it does not have.
# E.g., (4,) cannot be broadcast along the second dimension, because that
# dimension doesn't exist. An array of (4, 1) can be broadcast along
# the second dimension (as it was above).
try:
    x = np.random.randint(0, 10, size=(4, 3), dtype=int)
    y = np.random.randint(0, 10, size=(4,), dtype=int)
    z = x * y
except Exception as e:
    print('\nHad an error...\n')
    print(e)

[[3 2 4]
 [5 5 7]
 [7 8 8]
 [7 0 8]]

[[2]
 [2]
 [6]
 [3]]

[[2 2 2]
 [2 2 2]
 [6 6 6]
 [3 3 3]]

[[ 6  4  8]
 [10 10 14]
 [42 48 48]
 [21  0 24]]

[[ 6  4  8]
 [10 10 14]
 [42 48 48]
 [21  0 24]]

Had an error...

operands could not be broadcast together with shapes (4,3) (4,) 


In [11]:
# Expanding dimensions...

# In the above example, numpy could not broadcast
# the (4,) array when we tried to mulitply it by the (4, 3) array.
# To fix that, we'd need to expand the dimensionality
# from 1 (shape (4,)) to 2 (shape(4, 1)).

# Here are two ways to do so...

x = np.random.randint(0, 10, size=(4,), dtype=int)
print(x)
print(x.shape)
print('')

x_expand = np.expand_dims(x, 1)
print(x_expand.shape)
print('')

x_expand = x[:, None]
print(x_expand.shape)
print('')

# We can expand into "earlier" rather than "later" dimensions...
x_expand = np.expand_dims(x, 0)
print(x_expand.shape)
print('')

[6 4 7 3]
(4,)

(4, 1)

(4, 1)

(1, 4)



In [12]:
# Concatenate, stack...
# How can we join two (or more) arrays?

x = np.random.randint(0, 10, size=(3, 4), dtype=int)
y = np.random.randint(0, 10, size=(3, 4), dtype=int)

## Concatenate: no new dimensions...
z = np.concatenate((x, y), axis=1)
print(z)
print('')

## Concatenate: create new dimensions...
z = np.stack((x, y), axis=2)
print(z.shape)
print('')
print(z)
print('')

[[5 4 9 2 1 3 1 8]
 [9 0 7 8 5 6 6 1]
 [9 8 6 5 2 4 5 0]]

(3, 4, 2)

[[[5 1]
  [4 3]
  [9 1]
  [2 8]]

 [[9 5]
  [0 6]
  [7 6]
  [8 1]]

 [[9 2]
  [8 4]
  [6 5]
  [5 0]]]



In [13]:
## Axis specification, for operators

# For operations that take in a vector and output a scalar, we
# can specify which axis the matrix takes place along...

np.random.seed(0)

x = np.random.randint(0, 10, size=(3, 4), dtype=int)
print(x)
print('')

# sum all
print(np.sum(x))
print('')

# sum rows
print(np.sum(x, axis=0))
print('')

# sum columns
print(np.sum(x, axis=1))


# We can specify multiple axes as well. E.g., for a 3-D
# array, sum over the rows and columns, for each 2-D array
# contained along the third dimension.
x = np.random.randint(0, 10, size=(2, 4, 3), dtype=int)
print(x.shape)
print('')

s = np.sum(x, axis=(0, 1))  # sum over height and width, but not depth
print(s.shape)

[[5 0 3 3]
 [7 9 3 5]
 [2 4 7 6]]

54

[14 13 13 14]

[11 24 19]
(2, 4, 3)

(3,)


In [14]:
## Keeping dimensions after operating along an axis (or multiple axes)

x = np.random.randint(0, 10, size=(3, 4), dtype=int)
print(x)
print('')

# Don't keep
s = np.sum(x, axis=0)  # keepdims=False is the default
print(s.shape)
print('')

# Don't keep
s = np.sum(x, axis=0, keepdims=True)
print(s.shape)
print('')

[[3 7 0 1]
 [9 9 0 4]
 [7 3 2 7]]

(4,)

(1, 4)



In [15]:
# Boolean arrays

np.random.seed(2)

x = np.random.randint(0, 4, size=(7,), dtype=int)
y = np.random.randint(0, 4, size=(7,), dtype=int)
print(x)
print('')
print(y)
print('')

xis2 = x==2
print(xis2)
print('')

yis3 = y==3
print(yis3)
print('')

both_true = np.logical_and(xis2, yis3)
print(both_true)

[0 3 1 0 2 3 2]

[3 0 3 2 1 3 3]

[False False False False  True False  True]

[ True False  True False False  True  True]

[False False False False False False  True]


In [16]:
# Where (at which indices) is both_true a True??
# Use np.where...

# Notice that the result is a tuple of length 1, containing an array of indices
print(np.where(both_true))
print('')

# You often will want to extract that array of indices from the tuple
print(np.where(both_true)[0])
print('')

# If the input boolean array were 2D rather than 1D, the tuple
# from np.where would have had two numpy arrays, one for each dimension.


# We can also use np.where for conditional replacement...
np.random.seed(0)
X = np.random.choice((0, 1, 2), size=(7,))
Y = np.random.choice((2, 3), size=(7,))
print(X)
print('')
print(Y)
print('')

# Get Z, where element z has value of element y if the element x value is 0,
# else the value of element x.
Z = np.where(x==0, Y, X)
print(Z)

(array([6]),)

[6]

[0 1 0 1 1 2 0]

[3 2 2 2 2 2 3]

[3 1 0 2 1 2 0]


In [17]:
# Indexing with Boolean matrices

# If you don't need/want the indices of where the True values
# are, but rather just the values of some array at those
# indices/locations, you can use the Boolean array directly
# for the indexing...

# Get all values of array y, at locations where values of
# x are > 5...

x = np.random.randint(10, size=(10,), dtype=int)
y = np.random.randint(10, size=(10,), dtype=int)
print(x)
print('')
print(y)
print('')

z = y[x>5]
print(z)

[6 7 7 8 1 5 9 8 9 4]

[3 0 3 5 0 2 3 8 1 3]

[3 0 3 5 3 8 1]


In [18]:
# Are two arrays equal (all elements equal)?

x = np.random.randint(0, 10, size=(3, 4), dtype=int)
y = x.copy()

print(x==y)
print('')

# One approach
print(np.all(x==y))
print('')

# Second approach
print(np.array_equal(x, y))

[[ True  True  True  True]
 [ True  True  True  True]
 [ True  True  True  True]]

True

True
