The essential problem that NumPy solves is fast array processing

For example, suppose we want to create an array of 1 million random draws from a uniform distribution and compute the mean

In [1]:
import numpy as np
x = np.random.uniform(0,1,size=1000000)
x.mean()
#More generally
#NumPy sends operations in batches to optimized C and Fortran code

0.5005367972137021

NumPy is great for operations that are naturally vectorized

Vectorized operations are precompiled routines that can be sent in batches, like

matrix multiplication and other linear algebra routines
generating a vector of random numbers
applying a fixed transformation (e.g., sine or cosine) to an entire array

NumPy Arrays

In [2]:
a = np.zeros(3)
a

array([0., 0., 0.])

In [3]:
type(a)

numpy.ndarray

In [4]:
type(a[0])

numpy.float64

In [5]:
a = np.zeros(3,dtype=int)
type(a[0])

numpy.int64

In [6]:
a

array([0, 0, 0])

Shape and Dimension

In [10]:
z = np.zeros(10)
z

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [8]:
z.shape

(10,)

Here the shape tuple has only one element, which is the length of the array (tuples with one element end with a comma)

In [9]:
z.shape = (10,1)
z

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]])

In [12]:
z = np.zeros(4)
#z.shape = (2,2)
z=np.zeros((2,2))
z

array([[0., 0.],
       [0., 0.]])

Creating Arrays

In [13]:
z = np.empty(3)
z

array([0., 0., 0.])

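The numbers you see above are garbage values: np.empty only allocates memory and does not initialize it, so the contents are whatever happened to be in that memory (they may or may not be zeros)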

In [15]:
z = np.linspace(2,4,5)
z

array([2. , 2.5, 3. , 3.5, 4. ])

In [16]:
#Create a 2x2 identity matrix; np.eye(2) and np.identity(2) give the same result here
#z = np.identity(2)
z = np.eye(2)
z

array([[1., 0.],
       [0., 1.]])

In addition, NumPy arrays can be created from Python lists, tuples, etc. 

In [17]:
z = np.array([10,20]) # ndarray from Python list
z

array([10, 20])

In [18]:
type(z)

numpy.ndarray

In [19]:
z = np.array((10,20),dtype=float)
z

array([10., 20.])

In [22]:
z = np.array([[1,2],[3,4]])  # 2D array from a list of lists
z

array([[1, 2],
       [3, 4]])

See also np.asarray, which performs a similar function, but does not make a distinct copy of data already in a NumPy array

def asarray(a, dtype=None, order=None):
    """Sketch of np.asarray: forward to ``array`` with ``copy=False``.

    ``a`` and ``dtype`` are passed through positionally and ``order`` by
    keyword; the only thing this wrapper fixes is ``copy=False``.
    """
    fixed_options = {"copy": False, "order": order}
    return array(a, dtype, **fixed_options)
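So it is like array, except it has fewer options, and copy=False. array has copy=True by default. (Note: this reflects older NumPy versions. Since NumPy 2.0, np.array(..., copy=False) raises an error when a copy cannot be avoided, and np.asarray uses copy=None, meaning "copy only if needed".)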

The main difference is that array (by default) will make a copy of the object, while asarray will not unless necessary.

In [24]:
na = np.linspace(10,20,2)
na is np.asarray(na) # `is` compares object identity (the same object in memory), not element values;
#asarray does not copy an existing ndarray, so it returns na itself and the result is True


True

In [25]:
na is np.array(na)

False

In [26]:
np.asarray(na)

array([10., 20.])

In [27]:
np.array(na)

array([10., 20.])

Array Indexing

In [29]:
z = np.linspace(1,2,5)
z

array([1.  , 1.25, 1.5 , 1.75, 2.  ])

In [30]:
z[1]

1.25

In [31]:
z[0:2]

array([1.  , 1.25])

In [32]:
z[-1]

2.0

In [33]:
z = np.array([[1,2],[3,4]])
z

array([[1, 2],
       [3, 4]])

In [34]:
z[0,0]

1

In [35]:
z[0,1]

2

In [36]:
z[0:]

array([[1, 2],
       [3, 4]])

In [37]:
z[0,:]

array([1, 2])

In [38]:
z[:,1]

array([2, 4])

NumPy arrays of integers can also be used to extract elements

In [39]:
z = np.linspace(2,4,5)
z

array([2. , 2.5, 3. , 3.5, 4. ])

In [40]:
index = np.array((0,2,3))
z[index]

array([2. , 3. , 3.5])

Finally, an array of dtype bool can be used to extract elements

In [46]:
d=np.array([0,1,1,0,0],dtype=bool)
d

array([False,  True,  True, False, False])

In [47]:
z[d]

array([2.5, 3. ])

In [50]:
#An aside: all elements of an array can be set equal to one number using slice notation
z=np.empty(3)
z

array([2. , 3. , 3.5])

In [52]:
z[:]=42
z

array([42., 42., 42.])

Array Methods

In [2]:
import numpy as np
a = np.array((4,3,2,1))
a

array([4, 3, 2, 1])

In [4]:
a.sort()
a

array([1, 2, 3, 4])

In [5]:
a.sum()

10

In [6]:
a.mean()

2.5

In [7]:
a.max()

4

In [10]:
a.min()

1

In [11]:
a.argmax()# Returns the index of the maximal element

3

In [12]:
a.cumsum()# Cumulative sum of the elements of a

array([ 1,  3,  6, 10])

In [13]:
a.cumprod()# Cumulative product of the elements of a

array([ 1,  2,  6, 24])

In [14]:
a.var()

1.25

In [15]:
a.std()

1.118033988749895

In [18]:
a.shape=(2,2)# Reshape a in place into a 2x2 array
a.T# Equivalent to a.transpose()

array([[1, 3],
       [2, 4]])

Another method worth knowing is searchsorted()

If z is a nondecreasing array, then z.searchsorted(a) returns the index of the first element of z that is >= a

In [20]:
z = np.linspace(2,4,5)
z

array([2. , 2.5, 3. , 3.5, 4. ])

In [21]:
z.searchsorted(3)

2

In [25]:
a = np.linspace(1,4,4)
b = np.linspace(5,8,4)
a+b


array([ 6.,  8., 10., 12.])

In [26]:
a*b

array([ 5., 12., 21., 32.])

In [27]:
a+10

array([11., 12., 13., 14.])

In [28]:
a*10

array([10., 20., 30., 40.])

In [31]:
A = np.ones((2,2))
B = np.ones((2,2))
A+B

array([[2., 2.],
       [2., 2.]])

In [37]:
A=np.array([[2,3],[2,3]])
A

array([[2, 3],
       [2, 3]])

In [38]:
A*B # A * B is not the matrix product, it is an element-wise product

array([[2., 3.],
       [2., 3.]])

Matrix Multiplication

In [39]:
A@B

array([[5., 5.],
       [5., 5.]])

We can also use @ to take the inner product of two flat arrays

In [42]:
A = np.array((1,2))
B = np.array((10,20))
A@B #inner product of two flat arrays

50

In [43]:
A = np.array(((1,2),(3,4)))
A

array([[1, 2],
       [3, 4]])

In [44]:
A@(0,1) #postmultiplying, the tuple is treated as a column vector

array([2, 4])

In [45]:
(0,1)@A #the tuple is treated as a row vector

array([3, 4])

Mutability and Copying Arrays

Mutability leads to the following behavior (which can be shocking to MATLAB programmers…)

In [46]:
a = np.random.randn(3)
a

array([2.08069276, 0.16638821, 1.87049202])

In [47]:
#What happens at the end of this cell is that we change a by changing b (b=a, then b[0]=1)

#The name b is bound to a and becomes just another reference to the array (the Python assignment model is described in more detail later in the course)

#Hence, it has equal rights to make changes to that array

#This is in fact the most sensible default behavior!

#It means that we pass around only pointers to data, rather than making copies

#Making copies is expensive in terms of both speed and memory
b=a
b[0]=1
a

array([1.        , 0.16638821, 1.87049202])

Making Copies

In [49]:
a = np.random.randn(3)
a

array([0.69895719, 1.13252547, 2.30082569])

In [50]:
b=np.copy(a)
b

array([0.69895719, 1.13252547, 2.30082569])

In [51]:
b[0]=1
b

array([1.        , 1.13252547, 2.30082569])

In [52]:
a

array([0.69895719, 1.13252547, 2.30082569])

Additional Functionality

NumPy provides versions of the standard functions log, exp, sin, etc. that act element-wise on arrays

In [53]:
z=np.array([1,2,3])
np.sin(z)

array([0.84147098, 0.90929743, 0.14112001])

In [54]:
z

array([1, 2, 3])

In [56]:
(1/np.sqrt(2*np.pi))*np.exp(-0.5*z**2) #the usual arithmetic operations (+, *, etc.) also work element-wise; here, the standard normal density evaluated at each element of z

array([0.24197072, 0.05399097, 0.00443185])

In [57]:
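#The NumPy function np.where provides a vectorized way to choose between two values element by element based on a condition (used in the next cell on the array x):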
x=np.random.randn(4)
x

array([ 0.01764806, -2.54839338, -0.83726855,  0.68516311])

In [58]:
np.where(x>0,1,0) ## Insert 1 if x > 0 true, otherwise 0

array([1, 0, 0, 1])

In [59]:
#Not all user defined functions will act element-wise
def f(x):
    """Return 1 when x is strictly positive, otherwise 0.

    The test ``x > 0`` is used directly as a truth value, so x must be
    something whose comparison yields a single boolean (e.g. a scalar).
    """
    if x > 0:
        return 1
    return 0
#passing a NumPy array to the function f defined above causes a ValueError

f(x)


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [60]:
#You can also use np.vectorize to vectorize a given function

def f(x):
    return 1 if x>0 else 0
f=np.vectorize(f)
f(x)

array([1, 0, 0, 1])

Comparisons

In [61]:
#As a rule, comparisons on arrays are done element-wise
z = np.array([2,3])
y = np.array([2,3])
z == y

array([ True,  True])

In [62]:
y[0]=5
z == y

array([False,  True])

In [63]:
z!=y

array([ True, False])

In [64]:
#We can also do comparisons against scalars
z = np.linspace(1,2,5)
z

array([1.  , 1.25, 1.5 , 1.75, 2.  ])

In [65]:
z > 1

array([False,  True,  True,  True,  True])

In [66]:
z < 2

array([ True,  True,  True,  True, False])

In [67]:
#This is particularly useful for conditional extraction
b = z < 2
b

array([ True,  True,  True,  True, False])

In [68]:
z[b]

array([1.  , 1.25, 1.5 , 1.75])

In [69]:
z[z < 2]

array([1.  , 1.25, 1.5 , 1.75])

Subpackages

In [71]:
# np.random
#>>> s = np.random.binomial(n, p, 1000)
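# Each draw is the number of heads in 10 fair coin flips (n=10, p=0.5); size=10 below repeats this experiment 10 times (the example above would repeat it 1000 times).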
y = np.random.binomial(10,0.5,size = 10)
y

array([5, 3, 3, 5, 5, 4, 2, 4, 4, 5])

In [72]:
y.mean()

4.0

In [73]:
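#subpackage np.linalg
#Note: A below is written with three levels of brackets, so it has shape (1, 2, 2): a stack holding one 2x2 matrix. That is why det returns a length-1 array and inv returns a 3-D array. Use np.array([[1,2],[3,4]]) for a plain 2x2 matrix.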
A = np.array([[[1,2],[3,4]]])

np.linalg.det(A) ## Compute the determinant

array([-2.])

In [74]:
np.linalg.inv(A) ## Compute the inverse


array([[[-2. ,  1. ],
        [ 1.5, -0.5]]])

Exercise