In [1]:
import numpy as np
import sys

The Basics

In [2]:
a = np.array([1,2,3], dtype='int32')
print(a)

[1 2 3]


In [3]:
b = np.array([[9.0,8.0,7.0],[6.0,5.0,4.0]])
print(b)

[[9. 8. 7.]
 [6. 5. 4.]]


In [4]:
# Get Dimension
a.ndim

1

In [5]:
np.array([1,2,3,4], dtype="str")

array(['1', '2', '3', '4'], dtype='<U1')

In [6]:
np.array([1,2,3,4,5], dtype= 'float32')

array([1., 2., 3., 4., 5.], dtype=float32)

In [7]:
# nested lists result in multidimensional arrays

np.array([range(i,i+4) for i in [2,4,6,10]])

array([[ 2,  3,  4,  5],
       [ 4,  5,  6,  7],
       [ 6,  7,  8,  9],
       [10, 11, 12, 13]])

In [8]:
# Offsets that select the 2-D slice (i_values) and the row inside a slice (z_values)
i_values = [2, 4, 6]
z_values = [8, 10, 12]

# Assemble the 3-D array from a triply nested comprehension:
#   outermost level -> one 2-D slice per entry of i_values
#   middle level    -> one row per entry of z_values
#   innermost level -> three consecutive integers starting at slice_base + row_base
three_d_array = np.array([
    [
        [slice_base + row_base + step for step in range(3)]
        for row_base in z_values
    ]
    for slice_base in i_values
])

print(three_d_array)
print("\nShape of the 3D array:", three_d_array.shape)

[[[10 11 12]
  [12 13 14]
  [14 15 16]]

 [[12 13 14]
  [14 15 16]
  [16 17 18]]

 [[14 15 16]
  [16 17 18]
  [18 19 20]]]

Shape of the 3D array: (3, 3, 3)


In [9]:
# Create a length-10 integer array filled with zeros

np.zeros(10, dtype="int") 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [10]:
np.zeros((6,6), dtype='int')

array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

In [11]:
# Create a 3x5 floating-point array filled with 1s
np.ones((3,5), dtype='float')

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [12]:
# Create a 3x5 array filled with 3.14
np.full((3,5),3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [13]:
# Create an array filled with a linear sequence
# Starting at 0, ending at 20, stepping by 2
# (this is similar to the built-in range() function)
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [14]:
# np.linspace(start, stop, num) returns num evenly spaced values, endpoints included.

print(np.linspace(0, 22/7, 10))  # ten values between 0 and 22/7
print(np.linspace(0, 1, 5))  # five values evenly spaced between 0 and 1
print(np.linspace(0, 1, 5, dtype = 'int'))  # same grid stored as integers: the fractional values become 0

[0.         0.34920635 0.6984127  1.04761905 1.3968254  1.74603175
 2.0952381  2.44444444 2.79365079 3.14285714]
[0.   0.25 0.5  0.75 1.  ]
[0 0 0 0 1]


In [15]:
# Create a 3x3 array of uniformly distributed
# random values between 0 and 1
print(np.random.random((3,3)))
# Create a 3x3 array of random integers in the interval [0, 10)
print(np.random.randint(0, 10, size= (3,3)))

[[0.18413444 0.49721315 0.06981932]
 [0.22465577 0.57846681 0.03386414]
 [0.55009654 0.30890574 0.65811335]]
[[1 7 6]
 [5 9 4]
 [2 8 7]]


In [16]:
np.random.randint(50, 99, (4,4))

array([[58, 63, 58, 50],
       [51, 63, 67, 81],
       [55, 58, 58, 97],
       [92, 55, 94, 54]])

In [17]:
# Create a 3x3 array of normally distributed random values
# with mean 0 and standard deviation 1

np.random.normal(0,1,(3,3))

array([[-0.89016652,  0.34842059, -0.18844862],
       [ 0.58602417,  0.365884  , -0.33764495],
       [ 1.10621817,  0.07076411, -1.33242767]])

In [18]:
# Create a 3x3 array of random integers in the interval [0, 10)

np.random.randint(0,10,(3,3))

array([[9, 1, 9],
       [6, 8, 2],
       [4, 8, 1]])

In [19]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [20]:
# Create a 3x3 identity matrix

np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

NumPy Standard Data Types

In [21]:
# Return a new array of the given shape and dtype WITHOUT initializing its entries.
# The contents are whatever bytes already occupy that memory (not random numbers),
# so every element should be assigned before it is read.

np.empty((3,3),dtype="int")

array([[4607182418800017408,                   0,                   0],
       [                  0, 4607182418800017408,                   0],
       [                  0,                   0, 4607182418800017408]])

In [22]:
np.zeros((10),dtype="int16")

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

In [23]:
#or using the associated NumPy object:

np.zeros(10,dtype=np.int16)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

![4212_t2-1.png](attachment:a7cf0fb1-ef62-4d57-90ef-5612b17c91a0.png)

The Basics of NumPy Arrays¶

We’ll cover a few categories of basic array manipulations here:
- Attributes of arrays: determining the size, shape, memory consumption, and data types of arrays
- Indexing of arrays: getting and setting the value of individual array elements
- Slicing of arrays: getting and setting smaller subarrays within a larger array
- Reshaping of arrays: changing the shape of a given array
- Joining and splitting of arrays: combining multiple arrays into one, and splitting one array into many


NumPy Array Attributes

In [24]:
#NumPy Array Attributes
#We’ll use NumPy’s random number generator, which we will seed with a set value in order to ensure that the same random arrays are generated each time this code is run:

np.random.seed(0) # seed for reproducibility
x1 = np.random.randint(10, size=6) # One-dimensional array
print(x1)

[5 0 3 3 7 9]


In [25]:
# Each array has the attributes ndim (the number of dimensions), shape (the size of
# each dimension), and size (the total number of elements in the array).

np.random.seed(0)  # seed for reproducibility
x1 = np.random.randint(10, size=6)  # one-dimensional; same as np.random.randint(0, 10, size=6)
x2 = np.random.randint(10, size=(3, 4))  # two-dimensional
x3 = np.random.randint(10, size=(3, 4, 5))  # three-dimensional

# Label every line with the array it actually describes
# (x1, x2 and x3 hold 6, 12 and 60 elements in total).
for name, arr in (("x1", x1), ("x2", x2), ("x3", x3)):
    print(f"{name} ndim: ", arr.ndim)
    print(f"{name} shape: ", arr.shape)
    print(f"{name} size: ", arr.size)

# dtype is the data type of the elements; itemsize is the size (in bytes) of one
# element; nbytes is the total size (in bytes) of the array. Printed in the order
# x1, x2, x3.
for arr in (x1, x2, x3):
    print("dtype: ", arr.dtype)
    print("itemsize:", arr.itemsize, "bytes")
    print("nbytes:", arr.nbytes, "bytes")

# In general, we expect that nbytes is equal to itemsize times size.

x1 ndim:  1
x1 shape:  (6,)
x1 size:  6
x1 ndim:  2
x1 shape:  (3, 4)
x1 size:  12
x1 ndim:  3
x1 shape:  (3, 4, 5)
x1 size:  60
dtype:  int64
itemsize: 8 bytes
nbytes: 48 bytes
dtype:  int64
itemsize: 8 bytes
nbytes: 96 bytes
dtype:  int64
itemsize: 8 bytes
nbytes: 480 bytes


In [26]:
x1

array([5, 0, 3, 3, 7, 9])

In [27]:
x1[0]

np.int64(5)

In [28]:
x1[4]

np.int64(7)

In [29]:
x1[-1]

np.int64(9)

In [30]:
x1[-2]

np.int64(7)

In [31]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [32]:
x2[1,2]

np.int64(8)

In [33]:
x2[-1,-1]

np.int64(7)

In [34]:
x2[-1,-2]

np.int64(7)

In [35]:
#You can also modify values using any of the above index notation:

x2[0,0]=12
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [36]:
x1

array([5, 0, 3, 3, 7, 9])

In [37]:
x1[0] = 3.14159 # x1 holds integers (it was created with randint), so the float is silently truncated to 3
x1

array([3, 0, 3, 3, 7, 9])

Array Slicing: Accessing Subarrays¶

Just as we can use square brackets to access individual array elements, we can also use them to access subarrays with the slice notation, marked by the colon (:) character. The NumPy slicing syntax follows that of the standard Python list; to access a slice of an array x, use this:
x[start:stop:step]
If any of these are unspecified, they default to the values start=0, stop=size of dimension, step=1. We’ll take a look at accessing subarrays in one dimension and in multiple dimensions.

In [38]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [39]:
x[:5] # first five elements

array([0, 1, 2, 3, 4])

In [40]:
x[5:] # elements from index 5 onward (index 5 itself is included)

array([5, 6, 7, 8, 9])

In [41]:
x[4:7]# middle subarray

array([4, 5, 6])

In [42]:
x[::2] # every other element

array([0, 2, 4, 6, 8])

In [43]:
x[1::2] #every other element, starting at index 1

array([1, 3, 5, 7, 9])

In [44]:
x[-7:-2:2]

array([3, 5, 7])

In [45]:
x[-4:-2:1]

array([6, 7])

In [46]:
# A potentially confusing case is when the step value is negative. In this case, the
# defaults for start and stop are swapped. This becomes a convenient way to reverse
# an array:

x[::-1] # all elements, reversed

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [47]:
x[5::-2]# reversed every other from index 5

array([5, 3, 1])

In [48]:
x[5:1:-2]

array([5, 3])

In [49]:
x[5:-8:-1]

array([5, 4, 3])

In [50]:
x[7:-6:-1]

array([7, 6, 5])

In [51]:
x[-7:-8:-1]

array([3])

Multidimensional subarrays

In [52]:
# Multidimensional slices work in the same way, with multiple slices separated by commas.
# For example:

x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [53]:
# two rows, three columns

x2[:2, :3]

array([[12,  5,  2],
       [ 7,  6,  8]])

In [54]:
# all rows, every other column

x2[:3,::2]

array([[12,  2],
       [ 7,  8],
       [ 1,  7]])

In [55]:
#Finally, subarray dimensions can even be reversed together:

x2[::-1,::-1]


array([[ 7,  7,  6,  1],
       [ 8,  8,  6,  7],
       [ 4,  2,  5, 12]])

Accessing array rows and columns

In [56]:
# One commonly needed routine is accessing single
# rows or columns of an array. You can do this by combining indexing and slicing,
# using an empty slice marked by a single colon (:):

print(x2[:, 0]) # first column of x2

[12  7  1]


In [57]:
print(x2[0,:]) # first row of x2

[12  5  2  4]


In [58]:
#In the case of row access, the empty slice can be omitted for a more compact syntax:

print(x2[0]) # equivalent to x2[0, :]

[12  5  2  4]



Subarrays as no-copy views
One important—and extremely useful—thing to know about array slices is that they return views rather than copies of the array data. This is one area in which NumPy array slicing differs from Python list slicing: in lists, slices will be copies. Consider our two-dimensional array from before:

In [59]:
print(x2)

[[12  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [60]:
#Let’s extract a 2×2 subarray from this:

x2_sub = x2[:2,:2]
print(x2_sub)

[[12  5]
 [ 7  6]]


In [61]:
#Now if we modify this subarray, we’ll see that the original array is changed! Observe:

x2_sub[0,0] = 99
print(x2_sub)

[[99  5]
 [ 7  6]]


In [62]:
print(x2)

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


This default behavior is actually quite useful: it means that when we work with large datasets, we can access and process pieces of these datasets without the need to copy the underlying data buffer.


Creating copies of arrays
Despite the nice features of array views, it is sometimes useful to instead explicitly copy the data within an array or a subarray. This can be most easily done with the copy() method:

In [63]:
x2_sub_copy = x2[:2,:2].copy()
print(x2_sub_copy)


[[99  5]
 [ 7  6]]


In [64]:
#If we now modify this subarray, the original array is not touched:

x2_sub_copy[0,0] = 42
print(x2_sub_copy)

[[42  5]
 [ 7  6]]


In [65]:
print(x2)

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


Reshaping of Arrays

In [66]:
# Another useful type of operation is reshaping of arrays. The most flexible way of
# doing this is with the reshape() method. For example, if you want to put the numbers
# 1 through 9 in a 3×3 grid, you can do the following:

grid = np.arange(1,10,1).reshape(3,3)
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


Where possible, the reshape method will use a no-copy view of the initial array, but with noncontiguous memory buffers this is not always the case.

Another common reshaping pattern is the conversion of a one-dimensional array into a two-dimensional row or column matrix. You can do this with the reshape method, or more easily by making use of the newaxis keyword within a slice operation:

In [67]:
x = np.array([1, 2, 3])
x.shape # x is a vector (3,)

(3,)

In [68]:
# row vector via reshape

x.reshape(1,3).shape

(1, 3)

In [69]:
# row vector via newaxis

x[np.newaxis, :].shape

(1, 3)

In [70]:
x.reshape(1,-1).shape

(1, 3)

In [71]:
# column vector via reshape

x.reshape(3, 1)

array([[1],
       [2],
       [3]])

In [72]:
# column vector via newaxis

x[:, np.newaxis]

array([[1],
       [2],
       [3]])

In [73]:
x.reshape(-1,1).shape

(3, 1)

Array Concatenation and Splitting

In [74]:
x = np.array([1,2,3])
y = np.array([3,2,1])
np.concatenate((x, y))

array([1, 2, 3, 3, 2, 1])

In [75]:
z = np.array([99,99,99]) #z =[99,99,99]

np.concatenate((x,y,z))

array([ 1,  2,  3,  3,  2,  1, 99, 99, 99])

In [76]:
grid = np.array([[1,2,3],
                [4,5,6]])

In [77]:
np.concatenate((grid,grid))

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [78]:
# concatenate along the second axis (zero-indexed)

np.concatenate((grid, grid), axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [79]:
# For working with arrays of mixed dimensions, it can be clearer to use the np.vstack
# (vertical stack) and np.hstack (horizontal stack) functions:

x = np.array([1,2,3])
grid = np.array([[9,8,7],
                 [6,5,4]])


# vertically stack the arrays
np.vstack([x,grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [80]:
#horizontally stack the arrays

y = np.array([[99],
            [99]])
np.hstack([grid,y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

Similarly, np.dstack will stack arrays along the third axis.

Splitting of arrays

In [81]:
x = [1,2,3,99,99,3,2,1]
x1, x2, x3 = np.split(x, [3,5])
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [82]:
x = np.array([1,2,3,99,99,3,2,1])
x1, x2, x3, x4 = np.split(x, [3,4,5])
print(x1, x2, x3,x4)


[1 2 3] [99] [99] [3 2 1]


Notice that N split points lead to N + 1 subarrays. The related functions np.hsplit and np.vsplit are similar:

In [83]:
grid = np.arange(36,dtype=float).reshape((6,6))
grid

array([[ 0.,  1.,  2.,  3.,  4.,  5.],
       [ 6.,  7.,  8.,  9., 10., 11.],
       [12., 13., 14., 15., 16., 17.],
       [18., 19., 20., 21., 22., 23.],
       [24., 25., 26., 27., 28., 29.],
       [30., 31., 32., 33., 34., 35.]])

In [84]:
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[ 0.  1.  2.  3.  4.  5.]
 [ 6.  7.  8.  9. 10. 11.]]
[[12. 13. 14. 15. 16. 17.]
 [18. 19. 20. 21. 22. 23.]
 [24. 25. 26. 27. 28. 29.]
 [30. 31. 32. 33. 34. 35.]]


In [85]:
upper,middle, lower = np.vsplit(grid, [2,3])
print("upper: ",upper)
print("middle: ",middle)
print("lower: ",lower)

upper:  [[ 0.  1.  2.  3.  4.  5.]
 [ 6.  7.  8.  9. 10. 11.]]
middle:  [[12. 13. 14. 15. 16. 17.]]
lower:  [[18. 19. 20. 21. 22. 23.]
 [24. 25. 26. 27. 28. 29.]
 [30. 31. 32. 33. 34. 35.]]


In [86]:
left, right = np.hsplit(grid, [2])
print(left)
print(right)

[[ 0.  1.]
 [ 6.  7.]
 [12. 13.]
 [18. 19.]
 [24. 25.]
 [30. 31.]]
[[ 2.  3.  4.  5.]
 [ 8.  9. 10. 11.]
 [14. 15. 16. 17.]
 [20. 21. 22. 23.]
 [26. 27. 28. 29.]
 [32. 33. 34. 35.]]


In [87]:
left, right = np.hsplit(grid, 2)
print(left)
print(right)

[[ 0.  1.  2.]
 [ 6.  7.  8.]
 [12. 13. 14.]
 [18. 19. 20.]
 [24. 25. 26.]
 [30. 31. 32.]]
[[ 3.  4.  5.]
 [ 9. 10. 11.]
 [15. 16. 17.]
 [21. 22. 23.]
 [27. 28. 29.]
 [33. 34. 35.]]


Computation on NumPy Arrays: Universal Functions

Exploring NumPy’s UFuncs¶
Ufuncs exist in two flavors: unary ufuncs, which operate on a single input, and binary ufuncs, which operate on two inputs. We’ll see examples of both these types of functions here.

In [88]:
# NumPy’s ufuncs feel very natural to use because they make use of Python’s native
# arithmetic operators. The standard addition, subtraction, multiplication, and division
# can all be used:

x = np.arange(4)
print("x =", x)
print("x + 5 =", x + 5)
print("x - 5 =", x - 5)
print("x * 2 =", x * 2)
print("x / 2 =", x / 2)
print("x // 2 =", x // 2) # floor division

x = [0 1 2 3]
x + 5 = [5 6 7 8]
x - 5 = [-5 -4 -3 -2]
x * 2 = [0 2 4 6]
x / 2 = [0.  0.5 1.  1.5]
x // 2 = [0 0 1 1]


In [89]:
#There is also a unary ufunc for negation, a ** operator for exponentiation, and a %
#operator for modulus:

print("-x = ", -x)
print("x ** 2 = ", x ** 2)
print("x % 2 = ", x % 2)

-x =  [ 0 -1 -2 -3]
x ** 2 =  [0 1 4 9]
x % 2 =  [0 1 0 1]


In [90]:
# In addition, these can be strung together however you wish, and the standard order
# of operations is respected:

-(0.5*x+1) ** 2

array([-1.  , -2.25, -4.  , -6.25])

In [91]:
# All of these arithmetic operations are simply convenient wrappers around specific
# functions built into NumPy; for example, the + operator is a wrapper for the add
# function:

print(np.add(3,2))

print(np.add(x,2)) #Addition +
print(np.subtract(x,5)) #Subtraction -
print(np.negative(x)) #Unary negation -
print(np.multiply(x,3)) #Multiplication *
print(np.divide(x,2)) #Division /
print(np.floor_divide(x,2)) #Floor division //
print(np.power(x,2)) #Exponentiation **
print(np.mod(x,2)) #Modulus/remainder %

print(np.multiply(x, x)) #Element-wise product of two arrays (same as x * x)


5
[2 3 4 5]
[-5 -4 -3 -2]
[ 0 -1 -2 -3]
[0 3 6 9]
[0.  0.5 1.  1.5]
[0 0 1 1]
[0 1 4 9]
[0 1 0 1]
[0 1 4 9]


In [92]:
# Just as NumPy understands Python’s built-in arithmetic operators, it also understands
# Python’s built-in absolute value function:

x = np.array([-2,-1,0,1,2])
abs(x)

array([2, 1, 0, 1, 2])

In [93]:
# The corresponding NumPy ufunc is np.absolute, which is also available under the
# alias np.abs:

print(np.absolute(x))
print(np.abs(x))

[2 1 0 1 2]
[2 1 0 1 2]


In [94]:
# This ufunc can also handle complex data, in which the absolute value returns the
# magnitude:

x = np.array([7-24j,4-3j,2+0j,1+3j])
np.abs(x)

array([25.        ,  5.        ,  2.        ,  3.16227766])

In [95]:
# NumPy provides a large number of useful ufuncs, and some of the most useful for the
# data scientist are the trigonometric functions. We’ll start by defining an array of
# angles:

theta = np.linspace(0,np.pi,3)


# Now we can compute some trigonometric functions on these values.
# The values are computed to within machine precision, so results that should be
# exactly zero (or undefined, like tan(pi/2)) show up as tiny or huge floats instead.
print("theta      =",theta)
print("sin(theta) =",np.sin(theta))
print("cos(theta) =",np.cos(theta))
print("tan(theta) =",np.tan(theta))

theta      = [0.         1.57079633 3.14159265]
sin(theta) = [0.0000000e+00 1.0000000e+00 1.2246468e-16]
cos(theta) = [ 1.000000e+00  6.123234e-17 -1.000000e+00]
tan(theta) = [ 0.00000000e+00  1.63312394e+16 -1.22464680e-16]


In [96]:
x = [-1, 0, 1]

print("x = ", x)
print("arcsin(x) = ", np.arcsin(x))
print("arccos(x) = ", np.arccos(x))
print("arctan(x) = ", np.arctan(x))

x =  [-1, 0, 1]
arcsin(x) =  [-1.57079633  0.          1.57079633]
arccos(x) =  [3.14159265 1.57079633 0.        ]
arctan(x) =  [-0.78539816  0.          0.78539816]


In [97]:
x = [1,2,3]
print("x      =",x)
print("e^x    =",np.exp(x))
print("2^x    =",np.exp2(x))
print("3^x    =",np.power(3,x))

x      = [1, 2, 3]
e^x    = [ 2.71828183  7.3890561  20.08553692]
2^x    = [2. 4. 8.]
3^x    = [ 3  9 27]


In [98]:
# The inverse of the exponentials, the logarithms, are also available. The basic np.log
# gives the natural logarithm; if you prefer to compute the base-2 logarithm or the
# base-10 logarithm, these are available as well:

x = [1, 2, 4, 10]
print("x        =", x)
print("ln(x)    =", np.log(x))
print("log2(x)  =", np.log2(x))
print("log10(x) =", np.log10(x))

x        = [1, 2, 4, 10]
ln(x)    = [0.         0.69314718 1.38629436 2.30258509]
log2(x)  = [0.         1.         2.         3.32192809]
log10(x) = [0.         0.30103    0.60205999 1.        ]


In [99]:
# There are also some specialized versions that are useful for maintaining precision
# with very small input:

x = [0, 0.001, 0.01, 0.1]
print("exp(x) - 1 =", np.expm1(x))
print("log(1 + x) =", np.log1p(x))

exp(x) - 1 = [0.         0.0010005  0.01005017 0.10517092]
log(1 + x) = [0.         0.0009995  0.00995033 0.09531018]


Advanced Ufunc Features


Specifying output
For large calculations, it is sometimes useful to be able to specify the array where the result of the calculation will be stored. Rather than creating a temporary array, you can use this to write computation results directly to the memory location where you’d like them to be. For all ufuncs, you can do this using the out argument of the function:

In [100]:
x = np.arange(5)
y = np.empty(5)
np.multiply(x, 10, out=y)
print(y)

[ 0. 10. 20. 30. 40.]


In [101]:
#This can even be used with array views. For example, we can write the results of a
#computation to every other element of a specified array:

y = np.zeros(10)
np.power(2, x, out=y[::2])
print(y)

# If we had instead written y[::2] = 2 ** x, this would have resulted in the creation
# of a temporary array to hold the results of 2 ** x, followed by a second operation
# copying those values into the y array. This doesn’t make much of a difference for such
# a small computation, but for very large arrays the memory savings from careful use of
# the out argument can be significant.


[ 1.  0.  2.  0.  4.  0.  8.  0. 16.  0.]


In [102]:
y = np.zeros(10)
y[::2] = 2 ** x
print(y)

[ 1.  0.  2.  0.  4.  0.  8.  0. 16.  0.]


Aggregates¶
For binary ufuncs, there are some interesting aggregates that can be computed directly from the object. For example, if we’d like to reduce an array with a particular operation, we can use the reduce method of any ufunc. A reduce repeatedly applies a given operation to the elements of an array until only a single result remains. For example, calling reduce on the add ufunc returns the sum of all elements in the array:

In [103]:
x = np.arange(1,6)
print(x)
print(np.add.reduce(x))
print(np.subtract.reduce(x))
print(np.multiply.reduce(x))

[1 2 3 4 5]
15
-13
120


In [104]:
#If we’d like to store all the intermediate results of the computation, we can instead use
#accumulate:
print(x)
print(np.add.accumulate(x))

print(np.subtract.accumulate(x))

print(np.multiply.accumulate(x))

print(np.divide.accumulate(x))

print(np.floor_divide.accumulate(x))

print(x)

[1 2 3 4 5]
[ 1  3  6 10 15]
[  1  -1  -4  -8 -13]
[  1   2   6  24 120]
[1.         0.5        0.16666667 0.04166667 0.00833333]
[1 0 0 0 0]
[1 2 3 4 5]


Outer products¶

Finally, any ufunc can compute the output of all pairs of two different inputs using the outer method. This allows you, in one line, to do things like create a multiplication table:

In [105]:
x = np.arange(1,6)
np.multiply.outer(x, x)

array([[ 1,  2,  3,  4,  5],
       [ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20],
       [ 5, 10, 15, 20, 25]])

In [106]:
# As a quick example, consider computing the sum of all values in an array. Python
# itself can do this using the built-in sum function:

L = np.random.random(100)
sum(L)

np.float64(52.12818058833704)

In [107]:
#The syntax is quite similar to that of NumPy’s sum function, and the result is the same
#in the simplest case:

np.sum(L)

np.float64(52.12818058833702)

In [108]:
# However, because it executes the operation in compiled code, NumPy’s version of the
# operation is computed much more quickly:

big_array = np.random.rand(1000000)
print(big_array)
%timeit sum(big_array)
%timeit np.sum(big_array)

[0.16684751 0.77905102 0.8649333  ... 0.58889529 0.6247286  0.55565566]
39.1 ms ± 245 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
188 μs ± 2.23 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [109]:
#Similarly, Python has built-in min and max functions, used to find the minimum value
#and maximum value of any given array:

min(big_array),max(big_array)

(np.float64(1.4057692298008462e-06), np.float64(0.9999994392723005))

In [110]:
%timeit min(big_array)
%timeit np.min(big_array)

30.1 ms ± 113 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
112 μs ± 2 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [111]:
# For min, max, sum, and several other NumPy aggregates, a shorter syntax is to use
# methods of the array object itself:

print(big_array.min(), big_array.max(), big_array.sum())
# Whenever possible, make sure that you are using the NumPy version of these
# aggregates when operating on NumPy arrays!
%timeit np.min(big_array)
%timeit big_array.min()

1.4057692298008462e-06 0.9999994392723005 500202.5348847683
109 μs ± 1.94 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
108 μs ± 1.04 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


Multidimensional aggregates¶

In [112]:
# One common type of aggregation operation is an aggregate along a row or column.
# Say you have some data stored in a two-dimensional array:
np.random.seed(seed=0)
M = np.random.random((3,4))
print(M)

M.sum()

[[0.5488135  0.71518937 0.60276338 0.54488318]
 [0.4236548  0.64589411 0.43758721 0.891773  ]
 [0.96366276 0.38344152 0.79172504 0.52889492]]


np.float64(7.478282790980994)

In [113]:
# Called without arguments, an aggregate such as M.sum() reduces the whole array to
# a single value. Aggregation functions also take an axis argument specifying the
# axis along which the aggregate is computed. For example, we can find the minimum
# value within each column by specifying axis=0:

print(M.min(axis=0))
# or, equivalently, with the function form
print(np.min(M,axis=0))
M

[0.4236548  0.38344152 0.43758721 0.52889492]
[0.4236548  0.38344152 0.43758721 0.52889492]


array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ],
       [0.96366276, 0.38344152, 0.79172504, 0.52889492]])

In [114]:
# Similarly, we can find the maximum value within each row:

M.max(axis=1)

array([0.71518937, 0.891773  , 0.96366276])

In [115]:
# NaN-safe aggregates: the nan-prefixed functions ignore NaN entries, whereas the
# plain versions return nan as soon as a NaN is present in the data.
# Note that some of these NaN-safe functions were not added until
# NumPy 1.8, so they will not be available in older NumPy versions.

x = np.array([1,2,np.nan,4,5])

print("np.sum       =",np.sum(x))
print("np.nansum    =",np.nansum(x))

print("np.mean      =",np.mean(x))
print("np.nanmean   =",np.nanmean(x))

print("np.std       =",np.std(x))
print("np.nanstd    =",np.nanstd(x))


# Careful: here np.argmin does NOT give the index of the smallest real value.
print("np.argmin    =",np.argmin(x)) 
# When the array contains a NaN, argmin returns the index of the NaN (2 in this case).


# np.nanargmin skips the NaN and returns the index of the smallest remaining value.
print("np.nanargmin =",np.nanargmin(x))

np.sum       = nan
np.nansum    = 12.0
np.mean      = nan
np.nanmean   = 3.0
np.std       = nan
np.nanstd    = 1.5811388300841898
np.argmin    = 2
np.nanargmin = 0


![4214_t2-3.png](attachment:79000067-d305-4da0-b051-ac2895d18d3a.png)

Computation on Arrays: Broadcasting¶
Broadcasting is simply a set of rules for applying binary ufuncs (addition, subtraction, multiplication, etc.) on arrays of different sizes.


In [116]:
import numpy as np

a = np.array([0,1,2])
b = np.array([5,5,5])
a+b

array([5, 6, 7])

In [117]:
a+5

array([5, 6, 7])

In [118]:
# We can similarly extend this to arrays of higher dimension. Observe the result when
# we add a one-dimensional array to a two-dimensional array:

M = np.ones((3,3))
M

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

![02.05-broadcasting.png](attachment:b4fd3eed-c506-4105-a3ed-073c27e078aa.png)

Rules of Broadcasting¶
Broadcasting in NumPy follows a strict set of rules to determine the interaction between the two arrays:

Rule 1: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.

Rule 2: If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.
Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.

In [119]:
#Let’s look at adding a two-dimensional array to a one-dimensional array:
M = np.ones((2,3))
print(M)
a = np.arange(3)
print(a)
# Let’s consider an operation on these two arrays. The shapes of the arrays are:
# M.shape = (2, 3)
# a.shape = (3,)
# We see by rule 1 that the array a has fewer dimensions, so we pad it on the left with
# ones:
# M.shape -> (2, 3)
# a.shape -> (1, 3)
# By rule 2, we now see that the first dimension disagrees, so we stretch this dimension
# to match:
# M.shape -> (2, 3)
# a.shape -> (2, 3)
# The shapes match, and we see that the final shape will be (2, 3) :

M+a

[[1. 1. 1.]
 [1. 1. 1.]]
[0 1 2]


array([[1., 2., 3.],
       [1., 2., 3.]])

In [120]:
# Let’s take a look at an example where both arrays need to be broadcast:
a = np.arange(3).reshape((3,1))
print(a)
b = np.arange(3)
print(b)
# Again, we’ll start by writing out the shape of the arrays:

# a.shape = (3, 1)
# b.shape = (3,)
# Rule 1 says we must pad the shape of b with ones:
# a.shape -> (3, 1)
# b.shape -> (1, 3)
# And rule 2 tells us that we upgrade each of these ones to match the corresponding
# size of the other array:
# a.shape -> (3, 3)
# b.shape -> (3, 3)
# Because the result matches, these shapes are compatible. We can see this here:
a+b


[[0]
 [1]
 [2]]
[0 1 2]


array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [121]:
# Now let’s take a look at an example in which the two arrays are not compatible:

M = np.ones((3,2))
print(M)
a = np.arange(3)
print(a)

# This is just a slightly different situation than in the first example: the matrix M is
# transposed. How does this affect the calculation? The shapes of the arrays are:
# M.shape = (3, 2)
# a.shape = (3,)
# Again, rule 1 tells us that we must pad the shape of a with ones:
# M.shape -> (3, 2)
# a.shape -> (1, 3)
# By rule 2, the first dimension of a is stretched to match that of M :
# M.shape -> (3, 2)
# a.shape -> (3, 3)
# Now we hit rule 3: the final shapes do not match, so these two arrays are
# incompatible, as we can observe by attempting this operation:

# print(M+a)  # raises ValueError: operands could not be broadcast together with shapes (3,2) (3,)

[[1. 1.]
 [1. 1.]
 [1. 1.]]
[0 1 2]


In [122]:
print(a[:, np.newaxis].shape)
M + a[:, np.newaxis]

(3, 1)


array([[1., 1.],
       [2., 2.],
       [3., 3.]])

Comparisons, Masks, and Boolean Logic¶


Comparison Operators as ufuncs
The result of these comparison operators is always an array with a Boolean data type. All six of the standard comparison operations are available:

Boolean arrays like these are useful when, for example, you wish to count all values greater than a certain value, or to remove all outliers that are above some threshold. In NumPy, Boolean masking is often the most efficient way to accomplish these types of tasks.

In [123]:
x = np.array([1,2,3,4,5])

print(x<3)  # less than
print(x>3)  # greater than
print(x<=3) #less than or equal
print(x>=3) #greater than or equal
print(x!=3) #not equal
print(x==3) #equal

[ True  True False False False]
[False False False  True  True]
[ True  True  True False False]
[False False  True  True  True]
[ True  True False  True  True]
[False False  True False False]


In [124]:
# It is also possible to do an element-by-element comparison of two arrays, and to
# include compound expressions:

(2*x) == (2**x)

array([ True,  True, False, False, False])

 As in the case of arithmetic operators, the comparison operators are implemented as
 ufuncs in NumPy; for example, when you write x < 3 , internally NumPy uses
 np.less(x, 3) . A summary of the comparison operators and their equivalent ufunc
 is shown here:

![4229_2.png](attachment:6ae1e6b9-e358-441b-b327-0db4b4b45e52.png)

In [125]:
rng = np.random.RandomState(seed=0)
x = rng.randint(10, size=(3,4))
print(x)

x<6

[[5 0 3 3]
 [7 9 3 5]
 [2 4 7 6]]


array([[ True,  True,  True,  True],
       [False, False,  True,  True],
       [ True,  True, False, False]])

In [126]:
# Working with Boolean Arrays

In [127]:
print(x)

# To count the number of True entries in a Boolean array, np.count_nonzero is useful:

# how many values less than 6?
print("1-: ",np.count_nonzero(x<6))

# We see that there are eight array entries that are less than 6. Another way to get at
# this information is to use np.sum; in this case, False is interpreted as 0, and True is
# interpreted as 1:

print("2-: ",np.sum(x<6))

# How many entries are not NaN? NaN never compares equal to anything (not even to
# itself), so `x != np.nan` is True for EVERY element and cannot detect NaNs.
# np.isnan is the reliable test; negating it marks the valid entries.
print("3-: ",np.sum(~np.isnan(x)))
print("4-: ",np.count_nonzero(~np.isnan(x)))

[[5 0 3 3]
 [7 9 3 5]
 [2 4 7 6]]
1-:  8
2-:  8
3-:  12
4-:  12


In [128]:
# how many values less than 6 in each row?
print(np.sum(x < 6, axis=1))

# how many values less than 6 in each column?
print(np.sum(x < 6, axis=0))

[4 2 2]
[2 2 2 2]


In [129]:
# If we’re interested in quickly checking whether any or all the values are true, we can
# use (you guessed it) np.any() or np.all() :

# are there any values greater than 8?
print(np.any(x>8))

# are there any values less than zero?
print(np.any(x<0))

# are all values less than 10?
print(np.all(x<10))

# are all values equal to 6?
print(np.all(x==6))

True
False
True
False


In [130]:
# are all values in each row less than 8?
print(np.all(x<8, axis=1))

# are all values in each column less than 3?
print(np.all(x<3, axis=0))

[ True False  True]
[False False False False]


In [131]:
print(x)
print(x<5)
print(x[x<5])

[[5 0 3 3]
 [7 9 3 5]
 [2 4 7 6]]
[[False  True  True  True]
 [False False  True False]
 [ True  True False False]]
[0 3 3 3 2 4]


In [132]:
# In Python, all nonzero integers will evaluate as True .
bool(42), bool(0), bool(-1)

(True, False, True)

In [133]:
bool(42 and 0)

False

In [134]:
bool(42 or 0)

True

In [135]:
# When you have an array of Boolean values in NumPy, this can be thought of as a
# string of bits where 1 = True and 0 = False , and the result of & and | operates in a
# similar manner as before:

A = np.array([1, 0, 1, 0, 1, 0], dtype=bool)
B = np.array([1, 1, 1, 0, 1, 1], dtype=bool)
A | B

array([ True,  True,  True, False,  True,  True])

In [136]:
x = np.arange(10)
(x > 4) & (x < 8)

array([False, False, False, False, False,  True,  True,  True, False,
       False])

Fancy Indexing

In [137]:
# Draw ten reproducible integers in [0, 100) from a generator seeded with 42.
# numpy is already imported as np in the first cell of the notebook, so it is
# not imported again here.
rand = np.random.RandomState(42)
x = rand.randint(100, size=10)
print(x)

[51 92 14 71 60 20 82 86 74 74]


In [138]:
# Pick three elements one at a time; the result is a plain Python list of scalars.
[x[k] for k in (3, 7, 2)]

[np.int64(71), np.int64(86), np.int64(14)]

In [139]:
# Fancy indexing: pass a list of indices to fetch all of those elements at once.
# The result is a NumPy array rather than a Python list.
ind = [3,7,4]
x[ind]

array([71, 86, 60])

In [140]:
# With fancy indexing the result takes the shape of the index array, not the
# shape of the array being indexed: a 2x2 index array gives a 2x2 result.
ind = np.array([3, 7, 4, 5]).reshape(2, 2)
x[ind]

array([[71, 86],
       [60, 20]])

In [141]:
# A 3x4 grid holding 0..11 in row-major order, used by the indexing examples below.
X = np.arange(12).reshape(3, 4)
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [142]:
# In several dimensions the first index array selects rows and the second
# selects columns; they are paired element by element, so this picks
# X[0, 2], X[1, 1] and X[2, 3].
row = np.array([0,1,2])
col = np.array([2,1,3])
X[row,col]

array([ 2,  5, 11])

In [143]:
# Index arrays are broadcast against each other: a column of row indices
# combined with a 1-D array of column indices gives a two-dimensional result.
X[row[:, np.newaxis], col] # row[:, np.newaxis] has shape (3, 1)

array([[ 2,  1,  3],
       [ 6,  5,  7],
       [10,  9, 11]])

In [144]:
# Here, each row value is matched with each column vector, exactly as we saw in
# broadcasting of arithmetic operations. For example:

row[:, np.newaxis] * col

array([[0, 0, 0],
       [2, 1, 3],
       [4, 2, 6]])


Combined Indexing

In [145]:
# Show X again as the reference for the combined-indexing examples that follow.
print(X)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [146]:
# Combine a simple index with a fancy index: row 2, columns 2, 0 and 1 (in that order).
X[2,[2,0,1]]

array([10,  8,  9])

In [147]:
# Combine a slice with a fancy index: every row from 1 onward, columns 2, 0 and 1.
X[1:, [2, 0, 1]]

array([[ 6,  4,  5],
       [10,  8,  9]])

In [148]:
# Combine fancy indexing with a Boolean mask over the columns.
mask = np.array([1, 0, 1, 0], dtype=bool)
X[row[:, np.newaxis], mask]  # mask is True at indices 0 and 2, so only columns 0 and 2 are kept

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10]])

Modifying Values with Fancy Indexing

In [149]:
# Fancy indexing also works on the left-hand side of an assignment: every
# position listed in i is overwritten with the same scalar.
i = np.array([2, 1, 8, 4])
x = np.arange(10)
x[i] = 99
print(x)

[ 0 99 99  3 99  5  6  7 99  9]


In [150]:
# Any assignment-type operation works with a fancy index; here 10 is
# subtracted from the elements selected by i.
x[i] = x[i] - 10
print(x)

[ 0 89 89  3 89  5  6  7 89  9]


In [151]:
# Assign a different value to each selected position: index 0 receives 4 and
# index 2 receives 6.
x = np.zeros(10)
x[[0, 2]] = (4, 6)
print(x)

[4. 0. 6. 0. 0. 0. 0. 0. 0. 0.]


In [152]:
# Repeated indices in an assignment: index 0 is listed twice. The operation
# first assigns x[0] = 4, followed by x[0] = 6, so the 4 is overwritten and
# x[0] ends up holding the value 6.
x = np.zeros(10)
x[[0, 0]] = (4, 6)
print(x)

[6. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [153]:
# Repeated indices do not accumulate with an augmented assignment:
# x[i] += 1 evaluates x[i] + 1 and assigns the result back, so positions 3 and 4
# are incremented only once even though they appear several times in i.
i = [2, 3, 3, 4, 4, 4]
x[i] += 1
x

array([6., 0., 1., 1., 1., 0., 0., 0., 0., 0.])

In [154]:
# To apply the operation once per occurrence of an index, use the ufunc's at()
# method: np.add.at adds 1 in place for every entry of i, so repeated indices
# accumulate (unlike x[i] += 1).
x = np.zeros(10)
np.add.at(x, i, 1)
print(x)

[0. 0. 1. 2. 3. 0. 0. 0. 0. 0.]


Sorting Arrays

Fast Sorting in NumPy: np.sort and np.argsort

In [155]:
# np.sort returns a sorted copy of the array; x itself is left as it was.
x = np.array([2,1,4,3,5])
np.sort(x)

array([1, 2, 3, 4, 5])

In [156]:
# The sort method of the array sorts in place, so x itself is reordered.
x.sort()
print(x)

[1 2 3 4 5]


In [157]:
# argsort returns the indices that would sort the array, not the sorted values
x = np.array([2, 1, 4, 3, 5])
i = x.argsort()  # same result as np.argsort(x)
print(i)

# fancy-indexing with those indices yields the sorted array
x[i]

[1 0 3 2 4]


array([1, 2, 3, 4, 5])

Sorting along rows or columns

In [159]:
# NumPy's sorting algorithms accept an axis argument, which makes it possible to
# sort along specific rows or columns of a multidimensional array. Build a
# reproducible 4x6 array of integers in [0, 10) to demonstrate:

rand = np.random.RandomState(seed=42)
X = rand.randint(low=0, high=10, size=(4, 6))
print(X)

[[6 3 7 4 6 9]
 [2 6 7 4 3 7]
 [7 2 5 4 1 7]
 [5 1 4 0 9 5]]


In [160]:
# sort each column of X
# (axis=0 orders the values within every column independently; a new array is
# returned, and any relationship between the values of a row is lost)

np.sort(X, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [161]:
# sort each row of X
# (axis=1 orders the values within every row independently; a new array is
# returned, and any relationship between the values of a column is lost)

np.sort(X, axis=1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

Partial Sorts: Partitioning

In [163]:
# np.partition(x, kth=3) returns a new array in which the first three values
# are the three smallest of x and the remaining positions hold the remaining
# values. Within each of the two partitions the order is arbitrary.

x = np.array([7, 2, 1, 3, 6, 5, 4])
np.partition(x, kth=3)

array([2, 3, 1, 4, 6, 5, 7])

In [164]:
# Partition along axis=1: in the result, the first two slots of every row hold
# that row's two smallest values, and the remaining values fill the other slots.

np.partition(X, kth=2, axis=1)

array([[3, 4, 6, 7, 6, 9],
       [2, 3, 4, 7, 6, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 9, 5]])

In [165]:
# Partition along axis=0: in every column the two smallest values end up in the
# first two rows; the order inside each partition is not guaranteed.
np.partition(X, 2, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [166]:
# X itself is unchanged: np.sort and np.partition above returned new arrays.
print(X)

[[6 3 7 4 6 9]
 [2 6 7 4 3 7]
 [7 2 5 4 1 7]
 [5 1 4 0 9 5]]


In [168]:
# Like np.partition, but returns indices instead of values: for every row, the
# first two entries are the column indices of that row's two smallest values.
np.argpartition(X, 2, axis=1)

array([[1, 3, 0, 2, 4, 5],
       [0, 4, 3, 2, 1, 5],
       [4, 1, 3, 2, 0, 5],
       [3, 1, 2, 0, 4, 5]])

In [169]:
# Same along axis=0: for every column, the first two entries are the row
# indices of that column's two smallest values.
np.argpartition(X, 2, axis=0)

array([[1, 3, 3, 3, 2, 3],
       [3, 2, 2, 1, 1, 1],
       [0, 0, 1, 2, 0, 2],
       [2, 1, 0, 0, 3, 0]])