In [1]:
import numpy as np

resource: https://github.com/rougier/numpy-100/tree/master
- Note that because the goal is to improve my skills in numpy for deep learning, only selected exercises which I think will be useful are done in this notebook

#### 4. How to find the memory size of any array (★☆☆) 

In [43]:
#### 4. How to find the memory size of any array (★☆☆)

Z = np.zeros((10,10))
# float and integers are 8 bytes
# string
print("%d bytes" % (Z.size * Z.itemsize))
# alternatively
z_bytes = Z.nbytes
print(f'megabytes (Mb): {(Z.size * Z.itemsize) / 1e6}')

800 bytes
megabytes (Mb): 0.0008


In [177]:
# each value in double is 8 bytes
assert np.double(2).itemsize == 8

# float32 is 4 bytes each
assert np.float32(2).itemsize == 4

# flaot64 is 8 bytes each
assert np.float64(2).itemsize == 8

# each character in string contains 4 bytes
assert np.str_('4').itemsize == 4
assert np.str_('42').itemsize == 8
assert np.str_('423').itemsize == 12

# empty list if 8 bytes
assert np.array([]).itemsize == 8

# Get the entire item size of a list
a1 = np.array(['a','b','c'])
assert a1.itemsize * a1.size == 3 * 4 == 12

# # if different datatype in list, the bytes are not simple addition
a2 = np.array(['s',np.float32(4)]) 
assert a2.itemsize * a2.size != 4 + 4 
assert a2.itemsize * a2.size == 256 # not sure what is the calculation
# instead of calculating the byte size using `.itemsize` and `.size`, can use .nbytes
assert a2.nbytes == 256


# Use this if want the size of the numerical elements plus the whole reference machinery
# Else use itemsize
# b.__sizeof__() is equivalent to sys.getsizeof(b)
import sys
assert sys.getsizeof(a2) == a2.__sizeof__()


# Size of an numpy array
a3 = np.random.randn(2,3) # dtype == np.float64
assert a3.nbytes == 2 * 3 * 8 == 48

In [329]:
dt = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))])

Summarizing: If you want to know the size of the numerical elements use array.nbytes and it will work independently of whether there is a view or not. If you, on the other hand, want the size of the numerical elements plus the whole reference machinery you want to use getsizeof(array.base) to get reliable estimates independent of your view status.
- https://numpy.org/doc/stable/reference/generated/numpy.dtype.itemsize.html
- gs:find memory of numpy array

#### 15. Create a 2d array with 0 on the border and 1 inside (★☆☆)

In [191]:
arr = np.full((3,4),1)
arr[0,:] = 0
arr[-1,:] = 0
arr[:,0] = 0
arr[:,0-1] = 0
arr

array([[0, 0, 0, 0],
       [0, 1, 1, 0],
       [0, 0, 0, 0]])

In [197]:
# alternatively
arr = np.full((3,4),0)
# fill up the inside with 0
arr[1:-1,1:-1] = 1
arr

array([[0, 0, 0, 0],
       [0, 1, 1, 0],
       [0, 0, 0, 0]])

#### 15.2 Create a 2d array with 1 on the border and 0 inside (★☆☆)

In [196]:
arr = np.full((3,4),1)
# fill up the inside with 0
arr[1:-1,1:-1] = 0
arr

array([[1, 1, 1, 1],
       [1, 0, 0, 1],
       [1, 1, 1, 1]])

#### 16. How to add a border (filled with 0's) around an existing array? (★☆☆)


In [203]:
arr = np.ones((3,4))
np.pad(arr, pad_width=((1,1),(1,1)), constant_values=0)

array([[0., 0., 0., 0., 0., 0.],
       [0., 1., 1., 1., 1., 0.],
       [0., 1., 1., 1., 1., 0.],
       [0., 1., 1., 1., 1., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [208]:
np.pad(np.random.randn(5,3), pad_width=((1,1),(1,1)), mode='edge')

array([[ 1.09908085,  1.09908085,  0.21465397,  0.49015871,  0.49015871],
       [ 1.09908085,  1.09908085,  0.21465397,  0.49015871,  0.49015871],
       [-0.67275515, -0.67275515, -0.89263339,  1.14658691,  1.14658691],
       [ 1.38703574,  1.38703574, -0.86545056,  0.65138971,  0.65138971],
       [ 0.17295039,  0.17295039, -0.4398122 ,  0.9671162 ,  0.9671162 ],
       [ 0.18624052,  0.18624052, -1.12797759,  0.54915093,  0.54915093],
       [ 0.18624052,  0.18624052, -1.12797759,  0.54915093,  0.54915093]])

In [None]:
# fancy indexing

In [216]:
arr = np.ones((3,4))
arr[:,[0,-1]] = 0 
arr[[0,-1], :] = 0
arr

array([[0., 0., 0., 0.],
       [0., 1., 1., 0.],
       [0., 0., 0., 0.]])

In [242]:
Z = np.ones((5,5))
Z = np.pad(Z, pad_width=1, mode='constant', constant_values=0)
print(Z)

# Using fancy indexing
Z[:, [0, -1]] = 0
Z[[0, -1], :] = 0
print(Z)

[[0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 1. 1. 1. 1. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


#### 18. Create a 5x5 matrix with values 1,2,3,4 just below the diagonal (★☆☆)

In [245]:
arr = np.zeros((5,5))
arr

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [247]:
arr[np.arange(4)+1, np.arange(4)] = 1+np.arange(4)
arr

array([[0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 2., 0., 0., 0.],
       [0., 0., 3., 0., 0.],
       [0., 0., 0., 4., 0.]])

In [248]:
Z = np.diag(1+np.arange(4),k=-1)
print(Z)

[[0 0 0 0 0]
 [1 0 0 0 0]
 [0 2 0 0 0]
 [0 0 3 0 0]
 [0 0 0 4 0]]


In [252]:
# numpy diagonal
np.diag(1+np.arange(4))

array([[1, 0, 0, 0],
       [0, 2, 0, 0],
       [0, 0, 3, 0],
       [0, 0, 0, 4]])

In [237]:
array = np.repeat(np.array([[1,2,3,4]]), repeats=(4), axis=0)
array

array([[1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4]])

In [241]:
np.tril(array,k=-1)

array([[0, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 2, 0, 0],
       [1, 2, 3, 0]])

In [258]:
arr = np.array([1,2,3])
np.tile(arr, (2,1))


array([[1, 2, 3],
       [1, 2, 3]])

#### 19. Create a 8x8 matrix and fill it with a checkerboard pattern (★☆☆)

In [275]:
arr = np.zeros((8,8))
arr[0::2,0::2] = 1
arr[1::2,1::2] = 1
arr

array([[1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.]])

In [280]:
arr = np.zeros((8,8))
arr[0::2,0::2] = 1
arr[1::2,1::2] = 1
arr

array([[1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.]])

In [286]:
arr = np.zeros((8,8))
arr[0::2,1::2] = 1
arr[0::2,0::2] = 0
arr[1::2,0::2] = 1
arr[1::2,0::2] = 1
arr

array([[0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0., 1., 0.]])

In [277]:
Z = np.zeros((8,8),dtype=int)
Z[1::2,::2] = 1
Z[::2,1::2] = 1
Z

array([[0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0]])

#### 20. Consider a (6,7,8) shape array, what is the index (x,y,z) of the 100th element? (★☆☆)
- https://stackoverflow.com/questions/48135736/what-is-an-intuitive-explanation-of-np-unravel-index
- gs:nupy np.unravel_index

In [305]:
arr = np.random.randn(6,7,8)
# Converts a flat index or array of flat indices into a tuple of coordinate arrays.
np.ravel(arr)[100] # NOTE IMPORTANT, THIS IS WRONG: not 100

-0.3430478711930245

In [309]:
np.ravel(arr)[99] # not 100

-0.2813078919500143

In [296]:
np.unravel_index([22, 41, 37], (7,6))

(array([3, 6, 6]), array([4, 5, 1]))

In [310]:
idx = np.unravel_index([99], shape=(6,7,8)) # note, should not be 100

arr[idx[0],idx[1],idx[2]]

array([-0.28130789])

In [307]:
arr[*idx]

array([-0.34304787])

In [308]:
print(np.unravel_index(99,(6,7,8)))

(1, 5, 3)


In [526]:
7 * 8

56

In [528]:
(99 - 56-1)

42

In [530]:
5 * 8 + 3 -1

42

#### 21. Create a checkerboard 8x8 matrix using the tile function (★☆☆)

In [313]:
arr = np.array([[0,1],[1,0]])
np.tile(arr, (4,4))

array([[0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 1, 0]])

#### 22. Normalize a 5x5 random matrix (★☆☆)

In [319]:
arr = np.random.randn(5,5)
arr = (arr - np.mean(arr)) / np.std(arr)
arr

array([[-1.1686121 , -2.36634059,  0.44648665,  0.34409408,  1.19138853],
       [-0.17983409, -1.41200154, -1.16555204, -1.08514651,  0.62257486],
       [-1.03996309,  0.46251986,  0.65779388,  1.4169711 , -0.34036311],
       [-0.11862121,  0.5321625 ,  0.54637486,  2.07564422, -0.74967328],
       [ 1.00526775,  0.92972134, -0.2331702 , -0.31151633, -0.06020556]])

In [320]:
# Z = np.random.random((5,5))
Z = arr
Z = (Z - np.mean (Z)) / (np.std (Z))
print(Z)

[[-1.1686121  -2.36634059  0.44648665  0.34409408  1.19138853]
 [-0.17983409 -1.41200154 -1.16555204 -1.08514651  0.62257486]
 [-1.03996309  0.46251986  0.65779388  1.4169711  -0.34036311]
 [-0.11862121  0.5321625   0.54637486  2.07564422 -0.74967328]
 [ 1.00526775  0.92972134 -0.2331702  -0.31151633 -0.06020556]]


#### 23. Create a custom dtype that describes a color as four unsigned bytes (RGBA) (★☆☆)

In [359]:
dt = np.dtype([('R', np.uint),('G', np.uint),('B', np.uint),('A', np.uint)])
dt

dtype([('R', '<u8'), ('G', '<u8'), ('B', '<u8'), ('A', '<u8')])

In [393]:
np.empty(10, dtype=dt)

array([(0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0)],
      dtype=[('R', '<u8'), ('G', '<u8'), ('B', '<u8'), ('A', '<u8')])

In [401]:
color = np.dtype([("r", np.ubyte),
                  ("g", np.ubyte),
                  ("b", np.ubyte),
                  ("a", np.ubyte)])

np.empty(100, dtype=color)

array([(0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0),
       (0, 0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0), (0, 0,

In [321]:
kerneldt = np.dtype([('myintname', np.int32), ('myfloats', np.float64, 9)])

In [338]:
kerneldt = np.dtype([('myintname', np.int32), ('myfloats', np.float64, 9)])

arr = np.empty(2, dtype=kerneldt)

In [344]:
arr

array([(0, [0., 0., 0., 0., 0., 0., 0., 0., 0.]),
       (0, [0., 0., 0., 0., 0., 0., 0., 0., 0.])],
      dtype=[('myintname', '<i4'), ('myfloats', '<f8', (9,))])

In [337]:
np.dtype(np.float32(5))

dtype('float32')

#### 25. Given a 1D array, negate all elements which are between 3 and 8, in place. (★☆☆)

In [411]:
arr = np.arange(10)
arr[(arr>=3) & (arr<=8)] = -arr[(arr>=3) & (arr<=8)]
arr

array([ 0,  1,  2, -3, -4, -5, -6, -7, -8,  9])

In [413]:
sum(range(5))

10

In [416]:
# this value is added to the sum of items of the iterable
sum(range(5),start=-1)

9

#### 28. What are the result of the following expressions? (★☆☆)


In [440]:
assert str(np.array(0) / np.array(0))  == 'nan' # will have warnings but code runs
assert (np.array(0) // np.array(0)) == 0 # will have warnings but code runs
assert str(np.array([np.nan])) == '[nan]'
assert np.array([np.nan]).astype(int) == 0 
assert np.array([np.nan]).astype(int).astype(float) == 0

In [435]:
np.array([np.nan])

array([nan])

In [436]:
str(np.array([np.nan]))

'[nan]'

#### 29. How to round away from zero a float array ? (★☆☆)

In [441]:
Z = np.random.uniform(-10,+10,10)
print(np.copysign(np.ceil(np.abs(Z)), Z))

[-10.   1.   2.   4.   7.  -2.   8.   1.  -5.   3.]


In [442]:
Z

array([-9.25130647,  0.25571781,  1.35574151,  3.14430913,  6.1756912 ,
       -1.8820982 ,  7.09338272,  0.08133671, -4.95737553,  2.30389419])

In [444]:
np.ceil(np.abs(Z))

array([10.,  1.,  2.,  4.,  7.,  2.,  8.,  1.,  5.,  3.])

In [445]:
np.copysign(np.ceil(np.abs(Z)), Z)

array([-10.,   1.,   2.,   4.,   7.,  -2.,   8.,   1.,  -5.,   3.])

#### 30. How to find common values between two arrays? (★☆☆)

In [447]:
a1 = np.random.randint(0,10,10)
a2 = np.random.randint(0,10,10)
np.intersect1d(a1, a2)

array([1, 8, 9])

In [448]:
np.sqrt(-1) == np.emath.sqrt(-1)

  np.sqrt(-1) == np.emath.sqrt(-1)


False

In [449]:
np.sqrt(-1)

  np.sqrt(-1)


nan

In [450]:
np.emath.sqrt(-1)

1j

#### 33. How to get the dates of yesterday, today and tomorrow? (★☆☆)

In [454]:
yesterday = np.datetime64('today') - np.timedelta64(1)
today = np.datetime64('today')
tomorrow  = np.datetime64('today') + np.timedelta64(1)

yesterday, today, tomorrow

(numpy.datetime64('2023-12-01'),
 numpy.datetime64('2023-12-02'),
 numpy.datetime64('2023-12-03'))

In [455]:
#### 34. How to get all the dates corresponding to the month of July 2016? (★★☆)

In [457]:
np.arange('2016-07', '2016-08')

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [458]:
np.arange('2016-07', '2016-08', dtype='datetime64[D]')

array(['2016-07-01', '2016-07-02', '2016-07-03', '2016-07-04',
       '2016-07-05', '2016-07-06', '2016-07-07', '2016-07-08',
       '2016-07-09', '2016-07-10', '2016-07-11', '2016-07-12',
       '2016-07-13', '2016-07-14', '2016-07-15', '2016-07-16',
       '2016-07-17', '2016-07-18', '2016-07-19', '2016-07-20',
       '2016-07-21', '2016-07-22', '2016-07-23', '2016-07-24',
       '2016-07-25', '2016-07-26', '2016-07-27', '2016-07-28',
       '2016-07-29', '2016-07-30', '2016-07-31'], dtype='datetime64[D]')

#### 36. Extract the integer part of a random array of positive numbers using 4 different methods (★★☆)

In [459]:
Z = np.random.uniform(0,10,10)

print(Z - Z%1)
print(Z // 1)
print(np.floor(Z))
print(Z.astype(int))
print(np.trunc(Z))

[8. 9. 0. 5. 8. 0. 9. 7. 8. 3.]
[8. 9. 0. 5. 8. 0. 9. 7. 8. 3.]
[8. 9. 0. 5. 8. 0. 9. 7. 8. 3.]
[8 9 0 5 8 0 9 7 8 3]
[8. 9. 0. 5. 8. 0. 9. 7. 8. 3.]


In [464]:
Z

array([8.13829468, 9.45250775, 0.81597766, 5.10392599, 8.68995578,
       0.45496586, 9.22240799, 7.62539893, 8.03799815, 3.09668792])

In [463]:
Z % 1

array([0.13829468, 0.45250775, 0.81597766, 0.10392599, 0.68995578,
       0.45496586, 0.22240799, 0.62539893, 0.03799815, 0.09668792])

In [465]:
Z // 1

array([8., 9., 0., 5., 8., 0., 9., 7., 8., 3.])

In [466]:
np.floor(Z)

array([8., 9., 0., 5., 8., 0., 9., 7., 8., 3.])

In [467]:
Z.astype(int)

array([8, 9, 0, 5, 8, 0, 9, 7, 8, 3])

In [468]:
np.trunc(Z)

array([8., 9., 0., 5., 8., 0., 9., 7., 8., 3.])

#### 37. Create a 5x5 matrix with row values ranging from 0 to 4 (★★☆)

In [470]:
np.random.randint(low=0, high=4, size=(5,5))

array([[0, 0, 1, 1, 2],
       [1, 0, 1, 1, 2],
       [1, 2, 1, 0, 1],
       [3, 0, 2, 3, 3],
       [2, 3, 1, 2, 2]])

#### 38. Consider a generator function that generates 10 integers and use it to build an array (★☆☆)

In [474]:
def generator(size):
    for i in range(size):
        yield i
    return

np.array([i for i in generator(10)])

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [475]:
def generate():
    for x in range(10):
        yield x

# The number of items to read from iterable. The default is -1, which means all data is read.
Z = np.fromiter(generate(),dtype=float, count=-1)

print(Z)

[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]


#### 39. Create a vector of size 10 with values ranging from 0 to 1, both excluded (★★☆)


In [479]:
np.linspace(0,1,12)[1:-1]

array([0.09090909, 0.18181818, 0.27272727, 0.36363636, 0.45454545,
       0.54545455, 0.63636364, 0.72727273, 0.81818182, 0.90909091])

#### 40. Create a random vector of size 10 and sort it (★★☆)

In [483]:
arr = np.random.randn(3,10)
arr.sort(axis=1)
arr

array([[-1.43439621, -1.27436298, -0.95675948, -0.74161852, -0.1040121 ,
         0.51822477,  0.67857642,  1.15364777,  1.21842935,  1.83179134],
       [-1.44293286, -0.97655455, -0.58499816, -0.55309176, -0.51183248,
        -0.15377089,  0.00687253,  0.22616569,  0.50746826,  2.05195144],
       [-2.3244847 , -1.4495198 , -1.0159036 , -0.85591991, -0.84395537,
        -0.52813816, -0.24560439, -0.19047912, -0.05213646,  0.17029886]])

#### 41. How to sum a small array faster than np.sum? (★★☆)

In [484]:
arr = np.arange(10)
np.add.reduce(arr)

45

#### 45. Create random vector of size 10 and replace the maximum value by 0 (★★☆)


In [488]:
arr = np.random.randn(10)
arr[arr.argmax()] = 0
arr

array([-1.33926561, -1.35657154,  0.        ,  1.76727411, -0.23557547,
       -0.37203088, -0.52071336,  0.25819476,  1.061954  , -0.8285638 ])

#### 46. Create a structured array with `x` and `y` coordinates covering the [0,1]x[0,1] area (★★☆)

In [498]:
x = np.linspace(0,1,5)
y = np.linspace(0,1,5)
xv, yv = np.meshgrid(x,y)


In [502]:
Z = np.zeros((5,5), dtype=[('x',float),('y',float)])
Z['x'] = xv
Z

array([[(0.  , 0.), (0.25, 0.), (0.5 , 0.), (0.75, 0.), (1.  , 0.)],
       [(0.  , 0.), (0.25, 0.), (0.5 , 0.), (0.75, 0.), (1.  , 0.)],
       [(0.  , 0.), (0.25, 0.), (0.5 , 0.), (0.75, 0.), (1.  , 0.)],
       [(0.  , 0.), (0.25, 0.), (0.5 , 0.), (0.75, 0.), (1.  , 0.)],
       [(0.  , 0.), (0.25, 0.), (0.5 , 0.), (0.75, 0.), (1.  , 0.)]],
      dtype=[('x', '<f8'), ('y', '<f8')])

In [503]:
Z['y'] = yv
Z

array([[(0.  , 0.  ), (0.25, 0.  ), (0.5 , 0.  ), (0.75, 0.  ),
        (1.  , 0.  )],
       [(0.  , 0.25), (0.25, 0.25), (0.5 , 0.25), (0.75, 0.25),
        (1.  , 0.25)],
       [(0.  , 0.5 ), (0.25, 0.5 ), (0.5 , 0.5 ), (0.75, 0.5 ),
        (1.  , 0.5 )],
       [(0.  , 0.75), (0.25, 0.75), (0.5 , 0.75), (0.75, 0.75),
        (1.  , 0.75)],
       [(0.  , 1.  ), (0.25, 1.  ), (0.5 , 1.  ), (0.75, 1.  ),
        (1.  , 1.  )]], dtype=[('x', '<f8'), ('y', '<f8')])

In [491]:
xv, yv

[array([[0.  , 0.25, 0.5 , 0.75, 1.  ],
        [0.  , 0.25, 0.5 , 0.75, 1.  ],
        [0.  , 0.25, 0.5 , 0.75, 1.  ],
        [0.  , 0.25, 0.5 , 0.75, 1.  ],
        [0.  , 0.25, 0.5 , 0.75, 1.  ]]),
 array([[0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.25, 0.25, 0.25, 0.25, 0.25],
        [0.5 , 0.5 , 0.5 , 0.5 , 0.5 ],
        [0.75, 0.75, 0.75, 0.75, 0.75],
        [1.  , 1.  , 1.  , 1.  , 1.  ]])]

#### 47. Given two arrays, X and Y, construct the Cauchy matrix C (Cij =1/(xi - yj)) (★★☆)

In [508]:
X = np.random.randn(4) 
Y = np.random.randn(4)

X = X.reshape(-1,1) # (B,) -> (B,1)
Y = Y.reshape(1,-1) # (B,) -> (1,B)
c_inverse = (X - Y)
c = 1 / (X-Y)
c

array([[ -2.63008117,  -0.63041094,   2.42851816, -12.07598802],
       [  6.74636012,  -0.94533826,   1.06358288,   2.24398629],
       [ -0.76712039,  -0.39846561,  -1.95470473,  -0.99386891],
       [ -0.5343877 ,  -0.3249548 ,  -0.92651751,  -0.63536716]])

In [513]:
c.shape

(4, 4)

In [511]:
C = 1.0 / np.subtract.outer(X, Y)
C

array([[[[ -2.63008117,  -0.63041094,   2.42851816, -12.07598802]]],


       [[[  6.74636012,  -0.94533826,   1.06358288,   2.24398629]]],


       [[[ -0.76712039,  -0.39846561,  -1.95470473,  -0.99386891]]],


       [[[ -0.5343877 ,  -0.3249548 ,  -0.92651751,  -0.63536716]]]])

In [512]:
C.shape

(4, 1, 1, 4)

#### 50. How to find the closest value (to a given scalar) in a vector? (★★☆)

In [514]:
scalar = 0.2 # np.random.uniform(0,1)
arr = np.random.randn(10)

min_idx = np.argmin(abs(arr - scalar))
arr[min_idx]

0.4125410861603555

In [523]:
# generate uniform value between 0 and 1
np.random.uniform(0,1)

0.3808741738393616

In [515]:
arr

array([-1.55093623,  0.93014061, -0.2364752 , -0.03620578, -0.41053307,
       -1.22516217, -1.49359436,  0.58668121,  0.41254109,  0.84619952])

In [517]:
index = (np.abs(arr-scalar)).argmin()
print(arr[index])

0.4125410861603555


#### 51. Create a structured array representing a position (x,y) and a color (r,g,b) (★★☆)


In [535]:
Z = np.zeros(10, dtype=[ ('position', [ ('x', float, 1),
                                  ('y', float, 1)]),
                   ('color',    [ ('r', float, 1),
                                  ('g', float, 1),
                                  ('b', float, 1)])])
print(Z)

[((0., 0.), (0., 0., 0.)) ((0., 0.), (0., 0., 0.))
 ((0., 0.), (0., 0., 0.)) ((0., 0.), (0., 0., 0.))
 ((0., 0.), (0., 0., 0.)) ((0., 0.), (0., 0., 0.))
 ((0., 0.), (0., 0., 0.)) ((0., 0.), (0., 0., 0.))
 ((0., 0.), (0., 0., 0.)) ((0., 0.), (0., 0., 0.))]


  Z = np.zeros(10, dtype=[ ('position', [ ('x', float, 1),


#### 52. Consider a random vector with shape (100,2) representing coordinates, find point by point distances (★★☆)


In [563]:
arr = np.random.randn(100,2)
# distance = sum_i((xi-yi)**2).sqrt()
arr1 = arr.reshape(100,1,2)
arr2 = arr.reshape(1,100,2)
res = np.sqrt(np.sum((arr1 - arr2)**2, axis=2))
res

array([[0.        , 1.37915842, 1.41010368, ..., 3.13802504, 0.33488855,
        1.29744483],
       [1.37915842, 0.        , 1.00018956, ..., 2.02671724, 1.30982611,
        1.12470699],
       [1.41010368, 1.00018956, 0.        , ..., 2.96843665, 1.12941008,
        0.21115367],
       ...,
       [3.13802504, 2.02671724, 2.96843665, ..., 0.        , 3.20837438,
        3.12740241],
       [0.33488855, 1.30982611, 1.12941008, ..., 3.20837438, 0.        ,
        0.99364413],
       [1.29744483, 1.12470699, 0.21115367, ..., 3.12740241, 0.99364413,
        0.        ]])

In [564]:
Z = arr
X,Y = np.atleast_2d(Z[:,0], Z[:,1])
D = np.sqrt( (X-X.T)**2 + (Y-Y.T)**2)
print(D)

[[0.         1.37915842 1.41010368 ... 3.13802504 0.33488855 1.29744483]
 [1.37915842 0.         1.00018956 ... 2.02671724 1.30982611 1.12470699]
 [1.41010368 1.00018956 0.         ... 2.96843665 1.12941008 0.21115367]
 ...
 [3.13802504 2.02671724 2.96843665 ... 0.         3.20837438 3.12740241]
 [0.33488855 1.30982611 1.12941008 ... 3.20837438 0.         0.99364413]
 [1.29744483 1.12470699 0.21115367 ... 3.12740241 0.99364413 0.        ]]


In [565]:
np.sqrt(((X - X.T)**2 + (Y-Y.T)**2))

array([[0.        , 1.37915842, 1.41010368, ..., 3.13802504, 0.33488855,
        1.29744483],
       [1.37915842, 0.        , 1.00018956, ..., 2.02671724, 1.30982611,
        1.12470699],
       [1.41010368, 1.00018956, 0.        , ..., 2.96843665, 1.12941008,
        0.21115367],
       ...,
       [3.13802504, 2.02671724, 2.96843665, ..., 0.        , 3.20837438,
        3.12740241],
       [0.33488855, 1.30982611, 1.12941008, ..., 3.20837438, 0.        ,
        0.99364413],
       [1.29744483, 1.12470699, 0.21115367, ..., 3.12740241, 0.99364413,
        0.        ]])

In [557]:
np.allclose(res, D)

True

In [566]:
# Much faster with scipy
import scipy
# Thanks Gavin Heverly-Coulson (#issue 1)
import scipy.spatial

Z = np.random.random((10,2))
D = scipy.spatial.distance.cdist(Z,Z)
print(D)

[[0.         0.13261599 0.53035249 0.66323427 0.75126628 0.73893211
  0.64587491 0.78107605 0.54896474 0.36110236]
 [0.13261599 0.         0.64917902 0.77859043 0.88173643 0.86397972
  0.77262336 0.91313015 0.68151906 0.49371801]
 [0.53035249 0.64917902 0.         0.13620807 0.29770559 0.57315365
  0.49120464 0.36413466 0.29196754 0.27824581]
 [0.66323427 0.77859043 0.13620807 0.         0.27284894 0.63329209
  0.56413982 0.35110923 0.37297753 0.40571784]
 [0.75126628 0.88173643 0.29770559 0.27284894 0.         0.41450601
  0.37511412 0.07913361 0.25003089 0.4028267 ]
 [0.73893211 0.86397972 0.57315365 0.63329209 0.41450601 0.
  0.09722364 0.3546582  0.28456355 0.42311671]
 [0.64587491 0.77262336 0.49120464 0.56413982 0.37511412 0.09722364
  0.         0.3299677  0.19926174 0.32599554]
 [0.78107605 0.91313015 0.36413466 0.35110923 0.07913361 0.3546582
  0.3299677  0.         0.24802042 0.42311626]
 [0.54896474 0.68151906 0.29196754 0.37297753 0.25003089 0.28456355
  0.19926174 0.248020

#### 53. How to convert a float (32 bits) array into an integer (32 bits) in place?

In [571]:
arr = np.random.randn(2,3)
assert arr.dtype == np.float64
arr.astype(np.int32)


dtype('float64')

#### 54. How to read the following file? (★★☆)

```
1, 2, 3, 4, 5
6,  ,  , 7, 8
 ,  , 9,10,11
```

In [574]:
from io import StringIO
# Fake file
s = StringIO('''1, 2, 3, 4, 5

                6,  ,  , 7, 8

                 ,  , 9,10,11
''')
Z = np.genfromtxt(s, delimiter=",", dtype=np.int32)
print(Z)

[[ 1  2  3  4  5]
 [ 6 -1 -1  7  8]
 [-1 -1  9 10 11]]


#### 55. What is the equivalent of enumerate for numpy arrays? (★★☆)

In [578]:
arr = np.random.randn(4,5)
print(arr)

[[ 0.58816099 -0.18413209  1.12744024  0.19073896 -0.23311172]
 [-0.34055486  0.71604779  0.83845414  0.34800331 -1.08613593]
 [ 1.52388439  0.49962839 -0.65352075 -0.70334088  0.21618474]
 [-0.49656601  1.56884503  1.73403019  1.32087836 -1.44682355]]


In [587]:
for index, value in np.ndenumerate(arr):
    print(index, value)

(0, 0) 0.588160989790001
(0, 1) -0.1841320852934122
(0, 2) 1.1274402409330648
(0, 3) 0.19073895653054868
(0, 4) -0.233111716728374
(1, 0) -0.34055486291940024
(1, 1) 0.7160477868475582
(1, 2) 0.8384541409442194
(1, 3) 0.34800331031083404
(1, 4) -1.0861359310839542
(2, 0) 1.523884392812116
(2, 1) 0.4996283858818928
(2, 2) -0.6535207540610698
(2, 3) -0.7033408805014214
(2, 4) 0.21618473995405665
(3, 0) -0.49656601107674464
(3, 1) 1.5688450283685245
(3, 2) 1.734030185881629
(3, 3) 1.320878358234996
(3, 4) -1.4468235454647957


In [None]:
A = np.arange(3).reshape(3,1)
B = np.arange(3).reshape(1,3)
it = np.nditer([A,B,None])
for x,y,z in it: z[...] = x + y

In [591]:
for i in np.ndindex(arr.shape):
    print(i)

(0, 0)
(0, 1)
(0, 2)
(0, 3)
(0, 4)
(1, 0)
(1, 1)
(1, 2)
(1, 3)
(1, 4)
(2, 0)
(2, 1)
(2, 2)
(2, 3)
(2, 4)
(3, 0)
(3, 1)
(3, 2)
(3, 3)
(3, 4)


#### 56. Generate a generic 2D Gaussian-like array (★★☆)

In [None]:
arr = np.random.randn(2,3)


In [592]:
X, Y = np.meshgrid(np.linspace(-1,1,10), np.linspace(-1,1,10))
D = np.sqrt(X*X+Y*Y)
sigma, mu = 1.0, 0.0
G = np.exp(-( (D-mu)**2 / ( 2.0 * sigma**2 ) ) )
print(G)

[[0.36787944 0.44822088 0.51979489 0.57375342 0.60279818 0.60279818
  0.57375342 0.51979489 0.44822088 0.36787944]
 [0.44822088 0.54610814 0.63331324 0.69905581 0.73444367 0.73444367
  0.69905581 0.63331324 0.54610814 0.44822088]
 [0.51979489 0.63331324 0.73444367 0.81068432 0.85172308 0.85172308
  0.81068432 0.73444367 0.63331324 0.51979489]
 [0.57375342 0.69905581 0.81068432 0.89483932 0.9401382  0.9401382
  0.89483932 0.81068432 0.69905581 0.57375342]
 [0.60279818 0.73444367 0.85172308 0.9401382  0.98773022 0.98773022
  0.9401382  0.85172308 0.73444367 0.60279818]
 [0.60279818 0.73444367 0.85172308 0.9401382  0.98773022 0.98773022
  0.9401382  0.85172308 0.73444367 0.60279818]
 [0.57375342 0.69905581 0.81068432 0.89483932 0.9401382  0.9401382
  0.89483932 0.81068432 0.69905581 0.57375342]
 [0.51979489 0.63331324 0.73444367 0.81068432 0.85172308 0.85172308
  0.81068432 0.73444367 0.63331324 0.51979489]
 [0.44822088 0.54610814 0.63331324 0.69905581 0.73444367 0.73444367
  0.69905581 0

### 57. How to randomly place p elements in a 2D array? (★★☆)

In [630]:
p = np.arange(6)

n = 10
p = 3
arr = np.zeros((10,10))

In [631]:
arr

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [612]:

np.random.choice(range(n*n), p, replace=False)

array([], shape=(0, 1, 2, 3, 4, 5), dtype=int64)

In [632]:
np.put(arr, np.random.choice(range(n*n), p, replace=False), 1)

In [634]:
range(n*n)

range(0, 100)

In [633]:
arr

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [621]:
n = 10
p = 3
Z = np.zeros((n,n))
np.put(Z, np.random.choice(range(n*n), p, replace=False),1)
print(Z)

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]]


In [638]:
np.put(Z, 4, 5)

In [639]:
Z

array([[0., 0., 0., 0., 5., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 5.]])

#### 58. Subtract the mean of each row of a matrix (★★☆)

In [642]:
arr = np.random.randn(3,4)
mean = arr.mean(axis=1).reshape(-1,1)
out = arr -  mean
out

array([[-0.48593374, -0.91584424, -0.47336603,  1.87514401],
       [-0.0165794 ,  1.23827796, -0.16359292, -1.05810564],
       [-0.01507912, -0.50069005,  0.08685133,  0.42891783]])

In [647]:
mean = arr.mean(axis=1, keepdims=True)

out2 = arr - mean
out2

array([[-0.48593374, -0.91584424, -0.47336603,  1.87514401],
       [-0.0165794 ,  1.23827796, -0.16359292, -1.05810564],
       [-0.01507912, -0.50069005,  0.08685133,  0.42891783]])

#### 59. How to sort an array by the nth column? (★★☆)

In [650]:
arr = np.random.randn(3,4)
np.sort(arr, axis=2)

AxisError: axis 2 is out of bounds for array of dimension 2

In [652]:
n = 2
arr[np.argsort(arr[:,n])]

array([[-0.36535971,  0.26737404, -0.23760077,  0.76739251],
       [-1.34692885, -0.91457998,  0.54363422, -0.3311822 ],
       [ 0.59387584, -0.16118981,  0.74986464,  0.49813251]])

#### 60. How to tell if a given 2D array has null columns? (★★☆)

In [655]:
arr = np.random.randn(3,4)
arr[:,2] = np.nan
arr

array([[ 0.79985346, -3.11668824,         nan, -1.05570492],
       [ 0.22247118, -0.12403024,         nan,  0.0978099 ],
       [-0.0370751 , -0.47981207,         nan,  0.74529397]])

In [665]:
arr.any(axis=0)

array([ True,  True,  True,  True])

In [667]:
np.isnan(arr).all(axis=0)

array([False, False,  True, False])

when null is 0

In [668]:
# null : 0 
Z = arr
arr[:,1] = 0
print((~Z.any(axis=0)).any())

True


In [671]:
Z

array([[ 0.79985346,  0.        ,         nan, -1.05570492],
       [ 0.22247118,  0.        ,         nan,  0.0978099 ],
       [-0.0370751 ,  0.        ,         nan,  0.74529397]])

In [669]:
Z.any(axis=0)

array([ True, False,  True,  True])

#### 61. Find the nearest value from a given value in an array (★★☆)

In [680]:
value = 1
arr = np.random.randint(low=0, high=10, size=(12,))
arr = np.random.uniform(0,1,(10))
arr

array([0.11892463, 0.91420248, 0.71680921, 0.52677798, 0.43387591,
       0.43599015, 0.72686141, 0.0077248 , 0.2455863 , 0.65933462])

In [681]:
val = 0.2
arr[abs(arr - val).argmin()]

0.2455862991064407

In [682]:
val = 0.2
m = arr.flat[np.abs(arr - val).argmin()]
print(m)

0.2455862991064407


In [684]:
arr.flat[2]

0.71680921039529

In [685]:
x = np.arange(1, 7).reshape(2, 3)
x

array([[1, 2, 3],
       [4, 5, 6]])

In [687]:
x.flat[5]

6

#### 64. Consider a given vector, how to add 1 to each element indexed by a second vector (be careful with repeated indices)? (★★★)

In [690]:
arr = np.zeros(10)
index = [0,0,1,1,2,2]
np.add.at(arr, index, 1)

In [691]:
arr

array([2., 2., 2., 0., 0., 0., 0., 0., 0., 0.])

#### 65. How to accumulate elements of a vector (X) to an array (F) based on an index list (I)? (★★★)

In [695]:
F = np.zeros(10)
X = np.arange(10)
I = np.random.randint(10, size=(10,))

In [696]:
I

array([3, 6, 6, 1, 9, 1, 8, 6, 6, 9])

In [700]:
np.put_along_axis(F, I, X, axis=0)

In [701]:
F

array([0., 5., 0., 0., 0., 0., 8., 0., 6., 9.])

In [704]:
I

array([3, 6, 6, 1, 9, 1, 8, 6, 6, 9])

In [705]:
X

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [703]:
F = np.bincount(I, weights=X)
F

array([ 0.,  8.,  0.,  0.,  0.,  0., 18.,  0.,  6., 13.])

In [706]:
# pos == 1, values is 3 and 5 = 8
# pos == 9, values is 4 and 9 = 13

66. Considering a (w,h,3) image of (dtype=ubyte), compute the number of unique colors (★★☆)

In [711]:
w, h = 256, 256
I = np.random.randint(0, 4, (h, w, 3))#.astype(np.ubyte)

In [712]:
I

array([[[1, 1, 2],
        [3, 1, 3],
        [2, 3, 2],
        ...,
        [0, 0, 1],
        [0, 3, 0],
        [2, 0, 1]],

       [[0, 3, 0],
        [3, 3, 0],
        [2, 1, 3],
        ...,
        [0, 3, 1],
        [1, 0, 1],
        [3, 0, 1]],

       [[1, 3, 0],
        [3, 2, 1],
        [2, 1, 2],
        ...,
        [0, 3, 1],
        [0, 2, 0],
        [0, 0, 2]],

       ...,

       [[1, 1, 1],
        [0, 0, 3],
        [1, 3, 2],
        ...,
        [3, 1, 3],
        [1, 1, 3],
        [3, 2, 0]],

       [[3, 2, 2],
        [1, 1, 1],
        [3, 3, 3],
        ...,
        [3, 1, 3],
        [3, 1, 2],
        [3, 0, 2]],

       [[1, 0, 0],
        [1, 2, 1],
        [3, 3, 3],
        ...,
        [2, 3, 3],
        [2, 3, 2],
        [1, 1, 2]]])

In [713]:
colors = np.unique(I.reshape(-1, 3), axis=0)
n = len(colors)

In [714]:
colors

array([[0, 0, 0],
       [0, 0, 1],
       [0, 0, 2],
       [0, 0, 3],
       [0, 1, 0],
       [0, 1, 1],
       [0, 1, 2],
       [0, 1, 3],
       [0, 2, 0],
       [0, 2, 1],
       [0, 2, 2],
       [0, 2, 3],
       [0, 3, 0],
       [0, 3, 1],
       [0, 3, 2],
       [0, 3, 3],
       [1, 0, 0],
       [1, 0, 1],
       [1, 0, 2],
       [1, 0, 3],
       [1, 1, 0],
       [1, 1, 1],
       [1, 1, 2],
       [1, 1, 3],
       [1, 2, 0],
       [1, 2, 1],
       [1, 2, 2],
       [1, 2, 3],
       [1, 3, 0],
       [1, 3, 1],
       [1, 3, 2],
       [1, 3, 3],
       [2, 0, 0],
       [2, 0, 1],
       [2, 0, 2],
       [2, 0, 3],
       [2, 1, 0],
       [2, 1, 1],
       [2, 1, 2],
       [2, 1, 3],
       [2, 2, 0],
       [2, 2, 1],
       [2, 2, 2],
       [2, 2, 3],
       [2, 3, 0],
       [2, 3, 1],
       [2, 3, 2],
       [2, 3, 3],
       [3, 0, 0],
       [3, 0, 1],
       [3, 0, 2],
       [3, 0, 3],
       [3, 1, 0],
       [3, 1, 1],
       [3, 1, 2],
       [3,

#### 68. Considering a one-dimensional vector D, how to compute means of subsets of D using a vector S of same size describing subset  indices? (★★★)

In [716]:
D = np.random.randn(10)
S = [0,0,0,1,1,1,2,2,2,2]

D_sums = np.bincount(S, weights=D)
D_counts = np.bincount(S)

In [717]:
D_sums 

array([ 3.00367887, -1.56492899,  0.07120616])

In [719]:
D_counts

array([3, 3, 4])

In [720]:
D_sums / D_counts

array([ 1.00122629, -0.521643  ,  0.01780154])

#### 69. How to get the diagonal of a dot product? (★★★)

In [723]:
arr1 = np.random.randn(2,3)
arr2 = np.random.randn(2,3)
np.einsum('ij,ij -> i', arr1, arr2)

array([ 0.95772641, -0.52772261])

In [724]:
np.diagonal(arr1.dot(arr2.T))

array([ 0.95772641, -0.52772261])

In [725]:
np.sum(arr1 * arr2, axis=1)

array([ 0.95772641, -0.52772261])

#### 70. Consider the vector [1, 2, 3, 4, 5], how to build a new vector with 3 consecutive zeros interleaved between each value? (★★★)

In [727]:
V = [1,2,3,4,5]
V[::3] = [0,0,0]

ValueError: attempt to assign sequence of size 3 to extended slice of size 2

In [739]:
V = [1,2,3,4,5]
nz = 3
# V = [1,0,0,0,2,0,0,0,3,0,0,0,4,0,0,0,5]
V0 = np.zeros(len(V) + (len(V)-1)*(nz))
V0[::3] = V

ValueError: could not broadcast input array from shape (5,) into shape (6,)

In [740]:
V0[::3] # 6 entries

array([0., 0., 0., 0., 0., 0.])

In [742]:
V0[::4] # 5 entries

array([0., 0., 0., 0., 0.])

In [743]:
V0[::4] = V
V0

array([1., 0., 0., 0., 2., 0., 0., 0., 3., 0., 0., 0., 4., 0., 0., 0., 5.])

#### 72. How to swap two rows of an array? (★★★)

In [748]:
arr = np.random.randn(3,4)
arr

array([[-0.14630951,  0.32804657, -0.31254678,  0.28583638],
       [ 0.13375134, -0.02502779, -0.09040018, -1.44258345],
       [-0.01959636, -0.00168695,  1.34631544, -1.39704729]])

In [750]:
arr[[0,1]] = arr[[1,0]]
arr

array([[ 0.13375134, -0.02502779, -0.09040018, -1.44258345],
       [-0.14630951,  0.32804657, -0.31254678,  0.28583638],
       [-0.01959636, -0.00168695,  1.34631544, -1.39704729]])

#### 73. Consider a set of 10 triplets describing 10 triangles (with shared vertices), find the set of unique line segments composing all the  triangles (★★★)


In [754]:
faces = np.random.randint(0,100,(10,3))
faces

array([[96, 19,  4],
       [48, 23, 30],
       [16, 51, 45],
       [81, 55, 42],
       [ 3, 87, 33],
       [29, 94, 31],
       [16, 41, 41],
       [89, 28, 32],
       [73, 88,  8],
       [37, 82, 58]])

In [755]:
faces.repeat(2,axis=1)

array([[96, 96, 19, 19,  4,  4],
       [48, 48, 23, 23, 30, 30],
       [16, 16, 51, 51, 45, 45],
       [81, 81, 55, 55, 42, 42],
       [ 3,  3, 87, 87, 33, 33],
       [29, 29, 94, 94, 31, 31],
       [16, 16, 41, 41, 41, 41],
       [89, 89, 28, 28, 32, 32],
       [73, 73, 88, 88,  8,  8],
       [37, 37, 82, 82, 58, 58]])

In [756]:
F = np.roll(faces.repeat(2,axis=1),-1,axis=1)
F

array([[96, 19, 19,  4,  4, 96],
       [48, 23, 23, 30, 30, 48],
       [16, 51, 51, 45, 45, 16],
       [81, 55, 55, 42, 42, 81],
       [ 3, 87, 87, 33, 33,  3],
       [29, 94, 94, 31, 31, 29],
       [16, 41, 41, 41, 41, 16],
       [89, 28, 28, 32, 32, 89],
       [73, 88, 88,  8,  8, 73],
       [37, 82, 82, 58, 58, 37]])

In [757]:
F = F.reshape(len(F)*3,2)
F

array([[96, 19],
       [19,  4],
       [ 4, 96],
       [48, 23],
       [23, 30],
       [30, 48],
       [16, 51],
       [51, 45],
       [45, 16],
       [81, 55],
       [55, 42],
       [42, 81],
       [ 3, 87],
       [87, 33],
       [33,  3],
       [29, 94],
       [94, 31],
       [31, 29],
       [16, 41],
       [41, 41],
       [41, 16],
       [89, 28],
       [28, 32],
       [32, 89],
       [73, 88],
       [88,  8],
       [ 8, 73],
       [37, 82],
       [82, 58],
       [58, 37]])

In [758]:
F = np.sort(F,axis=1)
F

array([[19, 96],
       [ 4, 19],
       [ 4, 96],
       [23, 48],
       [23, 30],
       [30, 48],
       [16, 51],
       [45, 51],
       [16, 45],
       [55, 81],
       [42, 55],
       [42, 81],
       [ 3, 87],
       [33, 87],
       [ 3, 33],
       [29, 94],
       [31, 94],
       [29, 31],
       [16, 41],
       [41, 41],
       [16, 41],
       [28, 89],
       [28, 32],
       [32, 89],
       [73, 88],
       [ 8, 88],
       [ 8, 73],
       [37, 82],
       [58, 82],
       [37, 58]])

In [760]:
G = F.view( dtype=[('p0',F.dtype),('p1',F.dtype)] )
G

array([[(19, 96)],
       [( 4, 19)],
       [( 4, 96)],
       [(23, 48)],
       [(23, 30)],
       [(30, 48)],
       [(16, 51)],
       [(45, 51)],
       [(16, 45)],
       [(55, 81)],
       [(42, 55)],
       [(42, 81)],
       [( 3, 87)],
       [(33, 87)],
       [( 3, 33)],
       [(29, 94)],
       [(31, 94)],
       [(29, 31)],
       [(16, 41)],
       [(41, 41)],
       [(16, 41)],
       [(28, 89)],
       [(28, 32)],
       [(32, 89)],
       [(73, 88)],
       [( 8, 88)],
       [( 8, 73)],
       [(37, 82)],
       [(58, 82)],
       [(37, 58)]], dtype=[('p0', '<i8'), ('p1', '<i8')])

In [761]:
G = np.unique(G)
print(G)

[( 3, 33) ( 3, 87) ( 4, 19) ( 4, 96) ( 8, 73) ( 8, 88) (16, 41) (16, 45)
 (16, 51) (19, 96) (23, 30) (23, 48) (28, 32) (28, 89) (29, 31) (29, 94)
 (30, 48) (31, 94) (32, 89) (33, 87) (37, 58) (37, 82) (41, 41) (42, 55)
 (42, 81) (45, 51) (55, 81) (58, 82) (73, 88)]


#### 74. Given a sorted array C that corresponds to a bincount, how to produce an array A such that np.bincount(A) == C? (★★★)

In [764]:
C = np.bincount([1,1,2,3,4,4,6])
# The number of repetitions for each element. repeats is broadcasted to fit the shape of the given axis.
A = np.repeat(np.arange(len(C)), C)
print(A)

[1 1 2 3 4 4 6]


In [765]:
C

array([0, 2, 1, 1, 2, 0, 1])

In [766]:
np.arange(len(C))

array([0, 1, 2, 3, 4, 5, 6])

#### 75. How to compute averages using a sliding window over an array? (★★★)

In [767]:
def moving_average(a, n=3) :
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n
Z = np.arange(20)
print(moving_average(Z, n=3))

[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18.]


In [770]:
n = 3
cs = np.cumsum(Z)
cs

array([  0,   1,   3,   6,  10,  15,  21,  28,  36,  45,  55,  66,  78,
        91, 105, 120, 136, 153, 171, 190])

In [776]:
cs = np.cumsum(Z)
cs[n:] = cs[n:] - cs[:-n]
cs[n:]

array([ 6,  9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54])

In [780]:
cs

array([ 0,  1,  3,  6,  9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45,
       48, 51, 54])

In [779]:
cs[n-1:] / n

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18.])

In [None]:
[0,1,2,3,4,5,6,7,8,9,10]

In [784]:
from numpy.lib.stride_tricks import sliding_window_view
sliding_window = sliding_window_view(Z, window_shape=3)
sliding_window

array([[ 0,  1,  2],
       [ 1,  2,  3],
       [ 2,  3,  4],
       [ 3,  4,  5],
       [ 4,  5,  6],
       [ 5,  6,  7],
       [ 6,  7,  8],
       [ 7,  8,  9],
       [ 8,  9, 10],
       [ 9, 10, 11],
       [10, 11, 12],
       [11, 12, 13],
       [12, 13, 14],
       [13, 14, 15],
       [14, 15, 16],
       [15, 16, 17],
       [16, 17, 18],
       [17, 18, 19]])

In [785]:
sliding_window.mean(axis=-1)

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18.])

#### 76. Consider a one-dimensional array Z, build a two-dimensional array whose first row is (Z[0],Z[1],Z[2]) and each subsequent row is  shifted by 1 (last row should be (Z[-3],Z[-2],Z[-1]) (★★★)

In [790]:
Z = np.arange(12)
Z

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [791]:
Z.reshape(-1,3)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [792]:
from numpy.lib import stride_tricks

def rolling(a, window):
    shape = (a.size - window + 1, window)
    strides = (a.strides[0], a.strides[0])
    return stride_tricks.as_strided(a, shape=shape, strides=strides)
Z = rolling(np.arange(10), 3)
print(Z)

# Author: Jeff Luo (@Jeff1999)

Z = np.arange(10)
print(sliding_window_view(Z, window_shape=3))

[[0 1 2]
 [1 2 3]
 [2 3 4]
 [3 4 5]
 [4 5 6]
 [5 6 7]
 [6 7 8]
 [7 8 9]]
[[0 1 2]
 [1 2 3]
 [2 3 4]
 [3 4 5]
 [4 5 6]
 [5 6 7]
 [6 7 8]
 [7 8 9]]


In [794]:
window = 3
(Z.size - window + 1, window)

(8, 3)

In [799]:
Z.strides

(8,)

In [801]:
# shape is the output shape that we want. 
stride_tricks.as_strided(Z, shape=(Z.size - window + 1, window), strides=(Z.strides[0],Z.strides[0]))

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4],
       [3, 4, 5],
       [4, 5, 6],
       [5, 6, 7],
       [6, 7, 8],
       [7, 8, 9]])

In [802]:
# otherwise
sliding_window_view(Z, window_shape=3)

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4],
       [3, 4, 5],
       [4, 5, 6],
       [5, 6, 7],
       [6, 7, 8],
       [7, 8, 9]])

#### 77. How to negate a boolean, or to change the sign of a float inplace? (★★★)

In [810]:
Z = np.random.randint(0,2,100)
Z

array([0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1])

In [814]:
arr2 = Z.copy()
# out: f not provided or None, a freshly-allocated array is returned.
np.logical_not(arr2, out=arr2) # do inplace
arr2

array([0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1])

In [818]:
Z = np.random.uniform(-1.0,1.0,100)
Z

array([ 0.19781699, -0.17005326,  0.62951396,  0.63166529,  0.10489033,
       -0.79346749, -0.2421489 , -0.58242948,  0.48281369, -0.99523328,
        0.05774931, -0.48761676, -0.57400641,  0.94767106, -0.26661449,
       -0.96373705, -0.70237614,  0.01586019, -0.18577113, -0.96359665,
        0.77233529, -0.11414287, -0.0419412 ,  0.28929492, -0.25114257,
       -0.71090687, -0.07638582,  0.32462081, -0.36612893, -0.4971453 ,
       -0.1601139 ,  0.2251824 ,  0.24522814,  0.81791926, -0.20034213,
        0.60809556, -0.49664228, -0.30223626,  0.15734709, -0.48106517,
       -0.19844774, -0.26329489, -0.68532491, -0.70633334,  0.70168121,
        0.56080756,  0.82346878,  0.75825587, -0.87617151, -0.73208472,
       -0.21586617, -0.02855463, -0.46661186,  0.93017719,  0.60296849,
        0.46166312, -0.91190086, -0.8775438 , -0.29462907,  0.50883839,
       -0.83355456, -0.20707596,  0.12988792, -0.79171167,  0.73224449,
        0.46233164,  0.41618394, -0.90373281,  0.81745394, -0.40

In [819]:
z2 = Z.copy()
np.negative(Z, out=z2)

array([-0.19781699,  0.17005326, -0.62951396, -0.63166529, -0.10489033,
        0.79346749,  0.2421489 ,  0.58242948, -0.48281369,  0.99523328,
       -0.05774931,  0.48761676,  0.57400641, -0.94767106,  0.26661449,
        0.96373705,  0.70237614, -0.01586019,  0.18577113,  0.96359665,
       -0.77233529,  0.11414287,  0.0419412 , -0.28929492,  0.25114257,
        0.71090687,  0.07638582, -0.32462081,  0.36612893,  0.4971453 ,
        0.1601139 , -0.2251824 , -0.24522814, -0.81791926,  0.20034213,
       -0.60809556,  0.49664228,  0.30223626, -0.15734709,  0.48106517,
        0.19844774,  0.26329489,  0.68532491,  0.70633334, -0.70168121,
       -0.56080756, -0.82346878, -0.75825587,  0.87617151,  0.73208472,
        0.21586617,  0.02855463,  0.46661186, -0.93017719, -0.60296849,
       -0.46166312,  0.91190086,  0.8775438 ,  0.29462907, -0.50883839,
        0.83355456,  0.20707596, -0.12988792,  0.79171167, -0.73224449,
       -0.46233164, -0.41618394,  0.90373281, -0.81745394,  0.40

#### 82. Compute a matrix rank (★★★)

In [820]:
Z = np.random.uniform(0,1,(10,10))
U, S, V = np.linalg.svd(Z) # Singular Value Decomposition
rank = np.sum(S > 1e-10)
print(rank)

10


In [821]:
rank = np.linalg.matrix_rank(Z)
print(rank)

10


#### 83. How to find the most frequent value in an array?

In [825]:
arr = np.random.randint(0,10, size=(20))
np.bincount(arr).argmax()

4

#### 84. Extract all the contiguous 3x3 blocks from a random 10x10 matrix (★★★)

In [831]:
arr = np.arange(100).reshape(10,10)
arr

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])

In [832]:
sliding_window_view(arr, window_shape=(3,3))

array([[[[ 0,  1,  2],
         [10, 11, 12],
         [20, 21, 22]],

        [[ 1,  2,  3],
         [11, 12, 13],
         [21, 22, 23]],

        [[ 2,  3,  4],
         [12, 13, 14],
         [22, 23, 24]],

        [[ 3,  4,  5],
         [13, 14, 15],
         [23, 24, 25]],

        [[ 4,  5,  6],
         [14, 15, 16],
         [24, 25, 26]],

        [[ 5,  6,  7],
         [15, 16, 17],
         [25, 26, 27]],

        [[ 6,  7,  8],
         [16, 17, 18],
         [26, 27, 28]],

        [[ 7,  8,  9],
         [17, 18, 19],
         [27, 28, 29]]],


       [[[10, 11, 12],
         [20, 21, 22],
         [30, 31, 32]],

        [[11, 12, 13],
         [21, 22, 23],
         [31, 32, 33]],

        [[12, 13, 14],
         [22, 23, 24],
         [32, 33, 34]],

        [[13, 14, 15],
         [23, 24, 25],
         [33, 34, 35]],

        [[14, 15, 16],
         [24, 25, 26],
         [34, 35, 36]],

        [[15, 16, 17],
         [25, 26, 27],
         [35, 36, 37]],

    

#### 86. Consider a set of p matrices with shape (n,n) and a set of p vectors with shape (n,1). How to compute the sum of of the p matrix products at once? (result has shape (n,1)) (★★★)

In [834]:
p = 5
n=3
a1 = np.stack([np.random.randn(n,n) for _ in range(p)]) # (n,n) -> (p,n,n)
a2 = np.stack([np.random.randn(n,1) for _ in range(p)]) # (n,1) -> (p,n,1)

In [835]:
a1.reshape(n,n,p).dot(a2).

(5, 3, 3)

In [837]:
# using einsum
np.einsum('ijk,ikl -> jl', a1, a2)

array([[ 2.04073632],
       [-1.82424532],
       [-4.93335053]])

In [836]:
# dot product between p and p and then n and n
S = np.tensordot(a1, a2, axes=[[0, 2], [0, 1]]) # a1=(p,n,n), a2= (p,n,1) -> (n,1)
print(S)

[[ 2.04073632]
 [-1.82424532]
 [-4.93335053]]


#### 87. Consider a 16x16 array, how to get the block-sum (block size is 4x4)? (★★★)

In [851]:
Z = np.arange(16*16).reshape(16,16)
k = 4

windows = np.lib.stride_tricks.sliding_window_view(Z, (k, k))

In [852]:
windows.shape

(13, 13, 4, 4)

In [857]:
windows[:5,:5]

array([[[[  0,   1,   2,   3],
         [ 16,  17,  18,  19],
         [ 32,  33,  34,  35],
         [ 48,  49,  50,  51]],

        [[  1,   2,   3,   4],
         [ 17,  18,  19,  20],
         [ 33,  34,  35,  36],
         [ 49,  50,  51,  52]],

        [[  2,   3,   4,   5],
         [ 18,  19,  20,  21],
         [ 34,  35,  36,  37],
         [ 50,  51,  52,  53]],

        [[  3,   4,   5,   6],
         [ 19,  20,  21,  22],
         [ 35,  36,  37,  38],
         [ 51,  52,  53,  54]],

        [[  4,   5,   6,   7],
         [ 20,  21,  22,  23],
         [ 36,  37,  38,  39],
         [ 52,  53,  54,  55]]],


       [[[ 16,  17,  18,  19],
         [ 32,  33,  34,  35],
         [ 48,  49,  50,  51],
         [ 64,  65,  66,  67]],

        [[ 17,  18,  19,  20],
         [ 33,  34,  35,  36],
         [ 49,  50,  51,  52],
         [ 65,  66,  67,  68]],

        [[ 18,  19,  20,  21],
         [ 34,  35,  36,  37],
         [ 50,  51,  52,  53],
         [ 66,  67,  68

In [843]:
S = windows[::k, ::k, ...].sum(axis=(-2, -1))
S.shape

(4, 4)

In [860]:
S

array([[16., 16., 16., 16.],
       [16., 16., 16., 16.],
       [16., 16., 16., 16.],
       [16., 16., 16., 16.]])

In [853]:
windows.shape

(13, 13, 4, 4)

In [854]:
windows[::k,::k,...]

array([[[[  0,   1,   2,   3],
         [ 16,  17,  18,  19],
         [ 32,  33,  34,  35],
         [ 48,  49,  50,  51]],

        [[  4,   5,   6,   7],
         [ 20,  21,  22,  23],
         [ 36,  37,  38,  39],
         [ 52,  53,  54,  55]],

        [[  8,   9,  10,  11],
         [ 24,  25,  26,  27],
         [ 40,  41,  42,  43],
         [ 56,  57,  58,  59]],

        [[ 12,  13,  14,  15],
         [ 28,  29,  30,  31],
         [ 44,  45,  46,  47],
         [ 60,  61,  62,  63]]],


       [[[ 64,  65,  66,  67],
         [ 80,  81,  82,  83],
         [ 96,  97,  98,  99],
         [112, 113, 114, 115]],

        [[ 68,  69,  70,  71],
         [ 84,  85,  86,  87],
         [100, 101, 102, 103],
         [116, 117, 118, 119]],

        [[ 72,  73,  74,  75],
         [ 88,  89,  90,  91],
         [104, 105, 106, 107],
         [120, 121, 122, 123]],

        [[ 76,  77,  78,  79],
         [ 92,  93,  94,  95],
         [108, 109, 110, 111],
         [124, 125, 126

In [855]:
windows.sum(axis=(-2,-1))

array([[ 408,  424,  440,  456,  472,  488,  504,  520,  536,  552,  568,
         584,  600],
       [ 664,  680,  696,  712,  728,  744,  760,  776,  792,  808,  824,
         840,  856],
       [ 920,  936,  952,  968,  984, 1000, 1016, 1032, 1048, 1064, 1080,
        1096, 1112],
       [1176, 1192, 1208, 1224, 1240, 1256, 1272, 1288, 1304, 1320, 1336,
        1352, 1368],
       [1432, 1448, 1464, 1480, 1496, 1512, 1528, 1544, 1560, 1576, 1592,
        1608, 1624],
       [1688, 1704, 1720, 1736, 1752, 1768, 1784, 1800, 1816, 1832, 1848,
        1864, 1880],
       [1944, 1960, 1976, 1992, 2008, 2024, 2040, 2056, 2072, 2088, 2104,
        2120, 2136],
       [2200, 2216, 2232, 2248, 2264, 2280, 2296, 2312, 2328, 2344, 2360,
        2376, 2392],
       [2456, 2472, 2488, 2504, 2520, 2536, 2552, 2568, 2584, 2600, 2616,
        2632, 2648],
       [2712, 2728, 2744, 2760, 2776, 2792, 2808, 2824, 2840, 2856, 2872,
        2888, 2904],
       [2968, 2984, 3000, 3016, 3032, 3048, 3064, 

In [859]:
Z = np.ones((16,16))
k = 4
S = np.add.reduceat(
    np.add.reduceat(Z, np.arange(0, Z.shape[0], k), axis=0),
    np.arange(0, Z.shape[1], k), axis=1)
print(S)

[[16. 16. 16. 16.]
 [16. 16. 16. 16.]
 [16. 16. 16. 16.]
 [16. 16. 16. 16.]]


In [868]:
a = np.arange(16*16).reshape(16,16)
np.add.reduceat(a, np.arange(0, a.shape[0], k), axis=0)

array([[ 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144,
        148, 152, 156],
       [352, 356, 360, 364, 368, 372, 376, 380, 384, 388, 392, 396, 400,
        404, 408, 412],
       [608, 612, 616, 620, 624, 628, 632, 636, 640, 644, 648, 652, 656,
        660, 664, 668],
       [864, 868, 872, 876, 880, 884, 888, 892, 896, 900, 904, 908, 912,
        916, 920, 924]])

In [881]:
np.add.reduceat(a, np.arange(0, a.shape[0], k), axis=0).shape

(4, 16)

In [869]:
np.arange(0, a.shape[0], k)

array([ 0,  4,  8, 12])

In [885]:
a[0:4,:].sum(axis=0)

array([ 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144,
       148, 152, 156])

In [877]:
a

array([[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15],
       [ 16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,
         29,  30,  31],
       [ 32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
         45,  46,  47],
       [ 48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,
         61,  62,  63],
       [ 64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,
         77,  78,  79],
       [ 80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,
         93,  94,  95],
       [ 96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
        109, 110, 111],
       [112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
        125, 126, 127],
       [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
        141, 142, 143],
       [144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
        157, 158, 159],
       [160, 161, 162, 163, 16

In [858]:
np.arange(0, Z.shape[0], k),

(array([ 0,  4,  8, 12]),)

#### 89. How to get the n largest values of an array (★★★)

In [886]:
Z = np.arange(10000)
np.random.shuffle(Z)
n = 5

# Slow
print (Z[np.argsort(Z)[-n:]])

# Fast
print (Z[np.argpartition(-Z,n)[:n]])

[9995 9996 9997 9998 9999]
[9999 9998 9996 9997 9995]


In [887]:
def cartesian(arrays):
    arrays = [np.asarray(a) for a in arrays]
    shape = (len(x) for x in arrays)

    ix = np.indices(shape, dtype=int)
    ix = ix.reshape(len(arrays), -1).T

    for n, arr in enumerate(arrays):
        ix[:, n] = arrays[n][ix[:, n]]

    return ix

print (cartesian(([1, 2, 3], [4, 5], [6, 7])))

[[1 4 6]
 [1 4 7]
 [1 5 6]
 [1 5 7]
 [2 4 6]
 [2 4 7]
 [2 5 6]
 [2 5 7]
 [3 4 6]
 [3 4 7]
 [3 5 6]
 [3 5 7]]


In [901]:
arrays_og = [1, 2, 3], [4, 5], [6, 7]
arrays = [np.asarray(a) for a in arrays_og]
shape = [len(x) for x in arrays]

# x, y, z position
ix = np.indices(shape, dtype=int) # (len(arrays), *shape)
ix2 = ix.reshape(len(arrays), -1).T

In [904]:
ix.reshape(len(arrays),-1)

array([[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2],
       [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1],
       [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]])

In [905]:
ix.reshape(len(arrays),-1).T

array([[0, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 1, 1],
       [1, 0, 0],
       [1, 0, 1],
       [1, 1, 0],
       [1, 1, 1],
       [2, 0, 0],
       [2, 0, 1],
       [2, 1, 0],
       [2, 1, 1]])

In [908]:
arrays[0][ix2[:,0]]

array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])

In [910]:
arrays[1][ix2[:,1]]

array([4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5])

In [912]:
arrays[2][ix2[:,2]]

array([6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7])

In [891]:
arrays

[array([1, 2, 3]), array([4, 5]), array([6, 7])]

In [896]:
shape

[3, 2, 2]

In [899]:
ix.shape # ()

(3, 3, 2, 2)

In [898]:
np.indices(shape, dtype=int)[0]

array([[[0, 0],
        [0, 0]],

       [[1, 1],
        [1, 1]],

       [[2, 2],
        [2, 2]]])

In [900]:
np.indices(shape, dtype=int)[1]

array([[[0, 0],
        [1, 1]],

       [[0, 0],
        [1, 1]],

       [[0, 0],
        [1, 1]]])

#### 92. Consider a large vector Z, compute Z to the power of 3 using 3 different methods (★★★)


In [913]:
x = np.random.rand(int(5e7))

%timeit np.power(x,3)
%timeit x*x*x
%timeit np.einsum('i,i,i->i',x,x,x)

411 ms ± 5.73 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
69.7 ms ± 1.07 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
70.1 ms ± 602 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### 93. Consider two arrays A and B of shape (8,3) and (2,2). How to find rows of A that contain elements of each row of B regardless of the order of the elements in B? (★★★)


In [914]:
A = np.random.randint(0,5,(8,3))
B = np.random.randint(0,5,(2,2))

C = (A[..., np.newaxis, np.newaxis] == B)

In [921]:
A # array([0, 1, 3, 4, 6, 7])

array([[1, 3, 4],
       [4, 0, 1],
       [1, 1, 1],
       [3, 1, 4],
       [2, 2, 0],
       [0, 3, 1],
       [2, 4, 0],
       [4, 3, 3]])

In [None]:
v

In [922]:
B

array([[4, 2],
       [4, 0]])

In [915]:
C

array([[[[False, False],
         [False, False]],

        [[False, False],
         [False, False]],

        [[ True, False],
         [ True, False]]],


       [[[ True, False],
         [ True, False]],

        [[False, False],
         [False,  True]],

        [[False, False],
         [False, False]]],


       [[[False, False],
         [False, False]],

        [[False, False],
         [False, False]],

        [[False, False],
         [False, False]]],


       [[[False, False],
         [False, False]],

        [[False, False],
         [False, False]],

        [[ True, False],
         [ True, False]]],


       [[[False,  True],
         [False, False]],

        [[False,  True],
         [False, False]],

        [[False, False],
         [False,  True]]],


       [[[False, False],
         [False,  True]],

        [[False, False],
         [False, False]],

        [[False, False],
         [False, False]]],


       [[[False,  True],
         [False, False]],



In [925]:
C.shape

(8, 3, 2, 2)

In [924]:
rows = np.where(C.any((3,1)).all(1))[0]
rows

array([0, 1, 3, 4, 6, 7])

#### 94. Considering a 10x3 matrix, extract rows with unequal values (e.g. [2,2,3]) (★★★)

In [929]:
Z = np.random.randint(0,5,(10,3))
Z

array([[0, 3, 2],
       [0, 0, 0],
       [3, 4, 0],
       [0, 0, 4],
       [0, 1, 3],
       [3, 4, 4],
       [2, 0, 4],
       [4, 0, 1],
       [2, 4, 1],
       [1, 0, 3]])

In [932]:
Z[:,1:]

array([[3, 2],
       [0, 0],
       [4, 0],
       [0, 4],
       [1, 3],
       [4, 4],
       [0, 4],
       [0, 1],
       [4, 1],
       [0, 3]])

In [933]:
Z[:,:-1]

array([[0, 3],
       [0, 0],
       [3, 4],
       [0, 0],
       [0, 1],
       [3, 4],
       [2, 0],
       [4, 0],
       [2, 4],
       [1, 0]])

In [931]:
# solution for arrays of all dtypes (including string arrays and record arrays)
E = np.all(Z[:,1:] == Z[:,:-1], axis=1)
E

array([False,  True, False, False, False, False, False, False, False,
       False])

In [934]:
U = Z[~E]
U

array([[0, 3, 2],
       [3, 4, 0],
       [0, 0, 4],
       [0, 1, 3],
       [3, 4, 4],
       [2, 0, 4],
       [4, 0, 1],
       [2, 4, 1],
       [1, 0, 3]])

#### 96. Given a two dimensional array, how to extract unique rows? (★★★)

In [935]:
uZ = np.unique(Z, axis=0)
print(uZ)

[[0 0 0]
 [0 0 4]
 [0 1 3]
 [0 3 2]
 [1 0 3]
 [2 0 4]
 [2 4 1]
 [3 4 0]
 [3 4 4]
 [4 0 1]]


#### 97. Considering 2 vectors A & B, write the einsum equivalent of inner, outer, sum, and mul function (★★★)


In [940]:
a = np.random.randn(3)
b = np.random.randn(3)

In [941]:
# inner
np.einsum('i,j->', a, b)

2.39166428653553

In [950]:
# inner
np.einsum('i,i', a, b)

-0.17948615712789384

In [942]:
# outer
np.einsum('i,j->ij', a, b)

array([[ 0.03593506,  0.06627533,  0.02867029],
       [-0.67910163, -1.25247268, -0.54181172],
       [ 1.29983038,  2.39728779,  1.03705146]])

In [943]:
# product
np.einsum('i,i->i', a, b)

array([ 0.03593506, -1.25247268,  1.03705146])

In [944]:
a * b

array([ 0.03593506, -1.25247268,  1.03705146])

In [946]:
# sum
np.einsum('i->', a)

-0.6449402662937395

#### 100. Compute bootstrapped 95% confidence intervals for the mean of a 1D array X (i.e., resample the elements of an array with replacement N times, compute the mean of each sample, and then compute percentiles over the means). (★★★)


In [957]:
X = np.random.randn(10)

N = 1000 # number of bootstrap samples
idx = np.random.randint(0, X.size, (N, X.size)) # (1000, 10)

means = X[idx].mean(axis=1) # (1000)
confint = np.percentile(means, [2.5, 97.5])
confint # ()

array([-0.59217843,  0.82580126])