# 100 numpy exercises

This is a collection of exercises gathered from the numpy mailing list, Stack Overflow,
and the numpy documentation. The goal of this collection is to offer a quick reference for both old
and new users, and also to provide a set of exercises for those who teach.


If you find an error or think you've a better way to solve some of them, feel
free to open an issue at <https://github.com/rougier/numpy-100>.

File automatically generated. See the documentation to update questions/answers/hints programmatically.

Run the `initialise.py` module, then call a random question with `pick()`, a hint towards its solution with
`hint(n)`, and the answer with `answer(n)`, where n is the number of the picked question.

In [1]:
%run initialise.py

In [2]:
import numpy as np

In [117]:
pick()

92. Consider a large vector Z, compute Z to the power of 3 using 3 different methods (★★★)


#### 38. Consider a generator function that generates 10 integers and use it to build an array (★☆☆)

In [4]:
# p 38
def generate():
    """Return a list of 10 random integers, each drawn from [0, 1500)."""
    # One np.random.randint call per element, in order, so the sequence of
    # draws from the global random state is the same as an explicit loop.
    return [np.random.randint(1500) for _ in range(10)]

x = generate()
print(x)

[1263, 645, 376, 55, 543, 1, 392, 795, 1415, 779]


In [5]:
def generate_answer():
    """Yield the integers 0 through 9, one at a time."""
    yield from range(10)

print(np.fromiter(generate_answer(), float, count=10))


[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]


##### answer

In [6]:
answer(38)

def generate():
    for x in range(10):
        yield x
Z = np.fromiter(generate(),dtype=float,count=-1)
print(Z)


In [7]:
def generate():
    """Yield the integers 0 through 9, one at a time.

    The values are produced with ``yield`` rather than ``return`` because the
    caller consumes this function by iterating over it.
    """
    yield from range(10)
Z = np.fromiter(generate(),dtype=float,count=-1)
print(Z)
# np.fromiter takes an iterable and a dtype => so the iterable can come from a (generator) function!
# count => how many items to read from the iterable? => could build a multiplication table (i<j)! or.. binomial coefficients?

[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]


##### Which is the fastest code?

In [8]:
%timeit -n 10000 np.fromiter(generate(), dtype = int, count = -1)
%timeit -n 10000 generate_answer
%timeit -n 10000 np.fromiter((i for i in range(10)), dtype  = int, count = -1)

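# NOTE: the second line times only the bare name `generate_answer` (it is never called),
# so its ~14 ns result is not comparable with the two fromiter timings.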

1.52 µs ± 126 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
14.1 ns ± 0.132 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
1.58 µs ± 159 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


#### 21. Create a checkerboard 8x8 matrix using the tile function (★☆☆)

In [9]:
x = np.tile([[0, 1],[1, 0]], (4, 4))
print(x)
type(np.array([[0,1],[1,0]]))

[[0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]]


numpy.ndarray

In [10]:
answer(21)

Z = np.tile( np.array([[0,1],[1,0]]), (4,4))
print(Z)


In [11]:
Z = np.tile( np.array([[0,1],[1,0]]), (4,4))
print(Z)
type(Z)

[[0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]]


numpy.ndarray

##### Is it faster to construct from an ndarray than from a list?

In [12]:
%timeit -n 10000 np.tile( np.array([[0,1],[1,0]]), (4,4))
%timeit -n 10000 np.tile([[0, 1],[1, 0]], (4, 4))
# Similar!
# The one below is a little faster because of the conversion.. but.. hmm
# I think an ndarray is more stable than a list.

4.01 µs ± 171 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
3.83 µs ± 160 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


#### 45. Create random vector of size 10 and replace the maximum value by 0 (★★☆)

In [13]:
arr = np.random.randint(1500, size = 10)
arr

array([ 223, 1213, 1259,  617,  430,  691,  497,  475,  951, 1001])

In [14]:
arr[np.argmax(arr)] = 0
arr

array([ 223, 1213,    0,  617,  430,  691,  497,  475,  951, 1001])

In [15]:
answer(45)

Z = np.random.random(10)
Z[Z.argmax()] = 0
print(Z)


In [16]:
def answers():
    """Build a random 10-element float vector and zero out its largest entry."""
    vec = np.random.random(10)
    peak = np.argmax(vec)  # position of the (first) maximum
    vec[peak] = 0
    return vec

def my_answer():
    """Build 10 random integers in [0, 1500) and zero out the largest one."""
    values = np.random.randint(1500, size=10)
    values[values.argmax()] = 0  # first occurrence of the maximum
    return values

%timeit -n 10000 answers()
%timeit -n 10000 my_answer()

988 ns ± 59.2 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
6.29 µs ± 197 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


#### 52. Consider a random vector with shape (100,2) representing coordinates, find point by point distances (★★☆)

In [17]:
answer(52)

Z = np.random.random((10,2))
X,Y = np.atleast_2d(Z[:,0], Z[:,1])
D = np.sqrt( (X-X.T)**2 + (Y-Y.T)**2)
print(D)

# Much faster with scipy
import scipy
# Thanks Gavin Heverly-Coulson (#issue 1)
import scipy.spatial

Z = np.random.random((10,2))
D = scipy.spatial.distance.cdist(Z,Z)
print(D)


In [46]:
Z = np.random.random((10,2))
X, Y = Z[: , 0].reshape(1, -1), Z[:, 1].reshape(1, -1)
# X,Y = np.atleast_2d(Z[:,0], Z[:,1])
D = np.sqrt( (X-X.T)**2 + (Y-Y.T)**2)
print(D)

# Much faster with scipy
import scipy
# Thanks Gavin Heverly-Coulson (#issue 1)
import scipy.spatial

Z = np.random.random((10,2))
D = scipy.spatial.distance.cdist(Z, Z)
print(D)

[[0.         0.18791461 0.17783022 0.48230442 0.18396528 0.3497838
  0.74318482 0.67019526 0.67539626 0.61761597]
 [0.18791461 0.         0.20010891 0.32815297 0.3692325  0.4032246
  0.75721608 0.69342096 0.77616148 0.57510026]
 [0.17783022 0.20010891 0.         0.36030943 0.27726901 0.20503698
  0.58039033 0.51108392 0.57640776 0.43993397]
 [0.48230442 0.32815297 0.36030943 0.         0.63187576 0.45668016
  0.64648859 0.609411   0.81648815 0.40031445]
 [0.18396528 0.3692325  0.27726901 0.63187576 0.         0.34796972
  0.7315298  0.65432985 0.57584422 0.66872464]
 [0.3497838  0.4032246  0.20503698 0.45668016 0.34796972 0.
  0.39598333 0.32139747 0.38223988 0.3298261 ]
 [0.74318482 0.75721608 0.58039033 0.64648859 0.7315298  0.39598333
  0.         0.07742925 0.35938968 0.25470967]
 [0.67019526 0.69342096 0.51108392 0.609411   0.65432985 0.32139747
  0.07742925 0.         0.31128783 0.23949174]
 [0.67539626 0.77616148 0.57640776 0.81648815 0.57584422 0.38223988
  0.35938968 0.3112878

#### 23. Create a custom dtype that describes a color as four unsigned bytes (RGBA) (★☆☆)

In [4]:
answer(23)

color = np.dtype([("r", np.ubyte),
                  ("g", np.ubyte),
                  ("b", np.ubyte),
                  ("a", np.ubyte)])


In [5]:
color = np.dtype([("r", np.ubyte),
                  ("g", np.ubyte),
                  ("b", np.ubyte),
                  ("a", np.ubyte)])

In [13]:
a = np.array([(1, 1, 0, 1), 2], dtype= color)
# Auto broadcasting! A scalar fills every field, e.g. np.array([1], dtype=color) => (1, 1, 1, 1)!

array([(1, 1, 0, 1), (2, 2, 2, 2)],
      dtype=[('r', 'u1'), ('g', 'u1'), ('b', 'u1'), ('a', 'u1')])

#### 65. How to accumulate elements of a vector (X) to an array (F) based on an index list (I)? (★★★)

In [57]:
# The easiest approach => brute force, doing it by hand
def my_answer_65(index_list, x):
    """Accumulate the values of ``x`` into bins selected by ``index_list``.

    Element ``k`` of the result is the sum of every ``x[j]`` for which
    ``index_list[j] == k``.

    Args:
        index_list: sequence of integer bin indices (expected to be
            non-negative).
        x: sequence of values; it is paired with ``index_list`` via ``zip``,
            so surplus trailing elements on either side are ignored.

    Returns:
        A list of length ``max(index_list) + 1`` holding the per-bin sums, or
        an empty list when ``index_list`` is empty.
    """
    # np.amax raises ValueError on an empty input, so handle that case first.
    if len(index_list) == 0:
        return []
    totals = [0] * (int(np.amax(index_list)) + 1)
    for idx, val in zip(index_list, x):
        totals[idx] += val
    return totals

##### but.. above is too slow..

In [36]:
answer(65)

# Author: Alan G Isaac

X = [1,2,3,4,5,6]
I = [1,3,9,3,4,1]
F = np.bincount(I,X)
print(F)


In [56]:
index_list = np.random.randint(30, size = 30)
x = np.random.randint(100, size = index_list.size)
# I[0]=1 => the value at index 1 of the frequency array (F) goes up; the weight here is 1, so it goes up by 1
# Likewise, I[5]=1 with weight 6 raises it by 6 => 7

##### Which code is faster?

In [59]:
%timeit -n 10000 my_answer_65(index_list, x)
%timeit -n 10000 np.bincount(index_list, x)
# As expected, the built-in function is the boss

7.99 µs ± 263 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
1.29 µs ± 206 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


#### 11. Create a 3x3 identity matrix (★☆☆)

In [68]:
mat = np.identity(3, dtype = int)
mat
# As expected, the built-in function is the boss...
# eye function is more useful.. but speed?

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])

In [74]:
%timeit -n 100000 np.identity(3, dtype = int)
%timeit -n 100000 np.eye(3, dtype = int)
# eye is actually faster;; is that a bug?

1.5 µs ± 45.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
1.1 µs ± 67.4 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


#### 8. Reverse a vector (first element becomes last) (★☆☆)

In [90]:
v = np.random.random(size = 10)
print(v)
v = v[::-1]
print(v)

[0.02798373 0.28788049 0.94514938 0.97362092 0.13439314 0.82789724
 0.86297401 0.59976331 0.03461704 0.75502482]
[0.75502482 0.03461704 0.59976331 0.86297401 0.82789724 0.13439314
 0.97362092 0.94514938 0.28788049 0.02798373]


In [86]:
answer(8)

Z = np.arange(50)
Z = Z[::-1]
print(Z)


#### 30. How to find common values between two arrays? (★☆☆)

In [93]:
a = np.random.randint(10, size = 8)
b = np.random.randint(10, size = 8)
ans = np.intersect1d(a, b)
print(a, b, ans, sep='\n')

[2 3 1 5 2 9 8 4]
[6 6 9 7 1 2 8 7]
[1 2 8 9]


In [94]:
answer(30)

Z1 = np.random.randint(0,10,10)
Z2 = np.random.randint(0,10,10)
print(np.intersect1d(Z1,Z2))


#### 16. How to add a border (filled with 0's) around an existing array? (★☆☆)

In [107]:
# Surround a 5x5 array of ones with a one-element-wide border of zeros.
x = np.ones((5, 5))
x = np.pad(x, pad_width = 1, mode = 'constant', constant_values=0) # zero padding, as used in CNNs
x

array([[0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 1., 1., 1., 1., 0.],
       [0., 1., 1., 1., 1., 1., 0.],
       [0., 1., 1., 1., 1., 1., 0.],
       [0., 1., 1., 1., 1., 1., 0.],
       [0., 1., 1., 1., 1., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0.]])

#### 3. Create a null vector of size 10 (★☆☆)

In [115]:
np.zeros(shape = (10))

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

#### 92. Consider a large vector Z, compute Z to the power of 3 using 3 different methods (★★★)

In [118]:
np.einsum()

# Author: Ryan G.

x = np.random.rand(int(5e7))

%timeit np.power(x,3)
%timeit x*x*x
%timeit np.einsum('i,i,i->i',x,x,x)
