In [2]:
SEED = 100

# Numpy

Take a look at the exercises on the site  [101 NumPy Exercises for Data Analysis](https://www.machinelearningplus.com/python/101-numpy-exercises-python/) and resolve some of them. We recommend these 3
- 45. How to find the most frequent value in a numpy array?
- 47. How to replace all values greater than a given value to a given cutoff?
- 50. How to convert an array of arrays into a flat 1d array?

In [3]:
import numpy as np
from scipy.stats import mode

np.random.seed(seed=SEED)

In [4]:
# 45 How to find the most frequent value in a numpy array?
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='float32')

print(mode(iris[:, 2]))

ModeResult(mode=np.float32(1.5), count=np.int64(14))


In [5]:
# 47 How to replace all values greater than a given value to a given cutoff?
# Q. From the array a, replace all values greater than 30 to 30 and less than 10 to 10.
a = np.random.uniform(1, 50, 20)

a[a > 30] = 30
a[a < 10] = 10
print(a)

# Solution 1: Using np.clip
np.clip(a, a_min=10, a_max=30)
print(a)

[27.62684215 14.64009987 21.80136195 30.         10.         10.
 30.         30.         10.         29.17957314 30.         11.25090398
 10.08108276 10.         11.76517714 30.         30.         10.
 30.         14.42961361]
[27.62684215 14.64009987 21.80136195 30.         10.         10.
 30.         30.         10.         29.17957314 30.         11.25090398
 10.08108276 10.         11.76517714 30.         30.         10.
 30.         14.42961361]


In [6]:
# 50 How to convert an array of arrays into a flat 1d array?
arr1 = np.arange(3)
arr2 = np.arange(3,6)
arr3 = np.arange(6,9)

array_of_arrays = np.array([arr1, arr2, arr3])
print(array_of_arrays)

flattened_array = array_of_arrays.flatten()
print(flattened_array)

# Desired output: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[0 1 2 3 4 5 6 7 8]


In [21]:
# 58. How to find the duplicate records in a numpy array?

# Input
np.random.seed(100)
a = np.random.randint(0, 5, 10)
print('Array: ', a)
#> Array: [0 0 3 0 2 4 2 2 2 2]

unique, unique_index = np.unique(a, return_index=True)
print(unique)
print(index)
dup = unique[index > 1]
print(dup)

out = np.full(a.shape[0], True)

out[index] = False

print(out)


# Desired
#> [False  True False  True False False  True  True  True  True]


Array:  [0 0 3 0 2 4 2 2 2 2]
[0 2 3 4]
[0 4 2 5]
[2 3 4]
[False  True False  True False False  True  True  True  True]


In [25]:
# 61. How to drop all missing values from a numpy array?

a = np.array([1,2,3,np.nan,5,6,7,np.nan])

result = a[~np.isnan(a)]
print(result)

# Desired array([ 1.,  2.,  3.,  5.,  6.,  7.])


[1. 2. 3. 5. 6. 7.]


In [27]:
# 62. How to compute the euclidean distance between two arrays?

a = np.array([1,2,3,4,5])
b = np.array([4,5,6,7,8])

distance = np.linalg.norm(a - b)

print(distance)

6.708203932499369


In [33]:
# 64. How to subtract a 1d array from a 2d array, where each item of 1d array subtracts from respective row?

a_2d = np.array([[3,3,3],[4,4,4],[5,5,5]])
b_1d = np.array([1,2,3])

print(b_1d[:, None])

result = a_2d - b_1d[:, None]
print(result)
#> [[2 2 2]
#>  [2 2 2]
#>  [2 2 2]]

[[1]
 [2]
 [3]]
[[2 2 2]
 [2 2 2]
 [2 2 2]]
