### Reference:
* machinelearningplus.com/python/101-numpy-exercises-python/
* https://github.com/rougier/numpy-100

In [1]:
import numpy as np
print(np.__version__)

1.19.5


In [2]:
arr = np.arange(1, 10)
arr

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [3]:
# Extract all odd numbers from arr
arr[arr % 2 == 1]

array([1, 3, 5, 7, 9])

In [5]:
# Replace all odd numbers in arr with -1
arr[arr % 2 == 1] = -1
arr

array([-1,  2, -1,  4, -1,  6, -1,  8, -1])

In [6]:
# Replace all odd numbers in arr with -1 without changing arr
arr = np.arange(1, 10)
out = np.where(arr % 2 == 1, -1, arr)
print(arr)
out

[1 2 3 4 5 6 7 8 9]


array([-1,  2, -1,  4, -1,  6, -1,  8, -1])

In [9]:
# Q. Convert a 1D array to a 2D array with 2 rows
arr = np.arange(10)
arr.reshape(2, -1)  # Setting to -1 automatically decides the number of cols

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

#### How to stack two arrays vertically?

In [10]:
# Stack arrays a and b vertically
a = np.arange(10).reshape(2,-1)
b = np.repeat(1, 10).reshape(2,-1)
print(a)
print(b)

[[0 1 2 3 4]
 [5 6 7 8 9]]
[[1 1 1 1 1]
 [1 1 1 1 1]]


In [11]:
# Method 1:
np.concatenate([a, b], axis=0)

# Method 2:
# np.vstack([a, b])

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

#### How to convert an array of arrays into a flat 1d array?

In [44]:
 # Input:
arr1 = np.arange(3)
arr2 = np.arange(3,7)
arr3 = np.arange(7,10)

array_of_arrays = np.array([arr1, arr2, arr3])
print('array_of_arrays: ', array_of_arrays)

# Solution 1
# arr_2d = np.array([a for arr in array_of_arrays for a in arr])

# Solution 2:
arr_2d = np.concatenate(array_of_arrays)
print(arr_2d)

array_of_arrays:  [array([0, 1, 2]) array([3, 4, 5, 6]) array([7, 8, 9])]
[0 1 2 3 4 5 6 7 8 9]


  array_of_arrays = np.array([arr1, arr2, arr3])


#### How to stack two arrays horizontally?

In [12]:
a = np.arange(10).reshape(2,-1)
b = np.repeat(1, 10).reshape(2,-1)

In [13]:
# Method 1:
np.concatenate([a, b], axis=1)

# Method 2:
# np.hstack([a, b])

array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

#### How to get the common items between two python numpy arrays?

In [14]:
# Get the common items between a and b
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

In [15]:
np.intersect1d(a,b)

array([2, 4])

In [16]:
# Get the positions where elements of a and b match
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
np.where(a == b)

(array([1, 3, 5, 7], dtype=int64),)

In [17]:
# Get all items between 5 and 10 from a
a = np.arange(15)
# Method 1
index = np.where((a >= 5) & (a <= 10))
a[index]

# Method 3: (thanks loganzk!)
a[(a >= 5) & (a <= 10)]

array([ 5,  6,  7,  8,  9, 10])

#### How to swap two columns in a 2d numpy array?

In [18]:
arr = np.arange(9).reshape(3,3)
# Solution
arr[:, [1,0,2]]

array([[1, 0, 2],
       [4, 3, 5],
       [7, 6, 8]])

In [19]:
# How to swap two rows in a 2d numpy array?
arr = np.arange(9).reshape(3,3)

# Solution
arr[[1,0,2], :]

array([[3, 4, 5],
       [0, 1, 2],
       [6, 7, 8]])

#### How to reverse the rows of a 2D array?

In [23]:
arr = np.arange(9).reshape(3,3)
print(arr)

# Solution
# arr[::-1]
arr[::-1, :]

[[0 1 2]
 [3 4 5]
 [6 7 8]]


array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

In [21]:
# How to reverse the columns of a 2D array?
arr = np.arange(9).reshape(3,3)

# Solution
arr[:, ::-1]

array([[2, 1, 0],
       [5, 4, 3],
       [8, 7, 6]])

#### How to create a 2D array containing random floats between 5 and 10?

In [24]:
# Solution Method 1:
rand_arr = np.random.randint(low=5, high=10, size=(5,3)) + np.random.random((5,3))
# print(rand_arr)

# Solution Method 2:
rand_arr = np.random.uniform(5,10, size=(5,3))
print(rand_arr)

[[8.01908791 6.33939977 8.48666331]
 [6.12229299 6.01458654 5.25542653]
 [5.3894374  9.84285531 8.91574652]
 [6.1585314  8.80836871 8.10900442]
 [7.14288634 7.36531358 8.31272115]]


#### How to pretty print a numpy array by suppressing the scientific notation (like 1e10)?

In [25]:
np.set_printoptions(suppress=False)

np.random.seed(100)
rand_arr = np.random.random([3,3])/1e3
rand_arr

array([[5.43404942e-04, 2.78369385e-04, 4.24517591e-04],
       [8.44776132e-04, 4.71885619e-06, 1.21569121e-04],
       [6.70749085e-04, 8.25852755e-04, 1.36706590e-04]])

In [26]:
np.set_printoptions(suppress=True, precision=6)  # precision is optional
rand_arr

array([[0.000543, 0.000278, 0.000425],
       [0.000845, 0.000005, 0.000122],
       [0.000671, 0.000826, 0.000137]])

#### How to find the percentile scores of a numpy array?

In [27]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])

# Solution
np.percentile(sepallength, q=[5, 95])

array([4.6  , 7.255])

#### How to filter a numpy array based on two or more conditions?

In [28]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

# Solution
condition = (iris_2d[:, 2] > 1.5) & (iris_2d[:, 0] < 5.0)
iris_2d[condition]

array([[4.8, 3.4, 1.6, 0.2],
       [4.8, 3.4, 1.9, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [4.9, 2.4, 3.3, 1. ],
       [4.9, 2.5, 4.5, 1.7]])

#### How to find if a given array has any null values?

In [31]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

np.isnan(iris_2d).any()

False

#### How to replace all missing values with 0 in a numpy array?

In [32]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan

# Solution
iris_2d[np.isnan(iris_2d)] = 0
iris_2d[:4]

array([[5.1, 3.5, 1.4, 0. ],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2]])

#### How to drop rows that contain a missing value from a numpy array?

In [29]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan

# Solution
# No direct numpy function for this.
# Method 1:
# any_nan_in_row = np.array([~np.any(np.isnan(row)) for row in iris_2d])
# iris_2d[any_nan_in_row][:5]

# Method 2: (By Rong)
iris_2d[np.sum(np.isnan(iris_2d), axis = 1) == 0][:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5.4, 3.9, 1.7, 0.4]])

#### How to find the correlation between two columns of a numpy array?

In [30]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

# Solution 1
np.corrcoef(iris[:, 0], iris[:, 2])[0, 1]

# Solution 2
from scipy.stats.stats import pearsonr  
corr, p_value = pearsonr(iris[:, 0], iris[:, 2])
print(corr)

0.8717541573048713


#### How to find the count of unique values in a numpy array?

In [33]:
# Import iris keeping the text column intact
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

# Solution
# Extract the species column as an array
species = np.array([row.tolist()[4] for row in iris])

# Get the unique values and the counts
np.unique(species, return_counts=True)

(array([b'Iris-setosa', b'Iris-versicolor', b'Iris-virginica'],
       dtype='|S15'),
 array([50, 50, 50], dtype=int64))

#### How to find the most frequent value in a numpy array?

In [34]:
# Input:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')

# Solution:
vals, counts = np.unique(iris[:, 2], return_counts=True)
print(vals[np.argmax(counts)])

b'1.5'


#### How to get the positions of top n values from a numpy array?

In [38]:
# Input
np.random.seed(100)
a = np.random.uniform(1,50, 20)

# Solution:
print(a.argsort()[:5])

# Solution 2:
# np.argpartition(-a, 5)[:5]

[ 4 13  5  8 17]


In [39]:
# Below methods will get you the values.
# Method 1:
a[a.argsort()][-5:]

# Method 2:
np.sort(a)[-5:]

array([40.995013, 41.466785, 42.39403 , 44.674776, 48.952565])