<a href="https://colab.research.google.com/github/toz015/USTSV/blob/master/101_Numpy_For_DA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

###101 NumPy Exercises for Data Analysis
The goal of the numpy exercises is to serve as a reference as well as to get you to apply numpy beyond the basics. The questions are of 4 levels of difficulties with L1 being the easiest to L4 being the hardest.

In [None]:
#@title 1. Import numpy as np and see the version
import numpy as np 
print(np.__version__)


1.18.5


In [None]:
#@title 2. Create a 1D array of numbers from 0 to 9
print("Sol1:")
print(np.array([i for i in range(10)]))
print("Sol2:")
print(np.arange(10))

Sol1:
[0 1 2 3 4 5 6 7 8 9]
Sol2:
[0 1 2 3 4 5 6 7 8 9]


In [None]:
#@title 3. Create a 3×3 numpy array of all True’s
print("Sol1:")
print(np.array([True] * 9).reshape(3, 3))
print("Sol2:")
print(np.full((3, 3), True, dtype=bool))
print("Sol3:")
print(np.ones((3,3), dtype=bool))

Sol1:
[[ True  True  True]
 [ True  True  True]
 [ True  True  True]]
Sol2:
[[ True  True  True]
 [ True  True  True]
 [ True  True  True]]
Sol3:
[[ True  True  True]
 [ True  True  True]
 [ True  True  True]]


In [None]:
#@title 4. Extract all odd numbers from arr
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
print(arr[arr % 2 == 1])

[1 3 5 7 9]


In [None]:
#@title 5. Replace all odd numbers in arr with -1
arr[arr % 2 == 1] = -1
print(arr)

[ 0 -1  2 -1  4 -1  6 -1  8 -1]


In [None]:
#@title 6. How to replace items that satisfy a condition without affecting the original array?
#Replace all odd numbers in arr with -1 without changing arr
arr = np.arange(10)
out = np.where(arr % 2 == 1, -1, arr)
print(arr)
print(out)

[0 1 2 3 4 5 6 7 8 9]
[ 0 -1  2 -1  4 -1  6 -1  8 -1]


In [None]:
#@title 7. How to reshape an array?
#Convert a 1D array to a 2D array with 2 rows
print("sol1:")
print(np.arange(10).reshape(2, 5))
print("sol2:")
print(np.arange(10).reshape(2, -1))

sol1:
[[0 1 2 3 4]
 [5 6 7 8 9]]
sol2:
[[0 1 2 3 4]
 [5 6 7 8 9]]


In [None]:
#@title 8. How to stack two arrays vertically?
#Stack arrays a and b vertically
a = np.arange(10).reshape(2,-1)
b = np.repeat(1, 10).reshape(2,-1)
print("sol1:")
print(np.concatenate([a, b], axis=0))
print("sol2:")
print(np.vstack([a, b]))
print("sol3:")
print(np.r_[a, b])

sol1:
[[0 1 2 3 4]
 [5 6 7 8 9]
 [1 1 1 1 1]
 [1 1 1 1 1]]
sol2:
[[0 1 2 3 4]
 [5 6 7 8 9]
 [1 1 1 1 1]
 [1 1 1 1 1]]
sol3:
[[0 1 2 3 4]
 [5 6 7 8 9]
 [1 1 1 1 1]
 [1 1 1 1 1]]


In [None]:
#@title 9. How to stack two arrays horizontally?
print("sol1:")
print(np.concatenate([a, b], axis = 1))
print("sol2:")
print(np.hstack([a, b]))
print("sol3:")
print(np.c_[a, b])

sol1:
[[0 1 2 3 4 1 1 1 1 1]
 [5 6 7 8 9 1 1 1 1 1]]
sol2:
[[0 1 2 3 4 1 1 1 1 1]
 [5 6 7 8 9 1 1 1 1 1]]
sol3:
[[0 1 2 3 4 1 1 1 1 1]
 [5 6 7 8 9 1 1 1 1 1]]


In [None]:
#@title 10. How to generate custom sequences in numpy without hardcoding?
a = np.array([1,2,3])
print(np.r_[np.repeat(a, 3), np.tile(a, 3)])

[1 1 1 2 2 2 3 3 3 1 2 3 1 2 3 1 2 3]


In [None]:
#@title 11. How to get the common items between two python numpy arrays?
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
print(np.intersect1d(a, b))

[2 4]


In [None]:
#@title 12. How to remove from one array those items that exist in another?
print(np.setdiff1d(a, b))



[1 3 5 6]


In [None]:
#@title 13. How to get the positions where elements of two arrays match?
np.where(a == b)

(array([1, 3, 5, 7]),)

In [None]:
#@title 14. How to extract all numbers between a given range from a numpy array?
#Get all items between 5 and 10 from a.
a = np.array([2, 6, 1, 9, 10, 3, 27])
print("sol1:")
index = np.where((5 <= a) & (a <= 10))
print(a[index])
print("sol2:")
index = np.where(np.logical_and(a>=5, a<=10))
print(a[index])
print("sol3:")
print(a[(a >= 5) & (a <= 10)])

sol1:
[ 6  9 10]
sol2:
[ 6  9 10]
sol3:
[ 6  9 10]


In [None]:
#@title 15. How to make a python function that handles scalars to work on numpy arrays?
# Convert the function maxx that works on two scalars, to work on two arrays.
def maxx(x, y):
  """Get the maximum of two items"""
  if x >= y:
      return x
  else:
      return y
pair_max = np.vectorize(maxx, otypes=[float])

a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])

print(pair_max(a, b))

[6. 7. 9. 8. 9. 7. 5.]


In [None]:
#@title 16. How to swap two columns in a 2d numpy array?
#Swap columns 1 and 2 in the array arr.
arr = np.arange(9).reshape(3,3)
print(arr)
print("sol:")
print(arr[:, [1, 0 , 2]])



[[0 1 2]
 [3 4 5]
 [6 7 8]]
sol:
[[1 0 2]
 [4 3 5]
 [7 6 8]]


In [None]:
#@title 17. How to swap two rows in a 2d numpy array?
#Swap row 1 and 2 in the array arr.
print(arr[[1, 0, 2], :])

[[3 4 5]
 [0 1 2]
 [6 7 8]]


In [None]:
#@title 18. How to reverse the rows of a 2D array?
print(arr[::-1, :])

[[6 7 8]
 [3 4 5]
 [0 1 2]]


In [None]:
#@title 19. How to reverse the columns of a 2D array?
print(arr[:, ::-1])

[[2 1 0]
 [5 4 3]
 [8 7 6]]


In [None]:
#@title 20. How to create a 2D array containing random floats between 5 and 10?

print("sol1:")
# random.random: Return the next random floating point number in the range [0.0, 1.0).
rand_arr = np.random.randint(low=5, high=10, size=(5,3)) + np.random.random((5,3))
print(rand_arr)
print("sol2:")
# Solution Method 2:
rand_arr = np.random.uniform(5,10, size=(5,3))
print(rand_arr)

sol1:
[[7.77925251 9.59306302 7.23478114]
 [6.20066608 7.53067246 8.80216551]
 [7.94863267 9.6924772  9.89838311]
 [6.97722573 6.07186376 8.29019093]
 [7.17863267 9.47490754 8.05532913]]
sol2:
[[8.22589768 7.96283549 7.983388  ]
 [8.05232986 6.35051133 7.20112047]
 [6.74855261 6.67432979 9.35045157]
 [9.44326386 8.59234105 8.26761077]
 [9.3239359  9.88150164 9.46519245]]


In [None]:
#@title 21. How to print only 3 decimal places in python numpy array?
rand_arr = np.random.random((5,3))

np.set_printoptions(precision=3)
rand_arr[:4]


array([[0.107, 0.486, 0.213],
       [0.659, 0.047, 0.903],
       [0.483, 0.889, 0.685],
       [0.11 , 0.179, 0.62 ]])

In [None]:
#@title 22. How to pretty print a numpy array by suppressing the scientific notation (like 1e10)?
np.random.seed(100)
rand_arr = np.random.random([3,3])/1e3
rand_arr


array([[5.434e-04, 2.784e-04, 4.245e-04],
       [8.448e-04, 4.719e-06, 1.216e-04],
       [6.707e-04, 8.259e-04, 1.367e-04]])

In [None]:
#@title 23. How to limit the number of items printed in output of numpy array?
np.set_printoptions(threshold = 6)
a = np.arange(15)
print(a)

[ 0  1  2 ... 12 13 14]


In [None]:
#@title 24. Print all output of numpy array
print("sol1:")
np.set_printoptions(threshold = a.size)
print(a)


sol1:
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]


In [None]:
#@title 25. How to import a dataset with numbers and texts keeping the text intact in python numpy?
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
# Print the first 3 rows
iris[:3]

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa']], dtype=object)

In [None]:
#@title 26. How to extract a particular column from 1D array of tuples?
species = np.array([row[4] for row in iris])
species[:5]

array([b'Iris-setosa', b'Iris-setosa', b'Iris-setosa', b'Iris-setosa',
       b'Iris-setosa'], dtype='|S15')

In [None]:
#@title 27. How to convert a 1d array of tuples to a 2d numpy array?
print("sol1:")
# Method 1: Convert each row to a list and get the first 4 items
iris_2d = np.array([row.tolist()[:4] for row in iris])
print(iris_2d[:4])
print("sol2:")
# Alt Method 2: Import only the first 4 columns from source url
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
print(iris_2d[:4])

sol1:
[[b'5.1' b'3.5' b'1.4' b'0.2']
 [b'4.9' b'3.0' b'1.4' b'0.2']
 [b'4.7' b'3.2' b'1.3' b'0.2']
 [b'4.6' b'3.1' b'1.5' b'0.2']]
sol2:
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]]


In [None]:
#@title 28. How to compute the mean, median, standard deviation of a numpy array?

sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])

# Solution
mu, med, sd = round(np.mean(sepallength), 3), round(np.median(sepallength)), round(np.std(sepallength), 3)
print(mu, med, sd)

5.843 6.0 0.825


In [None]:
#@title 29. How to normalize an array so the values range exactly between 0 and 1?
print("sol1:")
Smax, Smin = sepallength.max(), sepallength.min()
S = (sepallength - Smin)/(Smax - Smin)
print(S)
print("sol2:")
S = (sepallength - Smin)/sepallength.ptp()  # Thanks, David Ojeda!
print(S)

sol1:
[0.222 0.167 0.111 ... 0.611 0.528 0.444]
sol2:
[0.222 0.167 0.111 ... 0.611 0.528 0.444]


In [None]:
#@title 30. How to compute the softmax score?

def softmax(x):
    """Compute softmax values for each sets of scores in x.
    https://stackoverflow.com/questions/34968722/how-to-implement-the-softmax-function-in-python"""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

print(softmax(sepallength))

[0.002 0.002 0.001 ... 0.009 0.007 0.005]


In [None]:
#@title 31. How to find the percentile scores of a numpy array?
np.percentile(sepallength, [5, 85])

array([4.6, 6.7])

In [None]:
#@title 32. How to insert values at random positions in an array?
# Method 1
i, j = np.where(iris_2d)

# i, j contain the row numbers and column numbers of 600 elements of iris_x
np.random.seed(100)
iris_2d[np.random.choice((i), 20), np.random.choice((j), 20)] = np.nan
print(iris_2d)


[[5.1 3.5 1.4 0.2]
 [nan 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 ...
 [6.5 3.  5.2 2. ]
 [6.2 3.4 5.4 2.3]
 [5.9 3.  5.1 1.8]]


In [None]:
#@title 33. How to find the position of missing values in numpy array?
print("Number of missing values: \n", np.isnan(iris_2d[:, 0]).sum())

print("Position of missing values: \n", np.where(np.isnan(iris_2d[:, 0])))

Number of missing values: 
 4
Position of missing values: 
 (array([ 1, 19, 23, 96]),)


In [None]:
#@title 34. How to filter a numpy array based on two or more conditions?
condition = (iris_2d[:, 2] > 1.5) & (iris_2d[:, 0] < 5.0)
iris_2d[condition]

array([[4.8, 3.4, 1.6, 0.2],
       [4.8, 3.4, 1.9, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [4.9, 2.4, 3.3, nan],
       [4.9, 2.5, 4.5, 1.7]])

In [None]:
#@title 35. How to drop rows that contain a missing value from a numpy array?
print("sol1:")
any_nan_in_row = np.array([~np.any(np.isnan(row)) for row in iris_2d])
print(iris_2d[any_nan_in_row][:5])
print("sol2:")
# Method 2: (By Rong)
print(iris_2d[np.sum(np.isnan(iris_2d), axis = 1) == 0][:5])

sol1:
[[5.1 3.5 1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]]
sol2:
[[5.1 3.5 1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]]


In [None]:
#@title 36. How to find the correlation between two columns of a numpy array?
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

# Solution 1
print(np.corrcoef(iris[:, 0], iris[:, 2]))

# Solution 2
from scipy.stats.stats import pearsonr  
corr, p_value = pearsonr(iris[:, 0], iris[:, 2])
print(corr)


[[1.    0.872]
 [0.872 1.   ]]
0.8717541573048713


In [None]:
#@title 37. How to find if a given array has any null values?
np.isnan(iris_2d).sum()

20

In [None]:
#@title 38. How to replace all missing values with 0 in a numpy array?
iris_2d[np.isnan(iris_2d)] = 0
print(iris_2d)

[[5.1 3.5 1.4 0.2]
 [0.  3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 ...
 [6.5 3.  5.2 2. ]
 [6.2 3.4 5.4 2.3]
 [5.9 3.  5.1 1.8]]


In [None]:
#@title 39. How to find the count of unique values in a numpy array?
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

# Solution
# Extract the species column as an array
species = np.array([row.tolist()[4] for row in iris])

# Get the unique values and the counts
np.unique(species, return_counts=True)

(array([b'Iris-setosa', b'Iris-versicolor', b'Iris-virginica'],
       dtype='|S15'), array([50, 50, 50]))

In [None]:
#@title 40. How to convert a numeric to a categorical (text) array?
# Bin petallength 
petal_length_bin = np.digitize(iris[:, 2].astype('float'), [0, 3, 5, 10])

# Map it to respective category
label_map = {1: 'small', 2: 'medium', 3: 'large', 4: np.nan}
petal_length_cat = [label_map[x] for x in petal_length_bin]

# View
petal_length_cat[:4]


['small', 'small', 'small', 'small']

In [None]:
#@title 41. How to create a new column from existing columns of a numpy array?
sepallength = iris_2d[:, 0].astype('float')
petallength = iris_2d[:, 2].astype('float')
volume = (np.pi * petallength * (sepallength**2))/3


# Introduce new dimension to match iris_2d's
volume = volume[:, np.newaxis]
out = np.hstack([iris_2d, volume])
print(out)

[[5.100e+00 3.500e+00 1.400e+00 2.000e-01 3.813e+01]
 [0.000e+00 3.000e+00 1.400e+00 2.000e-01 0.000e+00]
 [4.700e+00 3.200e+00 1.300e+00 2.000e-01 3.007e+01]
 ...
 [6.500e+00 3.000e+00 5.200e+00 2.000e+00 2.301e+02]
 [6.200e+00 3.400e+00 5.400e+00 2.300e+00 2.174e+02]
 [5.900e+00 3.000e+00 5.100e+00 1.800e+00 1.859e+02]]


In [None]:
#@title 42. How to do probabilistic sampling in numpy?

# Get the species column
species = iris[:, 4]

# Approach 1: Generate Probablistically
np.random.seed(100)
a = np.array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
species_out = np.random.choice(a, 150, p=[0.5, 0.25, 0.25])

# Approach 2: Probablistic Sampling (preferred)
np.random.seed(100)
probs = np.r_[np.linspace(0, 0.500, num=50), np.linspace(0.501, .750, num=50), np.linspace(.751, 1.0, num=50)]
index = np.searchsorted(probs, np.random.random(150))
species_out = species[index]
print(np.unique(species_out, return_counts=True))

(array([b'Iris-setosa', b'Iris-versicolor', b'Iris-virginica'],
      dtype=object), array([77, 37, 36]))


In [None]:
#@title 43. How to get the second largest value of an array when grouped by another array?
#What is the value of second longest petallength of species setosa
# Get the species and petal length columns
petal_len_setosa = iris[iris[:, 4] == b'Iris-setosa', [2]].astype('float')

# Get the second last value
np.unique(np.sort(petal_len_setosa))[-2]

1.7

In [None]:
#@title 44. How to sort a 2D array by a column
#Sort the iris dataset based on sepallength column.
print(iris[iris[:,0].argsort()])

[[b'4.3' b'3.0' b'1.1' b'0.1' b'Iris-setosa']
 [b'4.4' b'3.2' b'1.3' b'0.2' b'Iris-setosa']
 [b'4.4' b'3.0' b'1.3' b'0.2' b'Iris-setosa']
 ...
 [b'7.7' b'3.8' b'6.7' b'2.2' b'Iris-virginica']
 [b'7.7' b'3.0' b'6.1' b'2.3' b'Iris-virginica']
 [b'7.9' b'3.8' b'6.4' b'2.0' b'Iris-virginica']]


In [None]:
#@title 45. How to find the most frequent value in a numpy array?
#Find the most frequent value of petal length (3rd column) in iris dataset.
vals, counts = np.unique(iris[:, 2], return_counts=True)
print(vals[np.argmax(counts)])

b'1.5'


In [None]:
#@title 46. How to find the position of the first occurrence of a value greater than a given value?
#Find the position of the first occurrence of a value greater than 1.0 in petalwidth 4th column of iris dataset.
np.argwhere(iris[:, 3].astype(float) > 1.0)[0]

array([50])

In [None]:
#@title 47. How to replace all values greater than a given value to a given cutoff?
#From the array a, replace all values greater than 30 to 30 and less than 10 to 10.
print("sol1:")
np.random.seed(100)
a = np.random.uniform(1,50, 20)
print(np.clip(a, a_min=10, a_max=30))
print("sol2:")
print(np.where(a < 10, 10, np.where(a > 30, 30, a)))

sol1:
[27.627 14.64  21.801 ... 10.    30.    14.43 ]
sol2:
[27.627 14.64  21.801 ... 10.    30.    14.43 ]


In [None]:
#@title 48. How to get the positions of top n values from a numpy array?
#Get the positions of top 5 maximum values in a given array a.
print("sol1:")
print(a.argsort()[:5])

print("sol2:")
print(np.argpartition(-a, 5)[:5])


sol1:
[ 4 13  5  8 17]
sol2:
[15 10  3  7 18]


In [None]:
#@title 49. How to compute the row wise counts of all possible values in an array?
#?????

In [None]:
#@title 50. How to convert an array of arrays into a flat 1d array?
arr1 = np.arange(3)
arr2 = np.arange(3,7)
arr3 = np.arange(7,10)

array_of_arrays = np.array([arr1, arr2, arr3])
print("sol1:")
arr_2d = np.array([a for arr in array_of_arrays for a in arr])
print(arr_2d)
print("sol2:")
arr_2d = np.concatenate(array_of_arrays)
print(arr_2d)

sol1:
[0 1 2 3 4 5 6 7 8 9]
sol2:
[0 1 2 3 4 5 6 7 8 9]


In [None]:
#@title 51. How to generate one-hot encodings for an array in numpy?
np.random.seed(101) 
arr = np.random.randint(1,4, size=6)
arr
# Solution:
def one_hot_encodings(arr):
    uniqs = np.unique(arr)
    out = np.zeros((arr.shape[0], uniqs.shape[0]))
    for i, k in enumerate(arr):
        out[i, k-1] = 1
    return out

one_hot_encodings(arr)

array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [None]:
#@title 52. How to create row numbers grouped by a categorical variable?
# Create row numbers grouped by a categorical variable.
species = np.genfromtxt(url, delimiter=',', dtype='str', usecols=4)
species_small = np.sort(np.random.choice(species, size=20))
print([i for val in np.unique(species_small) for i, grp in enumerate(species_small[species_small==val])])

[0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 6, 7, 8]


In [None]:
#@title 53. How to create groud ids based on a given categorical variable?
# Solution:
output = [np.argwhere(np.unique(species_small) == s).tolist()[0][0] for val in np.unique(species_small) for s in species_small[species_small==val]]

# Solution: For Loop version
output = []
uniqs = np.unique(species_small)

for val in uniqs:  # uniq values in group
    for s in species_small[species_small==val]:  # each element in group
        groupid = np.argwhere(uniqs == s).tolist()[0][0]  # groupid
        output.append(groupid)

print(output)

[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2]


In [None]:
#@title 54. How to rank items in an array using numpy?
np.random.seed(10)
a = np.random.randint(20, size=10)
print('Array: ', a)

print(a.argsort().argsort())
print('Array: ', a)

Array:  [ 9  4 15  0 17 16 17  8  9  0]
[4 2 6 0 8 7 9 3 5 1]
Array:  [ 9  4 15  0 17 16 17  8  9  0]


In [None]:
#@title 55. How to rank items in a multidimensional array using numpy?
print(a.ravel().argsort().argsort())

[4 2 6 0 8 7 9 3 5 1]


In [None]:
#@title 56. How to find the maximum value in each row of a numpy array 2d?
np.random.seed(100)
a = np.random.randint(1,10, [5,3])
print(np.amax(a, axis=1))

[9 8 6 3 9]


In [None]:
#@title 57. How to compute the min-by-max for each row for a numpy array 2d?
np.apply_along_axis(lambda x: np.min(x)/np.max(x), arr=a, axis=1)

array([0.444, 0.125, 0.5  , 1.   , 0.111])

In [None]:
#title 58. How to find the duplicate records in a numpy array?
#Find the duplicate entries (2nd occurrence onwards) in the given numpy array and mark them as True. First time occurrences should be False.
np.random.seed(100)
a = np.random.randint(0, 5, 10)

## Solution
# There is no direct function to do this as of 1.13.3

# Create an all True array
out = np.full(a.shape[0], True)

# Find the index positions of unique elements
unique_positions = np.unique(a, return_index=True)[1]

# Mark those positions as False
out[unique_positions] = False

print(out)

[False  True False  True False False  True  True  True  True]


In [None]:
#@title 60. How to convert a PIL image to numpy array?
from io import BytesIO
from PIL import Image
import PIL, requests

# Import image from URL
URL = 'https://upload.wikimedia.org/wikipedia/commons/8/8b/Denali_Mt_McKinley.jpg'
response = requests.get(URL)

# Read it as Image
I = Image.open(BytesIO(response.content))

# Optionally resize
I = I.resize([150,150])

# Convert to numpy array
arr = np.asarray(I)

# Optionaly Convert it back to an image and show
im = PIL.Image.fromarray(np.uint8(arr))
Image.Image.show(im)

None


In [None]:
#@title 61. Drop all nan
a = np.array([1,2,3,np.nan,5,6,7,np.nan])
a[~np.isnan(a)]

array([1., 2., 3., 5., 6., 7.])

In [None]:
#@title 62. How to compute the euclidean distance between two arrays?
a = np.array([1,2,3,4,5])
b = np.array([4,5,6,7,8])

# Solution
dist = np.linalg.norm(a-b)
dist

6.708203932499369

In [None]:
#@title 66. How to convert numpy's datetime64 object to datetime's datetime object?
# Input: a numpy datetime64 object
dt64 = np.datetime64('2018-02-25 22:10:10')
# Input: a numpy datetime64 object
dt64 = np.datetime64('2018-02-25 22:10:10')

# Solution
from datetime import datetime
dt64.tolist()

# or

dt64.astype(datetime)
#> datetime.datetime(2018, 2, 25, 22, 10, 10)

datetime.datetime(2018, 2, 25, 22, 10, 10)

In [None]:
#@title 67. Compute moving average 
def moving_average(a, n=3) :
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n

np.random.seed(100)
Z = np.random.randint(10, size=10)
print('array: ', Z)
# Method 1
moving_average(Z, n=3).round(2)

array:  [8 8 3 7 7 0 4 2 5 2]


array([6.33, 6.  , 5.67, 4.67, 3.67, 2.  , 3.67, 3.  ])