In [1]:
import numpy as np

In [2]:
# Q. Import numpy as `np` and print the version number.

In [3]:
import numpy as np
print(np.__version__)

1.17.2


In [4]:
# Q. How to create a 1D array?

In [5]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
# Q. How to create a boolean array?

In [7]:
np.full((3, 3), True, dtype=bool)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [8]:
# Alternate method:
np.ones((3,3), dtype=bool)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [9]:
# Q. How to extract items that satisfy a given condition from 1D array?
#    Extract all odd numbers from arr

In [10]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [11]:
arr[arr % 2 == 1]

array([1, 3, 5, 7, 9])

In [12]:
# Q. Replace all odd numbers in arr with -1

In [13]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [14]:
arr[arr % 2 == 1] = -1
arr

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

In [15]:
# Q. Replace all odd numbers in arr with -1 without changing arr

In [16]:
arr = np.arange(10)
out = np.where(arr % 2 == 1, -1, arr)

print(arr)
out

[0 1 2 3 4 5 6 7 8 9]


array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

In [17]:
# Q. Convert a 1D array to a 2D array with 2 rows

In [18]:
arr = np.arange(10)
arr.reshape(2, -1)  # Setting to -1 automatically decides the number of cols

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [19]:
# How to stack two arrays vertically?

In [20]:
a = np.arange(10).reshape(2,-1)
b = np.repeat(1, 10).reshape(2,-1)

In [21]:
np.arange(10).reshape(2, 5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [22]:
np.arange(10).reshape(2, -1)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [23]:
# Method 1:
np.concatenate([a, b], axis=0)

# Method 2:
np.vstack([a, b])

# Method 3:
np.r_[a, b]

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [24]:
# Q. Get the common items between a and b

In [25]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

In [26]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

np.intersect1d(a,b)

array([2, 4])

In [27]:
# Q. From array a remove all items present in array b

In [28]:
a = np.array([1,2,3,4,5])
b = np.array([5,6,7,8,9])

In [29]:
# From 'a' remove all of 'b'
np.setdiff1d(a,b)

array([1, 2, 3, 4])

In [30]:
# Q. Get the positions where elements of a and b match

In [31]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

np.where(a == b)

(array([1, 3, 5, 7], dtype=int64),)

In [32]:
# Q. Get all items between 5 and 10 from a

In [33]:
a = np.arange(15)

# Method 1
index = np.where((a >= 5) & (a <= 10))
a[index]


array([ 5,  6,  7,  8,  9, 10])

In [34]:
# Q. Limit the number of items printed in python numpy array a to a maximum of 6 elements.
a = np.arange(15)

In [35]:
np.set_printoptions(threshold=6)
a = np.arange(15)
a

array([ 0,  1,  2, ..., 12, 13, 14])

In [36]:
# Q. Print or show only 3 decimal places of the numpy array rand_arr.


In [37]:
# Input
rand_arr = np.random.random((5,3))

# Create the random array
rand_arr = np.random.random([5,3])

# Limit to 3 decimal places
np.set_printoptions(precision=2)
rand_arr[:4]

array([[0.27, 0.39, 0.57],
       [0.45, 0.64, 0.78],
       [0.12, 0.87, 0.19],
       [0.3 , 0.26, 0.94]])

In [38]:
# Q. Extract the text column species from the 1D iris imported

In [39]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_1d = np.genfromtxt(url, delimiter=',', dtype=None,encoding=None)

In [40]:
print(iris_1d.shape)

(150,)


In [41]:
iris_1d[:5]

array([(5.1, 3.5, 1.4, 0.2, 'Iris-setosa'),
       (4.9, 3. , 1.4, 0.2, 'Iris-setosa'),
       (4.7, 3.2, 1.3, 0.2, 'Iris-setosa'),
       (4.6, 3.1, 1.5, 0.2, 'Iris-setosa'),
       (5. , 3.6, 1.4, 0.2, 'Iris-setosa')],
      dtype=[('f0', '<f8'), ('f1', '<f8'), ('f2', '<f8'), ('f3', '<f8'), ('f4', '<U15')])

In [42]:
species = np.array([row[4] for row in iris_1d])

In [43]:
species

array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', ..., 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica'], dtype='<U15')

In [44]:
# Q. Find the mean, median, standard deviation of iris's sepallength (1st column

In [45]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object',encoding=None)

In [46]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object',encoding=None)
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])

In [47]:
sepallength

array([5.1, 4.9, 4.7, ..., 6.5, 6.2, 5.9])

In [48]:
mu, med, sd = np.mean(sepallength), np.median(sepallength), np.std(sepallength)
print(mu, med, sd)

5.843333333333334 5.8 0.8253012917851409


In [49]:
# Q. Create a normalized form of iris's sepallength whose values 
# range exactly between 0 and 1 so that the minimum has value 0 and 
# maximum has value 1.

In [50]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])

In [51]:
Smax, Smin = sepallength.max(), sepallength.min()

In [52]:
S = (sepallength - Smin)/(Smax - Smin)

In [53]:
S

array([0.22, 0.17, 0.11, ..., 0.61, 0.53, 0.44])

In [54]:
# Q. Find the 5th and 95th percentile of iris's sepallength

In [55]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])

In [56]:
np.percentile(sepallength, q=[5, 95])

array([4.6 , 7.25])

In [57]:
# Q. Find the number and position of missing values in iris_2d's sepallength (1st column)

In [58]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])


In [59]:
iris_2d

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       ...,
       [6.5, 3. , 5.2, 2. ],
       [6.2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]])

In [60]:
row = np.random.randint(150, size=20)

In [61]:
col = np.random.randint(4, size=20)

In [62]:
iris_2d[row, col] = np.nan

In [63]:
print("Number of missing values: \n", np.isnan(iris_2d[:, 0]).sum())
print("Position of missing values: \n", np.where(np.isnan(iris_2d[:, 0])))

Number of missing values: 
 5
Position of missing values: 
 (array([ 47,  48,  56,  90, 118], dtype=int64),)


In [64]:
# Q. Filter the rows of iris_2d that has petallength (3rd column) > 1.5 
# and sepallength (1st column) < 5.0

In [65]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

In [66]:
condition = (iris_2d[:, 2] > 1.5) & (iris_2d[:, 0] < 5.0)
condition

array([False, False, False, ..., False, False, False])

In [67]:
iris_2d[condition]

array([[4.8, 3.4, 1.6, 0.2],
       [4.8, 3.4, 1.9, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [4.9, 2.4, 3.3, 1. ],
       [4.9, 2.5, 4.5, 1.7]])

In [68]:
# Q. Find the correlation between SepalLength(1st column) and 
# PetalLength(3rd column) in iris_2d

In [69]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

In [70]:
# Solution 1
np.corrcoef(iris[:, 0], iris[:, 2])


array([[1.  , 0.87],
       [0.87, 1.  ]])

In [71]:
# Q. Find out if iris_2d has any missing values.


In [72]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

In [73]:
np.isnan(iris_2d).any()

False

In [74]:
# Q. Find the unique values and the count of unique values in iris's species

In [75]:
# Import iris keeping the text column intact
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [76]:
# Extract the species column as an array
species = np.array([row.tolist()[4] for row in iris])

In [77]:
# Get the unique values and the counts
np.unique(species, return_counts=True)

(array([b'Iris-setosa', b'Iris-versicolor', b'Iris-virginica'],
       dtype='|S15'), array([50, 50, 50], dtype=int64))

In [78]:
# Q. Bin the petal length (3rd) column of iris_2d to form a text array, 
# such that if petal length is:
# Less than 3 --> 'small'
# 3-5 --> 'medium'
# '>=5 --> 'large'

In [79]:
# Input
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [80]:
# Bin petallength 
petal_length_bin = np.digitize(iris[:, 2].astype('float'), [0, 3, 5, 10])
petal_length_bin

array([1, 1, 1, ..., 3, 3, 3], dtype=int64)

In [81]:
# Map it to respective category
label_map = {1: 'small', 2: 'medium', 3: 'large', 4: np.nan}
petal_length_cat = [label_map[x] for x in petal_length_bin]

In [82]:
# View
petal_length_cat[:4]

['small', 'small', 'small', 'small']

In [83]:
# Q. Sort the iris dataset based on sepallength column.


In [84]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [85]:
# Sort by column position 0: SepalLength
iris[iris[:,0].argsort()][:20]

array([[b'4.3', b'3.0', b'1.1', b'0.1', b'Iris-setosa'],
       [b'4.4', b'3.2', b'1.3', b'0.2', b'Iris-setosa'],
       [b'4.4', b'3.0', b'1.3', b'0.2', b'Iris-setosa'],
       ...,
       [b'4.9', b'2.5', b'4.5', b'1.7', b'Iris-virginica'],
       [b'4.9', b'3.1', b'1.5', b'0.1', b'Iris-setosa'],
       [b'4.9', b'3.1', b'1.5', b'0.1', b'Iris-setosa']], dtype=object)

In [86]:
# Q. Find the most frequent value of petal length (3rd column) in iris dataset.

In [87]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [88]:
vals, counts = np.unique(iris[:, 2], return_counts=True)
print(vals[np.argmax(counts)])

b'1.5'
