# NumPy <sup><sub>*(Continued)*</sub></sup>

We will continue with more of NumPy. Let's get going!

In [1]:
# First things first
import numpy as np

## Fancy Indexing

In [2]:
x = np.arange(0, 20, 2)
x

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [3]:
x[[0, 3, 5]]

array([ 0,  6, 10])

In [4]:
indx = [5, 7, 1]
x[indx]

array([10, 14,  2])

In [5]:
np.random.seed(0)
x = np.random.randint(0, 20, size=(5, 4))
x

array([[12, 15,  0,  3],
       [ 3,  7,  9, 19],
       [18,  4,  6, 12],
       [ 1,  6,  7, 14],
       [17,  5, 13,  8]])

In [6]:
row = [1, 3, 4]
col = [0, 2, 3]
x[row, col]

array([3, 7, 8])

In [7]:
x[[1, 3, 4], [0, 2, 3]]

array([3, 7, 8])

## Array Slices as Views

In [8]:
x = np.arange(0, 10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
x[3:7]

array([3, 4, 5, 6])

In [10]:
y = x[3:7]
y

array([3, 4, 5, 6])

In [11]:
y[0] = 35
y

array([35,  4,  5,  6])

In [12]:
x

array([ 0,  1,  2, 35,  4,  5,  6,  7,  8,  9])

In [13]:
x[5] = 87
x

array([ 0,  1,  2, 35,  4, 87,  6,  7,  8,  9])

In [14]:
y

array([35,  4, 87,  6])

As you can see, `y` is a view onto a slice of `x`, not a new copy: the two share the same underlying data, so changing an element of `y` is reflected in `x`, and changing an element of `x` inside the sliced range is reflected in `y`.

In [15]:
y.base is x

True

We can see this in 2d arrays too.

In [16]:
np.random.seed(1)
x = np.random.randint(0, 20, (5, 6))
x

array([[ 5, 11, 12,  8,  9, 11],
       [ 5, 15,  0, 16,  1, 12],
       [ 7, 13,  6, 18,  5, 18],
       [11, 10, 14, 18,  4,  9],
       [17,  0, 13,  9,  9,  7]])

In [17]:
y = x[1:4, 2:]
y

array([[ 0, 16,  1, 12],
       [ 6, 18,  5, 18],
       [14, 18,  4,  9]])

In [18]:
y[0, 0] = 200
x

array([[  5,  11,  12,   8,   9,  11],
       [  5,  15, 200,  16,   1,  12],
       [  7,  13,   6,  18,   5,  18],
       [ 11,  10,  14,  18,   4,   9],
       [ 17,   0,  13,   9,   9,   7]])

If we want to explicitly create a copy, we can use the `np.copy()` function or the `.copy()` method on arrays.

In [19]:
x

array([[  5,  11,  12,   8,   9,  11],
       [  5,  15, 200,  16,   1,  12],
       [  7,  13,   6,  18,   5,  18],
       [ 11,  10,  14,  18,   4,   9],
       [ 17,   0,  13,   9,   9,   7]])

In [20]:
y = x[1:4, 2:].copy()  # or we can do y = np.copy(x[1:4, 2:])
y

array([[200,  16,   1,  12],
       [  6,  18,   5,  18],
       [ 14,  18,   4,   9]])

In [21]:
y[0, 0] = 999
y

array([[999,  16,   1,  12],
       [  6,  18,   5,  18],
       [ 14,  18,   4,   9]])

In [22]:
x

array([[  5,  11,  12,   8,   9,  11],
       [  5,  15, 200,  16,   1,  12],
       [  7,  13,   6,  18,   5,  18],
       [ 11,  10,  14,  18,   4,   9],
       [ 17,   0,  13,   9,   9,   7]])

## Flattening

In [23]:
np.random.seed(3)
x = np.random.randint(0, 20, size=(4, 3))
x

array([[10,  3,  8],
       [ 0, 19, 10],
       [11,  9, 10],
       [ 6,  0, 12]])

In [24]:
x.flatten()

array([10,  3,  8,  0, 19, 10, 11,  9, 10,  6,  0, 12])

In [25]:
x.ravel()

array([10,  3,  8,  0, 19, 10, 11,  9, 10,  6,  0, 12])

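Both `flatten()` and `ravel()` appear to do the same thing, but `flatten()` always returns a copy, whereas `ravel()` returns a view whenever possible (as it does for the contiguous array here) and falls back to a copy only when a view cannot be made.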

In [26]:
x.flatten().base is x  # not a view

False

In [27]:
x.ravel().base is x  # a view

True

## Type Conversion

In [28]:
x = np.arange(-10, 10).reshape(5, 4)
x

array([[-10,  -9,  -8,  -7],
       [ -6,  -5,  -4,  -3],
       [ -2,  -1,   0,   1],
       [  2,   3,   4,   5],
       [  6,   7,   8,   9]])

In [29]:
x.dtype

dtype('int64')

In [30]:
x.astype('float')

array([[-10.,  -9.,  -8.,  -7.],
       [ -6.,  -5.,  -4.,  -3.],
       [ -2.,  -1.,   0.,   1.],
       [  2.,   3.,   4.,   5.],
       [  6.,   7.,   8.,   9.]])

In [31]:
x.astype('int32')

array([[-10,  -9,  -8,  -7],
       [ -6,  -5,  -4,  -3],
       [ -2,  -1,   0,   1],
       [  2,   3,   4,   5],
       [  6,   7,   8,   9]], dtype=int32)

In [32]:
x.astype('bool')

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True, False,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [33]:
np.random.seed(0)
y = np.random.random((4,4))*100
y

array([[54.88135039, 71.51893664, 60.27633761, 54.4883183 ],
       [42.36547993, 64.58941131, 43.75872113, 89.17730008],
       [96.36627605, 38.34415188, 79.17250381, 52.88949198],
       [56.80445611, 92.55966383,  7.10360582,  8.71292997]])

In [34]:
y.astype('int')  # drops the fractional part rather than rounding (54.88 -> 54)

array([[54, 71, 60, 54],
       [42, 64, 43, 89],
       [96, 38, 79, 52],
       [56, 92,  7,  8]])

## Repeating Sequences

In [35]:
a = np.array([3, 5, 8, 9])
a

array([3, 5, 8, 9])

In [36]:
np.tile(a, 3)  # repeats a whole array 3 times

array([3, 5, 8, 9, 3, 5, 8, 9, 3, 5, 8, 9])

In [37]:
np.repeat(a, 3)  # repeats each item 3 times

array([3, 3, 3, 5, 5, 5, 8, 8, 8, 9, 9, 9])

In [38]:
np.random.seed(8)
a = np.random.randint(0, 10, (3, 2))
a

array([[3, 4],
       [1, 9],
       [5, 8]])

In [39]:
np.tile(a, 3)

array([[3, 4, 3, 4, 3, 4],
       [1, 9, 1, 9, 1, 9],
       [5, 8, 5, 8, 5, 8]])

In [40]:
np.repeat(a, 3)

array([3, 3, 3, 4, 4, 4, 1, 1, 1, 9, 9, 9, 5, 5, 5, 8, 8, 8])

## `np.where()`
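When called with only a condition, `np.where()` works like Boolean indexing, but instead of the elements themselves it returns the index positions of the elements where the condition is satisfied, as a tuple with one index array per dimension.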

In [41]:
np.random.seed(8)
a = np.random.randint(0, 20, 10)
a

array([ 3, 17,  9,  5,  8, 19,  8, 16, 13, 17])

In [42]:
a[a<10]

array([3, 9, 5, 8, 8])

In [43]:
np.where(a<10)

(array([0, 2, 3, 4, 6]),)

In [44]:
np.random.seed(8)
a = np.random.randint(0, 20, (3, 2))
a

array([[ 3, 17],
       [ 9,  5],
       [ 8, 19]])

In [45]:
np.where(a>10)  # returns (index of rows, index of columns)

(array([0, 2]), array([1, 1]))

## Concatenation

In [46]:
x = np.zeros([4, 4])
x

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [47]:
y = np.ones([4, 4])
y

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [48]:
np.concatenate([x, y], axis=0)  # join along axis 0: y is stacked below x (more rows)

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [49]:
np.vstack([x,y])

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [50]:
np.concatenate([x, y], axis=1)  # join along axis 1: y is placed to the right of x (more columns)

array([[0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.]])

In [51]:
np.hstack([x,y])

array([[0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1.]])

## Splitting

In [52]:
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [53]:
a1, a2, a3 = np.split(a, [5, 10])
print(a1, a2, a3)

[0 1 2 3 4] [5 6 7 8 9] [10 11 12 13 14]


In [54]:
x = np.arange(0, 20).reshape(4, 5)
x

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [55]:
upper, lower = np.vsplit(x, [2])
print(upper)
print(lower)

[[0 1 2 3 4]
 [5 6 7 8 9]]
[[10 11 12 13 14]
 [15 16 17 18 19]]


In [56]:
left, right = np.hsplit(x, [3])
print(left)
print(right)

[[ 0  1  2]
 [ 5  6  7]
 [10 11 12]
 [15 16 17]]
[[ 3  4]
 [ 8  9]
 [13 14]
 [18 19]]


## Matrix/Vector Operations

In [57]:
np.random.seed(1)
x = np.random.randint(0, 20, (4, 3))
x

array([[ 5, 11, 12],
       [ 8,  9, 11],
       [ 5, 15,  0],
       [16,  1, 12]])

In [58]:
x.T  # transpose

array([[ 5,  8,  5, 16],
       [11,  9, 15,  1],
       [12, 11,  0, 12]])

In [59]:
np.random.seed(7)
y = np.random.randint(0, 20, (3, 5))
y

array([[15,  4,  3, 19,  7],
       [14,  8, 14, 10,  8],
       [ 7,  6,  4, 16,  7]])

In [60]:
np.matmul(x, y) # matrix multiplication

array([[313, 180, 217, 397, 207],
       [323, 170, 194, 418, 205],
       [285, 140, 225, 245, 155],
       [338, 144, 110, 506, 204]])

In [61]:
np.random.seed(1)
a = np.random.randint(0, 20, 5)
a

array([ 5, 11, 12,  8,  9])

In [62]:
np.random.seed(8)
b = np.random.randint(0, 20, 5)
b

array([ 3, 17,  9,  5,  8])

In [None]:
np.dot(a, b)  # dot product of vector a and b

In [63]:
a.dot(b)

422

In [64]:
np.random.seed(2)
x = np.random.randint(0, 20, (3, 3))
x

array([[ 8, 15, 13],
       [ 8, 11, 18],
       [11,  8,  7]])

In [65]:
np.linalg.det(x)  # determinant of matrix

853.0000000000001

In [66]:
np.linalg.inv(x)  # inverse of matrix

array([[-0.07854631, -0.00117233,  0.14888628],
       [ 0.16647128, -0.10199297, -0.04689332],
       [-0.06682298,  0.11840563, -0.03751465]])

In [67]:
a = np.array([3, 4])
a

array([3, 4])

In [68]:
np.linalg.norm(a)  # magnitude of vector a

5.0

In [69]:
np.sqrt(np.sum(np.square(a)))

5.0

## Sorting

In [70]:
np.random.seed(7)
x = np.random.randint(0, 30, size=10)
x

array([15,  4, 25, 22,  3, 19, 23,  7, 28, 25])

In [71]:
np.sort(x)  # returns sorted array

array([ 3,  4,  7, 15, 19, 22, 23, 25, 25, 28])

In [72]:
x

array([15,  4, 25, 22,  3, 19, 23,  7, 28, 25])

In [73]:
x.sort()  # changes array x
x

array([ 3,  4,  7, 15, 19, 22, 23, 25, 25, 28])

In [74]:
np.random.seed(8)
y = np.random.randint(0, 30, size=10)
y

array([ 3, 20, 17,  9,  5, 26,  8, 19,  8, 16])

In [75]:
np.argsort(y)  # returns index positions of sorted array

array([0, 4, 6, 8, 3, 9, 2, 7, 1, 5])

In [76]:
y[np.argsort(y)]

array([ 3,  5,  8,  8,  9, 16, 17, 19, 20, 26])

In [77]:
np.random.seed(7)
x = np.random.randint(0, 30, size=(4, 5))
x

array([[15,  4, 25, 22,  3],
       [19, 23,  7, 28, 25],
       [14, 23,  8, 25, 14],
       [10, 26,  8,  7,  6]])

In [78]:
np.sort(x, axis=0)  # sort along axis 0: each column is sorted independently, top to bottom

array([[10,  4,  7,  7,  3],
       [14, 23,  8, 22,  6],
       [15, 23,  8, 25, 14],
       [19, 26, 25, 28, 25]])

In [79]:
x

array([[15,  4, 25, 22,  3],
       [19, 23,  7, 28, 25],
       [14, 23,  8, 25, 14],
       [10, 26,  8,  7,  6]])

In [80]:
np.sort(x, axis=1)  # sort along axis 1: each row is sorted independently, left to right

array([[ 3,  4, 15, 22, 25],
       [ 7, 19, 23, 25, 28],
       [ 8, 14, 14, 23, 25],
       [ 6,  7,  8, 10, 26]])

# Task 6
Write a function that takes two numpy arrays (1d) and returns their **cosine similarity**. Use vectorization where possible.

*Cosine similarity* of two vectors is defined as the ratio of their dot product to the product of their magnitudes as follows:

## $\cos(\pmb x, \pmb y) = \frac {\pmb x \cdot \pmb y}{\lVert \pmb x \rVert \cdot \lVert \pmb y \rVert}$



In [83]:
def cosine_similarity(x, y):
    """Return the cosine similarity of two 1-D vectors.

    Cosine similarity is the dot product of the two vectors divided by the
    product of their Euclidean magnitudes: ``x . y / (||x|| * ||y||)``.
    The computation is fully vectorized (no Python-level loops).

    Parameters
    ----------
    x, y : array_like
        One-dimensional sequences of numbers with the same length.

    Returns
    -------
    float
        The cosine of the angle between ``x`` and ``y``; within ``[-1, 1]``
        up to floating-point rounding.

    Raises
    ------
    ValueError
        If either input is not 1-D, if the lengths differ, or if either
        vector has zero magnitude (the similarity is undefined then).
    """
    # Work in floating point so large integer inputs cannot overflow
    # while the products are accumulated.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    if x.ndim != 1 or y.ndim != 1:
        raise ValueError("cosine_similarity expects two 1-D arrays")
    if x.shape != y.shape:
        raise ValueError("x and y must have the same length")

    magnitude_product = np.linalg.norm(x) * np.linalg.norm(y)
    if magnitude_product == 0:
        # Dividing by zero would silently yield nan/inf; fail loudly instead.
        raise ValueError("cosine similarity is undefined for a zero-magnitude vector")

    return float(np.dot(x, y) / magnitude_product)