### Numpy Practice

#### The array object in NumPy is called ndarray

In [1]:
import numpy as np

In [2]:
# Build a one-dimensional ndarray from a Python list of integers and display it.
sample_array = np.array([842, 932, 687432, 93274, 10])
sample_array

array([   842,    932, 687432,  93274,     10])

In [3]:
sample_array_list = [842, 932, 687432, 93274, 10]
sample_array_list

[842, 932, 687432, 93274, 10]

In [4]:
# Compare the ndarray with the plain Python list.
# np.array_equal is True only when both have the same shape and every element
# is equal, so "Matches" is reported only when the two really agree (the
# previous loop printed "Matches" unconditionally, even after a mismatch).
if np.array_equal(sample_array, sample_array_list):
    print("Matches")
else:
    print("not same")

Matches


In [5]:
# A 2-D array: two rows of three elements each.
second_order_tensor = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
# A 3-D array: two stacked blocks, each holding two rows of three elements.
third_order_tensor = np.array([
    [[10, 20, 30], [40, 50, 60]],
    [[70, 80, 90], [100, 110, 120]],
])

# Show each array followed by the number of axes reported by `ndim`.
print("second_order_tensor", second_order_tensor)
print("number of dimensions", second_order_tensor.ndim)
print("third_order_tensor", third_order_tensor)
print("number of dimensions", third_order_tensor.ndim)

second_order_tensor [[1 2 3]
 [4 5 6]]
number of dimensions 2
third_order_tensor [[[ 10  20  30]
  [ 40  50  60]]

 [[ 70  80  90]
  [100 110 120]]]
number of dimensions 3


In [6]:
# dtype="S" requests a byte-string dtype, so each integer is stored as bytes.
arr = np.array([1, 2, 3, 4], dtype="S")
# Print the array and then its dtype, one per line.
print(arr, arr.dtype, sep="\n")

[b'1' b'2' b'3' b'4']
|S1


#### ndarray is a generic multidimensional container for homogeneous data - i.e. all elements must be the same type

In [7]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [8]:
# Cast the integer array to float64; the converted result is bound to a new name.
sample_float_array = sample_array.astype(np.float64)

In [9]:
sample_float_array

array([8.42000e+02, 9.32000e+02, 6.87432e+05, 9.32740e+04, 1.00000e+01])

In [10]:
sample_float_array_1 = np.array([20.67, 30.89, 27.7665])
sample_float_array_1.dtype

dtype('float64')

In [11]:
# Casting float -> int32 drops the fractional part (20.67 -> 20), it does not round.
sample_integer_array = sample_float_array_1.astype(np.int32)
sample_integer_array

array([20, 30, 27], dtype=int32)

In [12]:
example_array = np.arange(10)

In [13]:
example_array

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [14]:
example_array[5:8]

array([5, 6, 7])

In [15]:
# Basic slicing gives a view onto example_array's data (elements 5, 6, 7), not a copy.
sliced_array_example = example_array[5:8]
sliced_array_example

array([5, 6, 7])

In [16]:
# Write through the slice; because the slice is a view, this also changes example_array[6].
sliced_array_example[1] = 12345

In [17]:
sliced_array_example

array([    5, 12345,     7])

In [18]:
# The assignment made through sliced_array_example shows up here as well,
# because the slice is a view that shares its data with example_array.
example_array

array([    0,     1,     2,     3,     4,     5, 12345,     7,     8,
           9])

##### Unlike Python's built-in lists, NumPy array (ndarray) slices are "views" on the original array. This means that the data is not copied, and any modification to the view will be reflected in the source array.

##### If you want a copy of a slice of an ndarray instead of a view, you need to copy the array explicitly - for example, example_array[5:8].copy()

In [19]:
# .copy() gives the slice its own data, so later edits to it leave example_array untouched.
another_sliced_array = example_array[5:8].copy()

In [20]:
another_sliced_array

array([    5, 12345,     7])

In [21]:
# Modify only the independent copy.
another_sliced_array[1] = 6

In [22]:
# The copy reflects its own edit (6), while the view and the original
# still show the earlier 12345 written through the view.
print("Copy of the sliced array",another_sliced_array)
print("View of the sliced array", sliced_array_example)
print("Original array", example_array)

Copy of the sliced array [5 6 7]
View of the sliced array [    5 12345     7]
Original array [    0     1     2     3     4     5 12345     7     8     9]


In [23]:
third_order_tensor

array([[[ 10,  20,  30],
        [ 40,  50,  60]],

       [[ 70,  80,  90],
        [100, 110, 120]]])

In [24]:
third_order_tensor[0]

array([[10, 20, 30],
       [40, 50, 60]])

In [25]:
third_order_tensor[0, 1]

array([40, 50, 60])

In [26]:
third_order_tensor[0, 1, 1]

50

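#### The above multi-dimensional indexing syntax (comma-separated indices such as third_order_tensor[0, 1, 1]) will not work with plain Python objects such as lists of lists, which need chained indexing like nested_list[0][1][1]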

In [27]:
second_order_tensor

array([[1, 2, 3],
       [4, 5, 6]])

In [28]:
# Keep every row (:) and the columns from index 1 onward (1:).
second_order_tensor[:, 1:]

array([[2, 3],
       [5, 6]])

In [29]:
# Integer index 1 picks the second row and drops that axis; :2 keeps its
# first two columns, so the result is one-dimensional.
slice_of_second_row = second_order_tensor[1, :2]

In [30]:
slice_of_second_row.shape

(2,)

#### Boolean indexing

In [31]:
names = np.array(["Bob", "Joe", "Will", "Bob", "Will", "Joe", "Joe"])

In [32]:
data = np.array([[4, 7], [0, 2], [-5, 6], [0, 0], [1, 2], [-12, -4], [3, 4]])

In [33]:
data

array([[  4,   7],
       [  0,   2],
       [ -5,   6],
       [  0,   0],
       [  1,   2],
       [-12,  -4],
       [  3,   4]])

In [34]:
names == "Bob"

array([ True, False, False,  True, False, False, False])

In [35]:
# The boolean mask selects the rows of data at positions where names equals "Bob".
data[names == "Bob"]

array([[4, 7],
       [0, 0]])

In [36]:
names != "Bob"

array([False,  True,  True, False,  True,  True,  True])

In [37]:
data[(names != "Bob")]

array([[  0,   2],
       [ -5,   6],
       [  1,   2],
       [-12,  -4],
       [  3,   4]])

In [38]:
# The integers 0..31 laid out as 8 rows of 4 columns.
sample_matrix = np.arange(32).reshape((8, 4))

In [39]:
sample_matrix

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [40]:
# Two index lists are paired element-wise, so this reads element (1, 2) four times.
sample_matrix[[1, 1, 1, 1], [2, 2, 2, 2]]

array([6, 6, 6, 6])

In [41]:
# A single index list selects whole rows; row 1 is returned four times.
sample_matrix[[1, 1, 1, 1]]

array([[4, 5, 6, 7],
       [4, 5, 6, 7],
       [4, 5, 6, 7],
       [4, 5, 6, 7]])

In [42]:
# The row and column index lists are paired element-wise, so this picks the
# individual elements at (1, 0), (5, 3), (7, 1) and (2, 2) and returns a 1-D array.
sample_matrix[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [43]:
# Two-step selection: first take rows 1, 5, 7, 2 (in that order), then reorder
# the columns of that result to 0, 3, 1, 2. This returns the full 4x4 block
# rather than one element per (row, column) pair.
sample_matrix[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [44]:
compute_sample_matrix = np.array([[0, 1, 0], [1, 2, -2], [6, 3, 2], [-1, 0, -1], [1, 0, 1]])

In [45]:
compute_sample_matrix

array([[ 0,  1,  0],
       [ 1,  2, -2],
       [ 6,  3,  2],
       [-1,  0, -1],
       [ 1,  0,  1]])

In [46]:
compute_sample_matrix.T

array([[ 0,  1,  6, -1,  1],
       [ 1,  2,  3,  0,  0],
       [ 0, -2,  2, -1,  1]])

In [47]:
# Matrix product of the 3x5 transpose with the 5x3 matrix, giving a 3x3 result.
np.dot(compute_sample_matrix.T, compute_sample_matrix)

array([[39, 20, 12],
       [20, 14,  2],
       [12,  2, 10]])

In [48]:
# 4x4 matrix of standard-normal draws.
# NOTE(review): this uses NumPy's global legacy RNG and no seed is set in the
# visible cells, so the values displayed below will differ on every run.
samples = np.random.standard_normal(size=(4, 4))

In [49]:
samples

array([[-0.41132892, -0.92783084, -1.33973658,  0.11588323],
       [ 1.03476662,  0.7628718 , -1.47022276, -0.00531465],
       [-0.83388696, -0.78805074, -0.26912007, -0.68143512],
       [-0.95055867,  1.63536842, -0.46521661,  1.39404273]])

In [50]:
another_sample_array = np.arange(20)

In [52]:
another_sample_array

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [54]:
np.sqrt(another_sample_array)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ,
       3.16227766, 3.31662479, 3.46410162, 3.60555128, 3.74165739,
       3.87298335, 4.        , 4.12310563, 4.24264069, 4.35889894])

In [55]:
np.exp(another_sample_array)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03, 2.20264658e+04, 5.98741417e+04,
       1.62754791e+05, 4.42413392e+05, 1.20260428e+06, 3.26901737e+06,
       8.88611052e+06, 2.41549528e+07, 6.56599691e+07, 1.78482301e+08])

In [56]:
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])

In [57]:
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])

In [58]:
cond = np.array([True, False, True, True, False])

In [59]:
# Pure-Python element-wise selection: take x where the condition is True, else y.
# This loops in the interpreter and produces a list rather than an ndarray, so it
# is slow for large arrays compared with the np.where call used afterwards.
result = [(x if c else y) for x, y, c in zip(xarr, yarr, cond)]

In [60]:
result

[1.1, 2.2, 1.3, 1.4, 2.5]

In [61]:
# Vectorised selection: take from xarr where cond is True, otherwise from yarr.
result_now = np.where(cond, xarr, yarr)

In [62]:
result_now

array([1.1, 2.2, 1.3, 1.4, 2.5])

#### A typical use of where in data analysis is to produce a new array of values based on another array.

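#### Suppose you had a matrix of randomly generated data and you wanted to replace all positive values with one constant and all negative values with another (for example, 2 and -2); this is possible with numpy.where. The cell below uses 0 for the positive values and -1 for everything else.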

In [64]:
# Seeded Generator so that the random draws made from it are reproducible.
rng = np.random.default_rng(seed=12345)

In [65]:
# Draw a 4x4 matrix of standard-normal samples from the seeded generator.
yet_another_random_array = rng.standard_normal((4, 4))

In [66]:
yet_another_random_array

array([[-1.42382504,  1.26372846, -0.87066174, -0.25917323],
       [-0.07534331, -0.74088465, -1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306,  2.34740965,  0.96849691],
       [-0.75938718,  0.90219827, -0.46695317, -0.06068952]])

In [69]:
# Entries greater than 0 become 0; all remaining entries become -1.
np.where(yet_another_random_array > 0, 0, -1)

array([[-1,  0, -1, -1],
       [-1, -1, -1,  0],
       [ 0, -1,  0,  0],
       [-1,  0, -1, -1]])