# Computation on NumPy Arrays

## Universal Functions

1. NumPy provides an easy and flexible interface for optimized computation on arrays of data

2. The key to making these computations fast is to use **vectorized** operations, generally implemented through NumPy's universal functions (ufuncs)

In [2]:
import numpy as np

In [3]:
# Goal: compute the reciprocal of every element of an array
# input:  [1, 2, 3, 4, 5]
# output: [1/1, 1/2, 1/3, 1/4, 1/5]

In [6]:
values = np.random.randint(1,10, size = 5)

def compute_reciprocals(values):
    """Return the reciprocal of every element, computed one element at a time.

    Written as an explicit Python loop on purpose: it is the slow baseline
    that the vectorized expression ``1.0 / values`` is compared against.

    Parameters
    ----------
    values : sequence of numbers
        One-dimensional sequence (list or NumPy array).

    Returns
    -------
    numpy.ndarray
        Float array with ``len(values)`` entries, entry ``i`` holding
        ``1.0 / values[i]``.
    """
    reciprocals = np.empty(len(values))
    for position, element in enumerate(values):
        reciprocals[position] = 1.0 / element
    return reciprocals

In [7]:
compute_reciprocals(values)

array([0.125, 0.2  , 0.25 , 0.5  , 1.   ])

In [8]:
values

array([8, 5, 4, 2, 1])

## Introduction to Ufuncs:

1. For many types of operations, NumPy provides an interface into a kind of statically typed, compiled routine

2. A vectorized operation is performed by applying a **unary** or **binary** operation directly to the array; the operation is then applied to **each** element by the compiled routine instead of by a Python loop. This is known as vectorization

In [9]:
large_array = np.random.randint(1,100, size = 1000000)

In [10]:
%timeit compute_reciprocals(large_array)

2.23 s ± 28.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
np.random.seed(0)
values = np.random.randint(1,10, size = 5)
values

array([6, 1, 4, 4, 8])

In [17]:
print(1.0 / values) #ufunc

[0.16666667 1.         0.25       0.25       0.125     ]


In [18]:
%timeit (1.0 / large_array)

2.24 ms ± 42.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [19]:
li = [1,2,3,4,5]

In [22]:
#between two arrays

np.arange(5) / np.arange(1,6)

array([0.        , 0.5       , 0.66666667, 0.75      , 0.8       ])

In [23]:
#multidimensional arrays

x = np.arange(9).reshape((3,3))
x

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [24]:
2 ** x # binary ufunc (np.power): the scalar 2 is raised to each element of x

array([[  1,   2,   4],
       [  8,  16,  32],
       [ 64, 128, 256]])

1. Vectorized operations in NumPy are implemented via ufuncs, whose main purpose is to quickly execute repeated operations on values in NumPy arrays.

In [26]:
## Array arithmetic

x = np.arange(4)

print('x =', x)

print('x + 5 =', x + 5)
print('x - 5 =', x - 5)
print('x * 2 =', x * 2)

x = [0 1 2 3]
x + 5 = [5 6 7 8]
x - 5 = [-5 -4 -3 -2]
x * 2 = [0 2 4 6]


In [27]:
print('x / 2 = ', x / 2)
print('x // 2 = ', x // 2) #floor division

x / 2 =  [0.  0.5 1.  1.5]
x // 2 =  [0 0 1 1]


In [28]:
print('-x = ', -x) #negation
print('x ** 2 = ', x ** 2)
print('x % 2 = ', x % 2)

-x =  [ 0 -1 -2 -3]
x ** 2 =  [0 1 4 9]
x % 2 =  [0 1 0 1]


In [29]:
-(0.5 * x + 1) ** 2

array([-1.  , -2.25, -4.  , -6.25])

In [30]:
np.add(x ,2) 

array([2, 3, 4, 5])

In [31]:
print('x + 2 = ', x + 2)

x + 2 =  [2 3 4 5]


In [32]:
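# Arithmetic operators and the ufuncs they wrap:
# +   -> np.add()
# -   -> np.subtract()
# -x  -> np.negative()      (unary minus)
# *   -> np.multiply()
# /   -> np.divide()
# //  -> np.floor_divide()
# **  -> np.power()
# %   -> np.mod()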

In [33]:
#Absolute values

x = np.array([-2, -1, 0 , 1 , 2])

abs(x)

array([2, 1, 0, 1, 2])

In [34]:
np.absolute(x)

array([2, 1, 0, 1, 2])

In [35]:
np.abs(x)

array([2, 1, 0, 1, 2])

In [40]:
#Trigonometric Functions

theta = np.linspace(0, np.pi, 3)

In [42]:
print("theta = ", theta)
print("sin = ", np.sin(theta))
print("cos = ", np.cos(theta))
print("tan = ", np.tan(theta))

theta =  [0.         1.57079633 3.14159265]
sin =  [0.0000000e+00 1.0000000e+00 1.2246468e-16]
cos =  [ 1.000000e+00  6.123234e-17 -1.000000e+00]
tan =  [ 0.00000000e+00  1.63312394e+16 -1.22464680e-16]


In [43]:
#Exponents

x = [1,2,3]

print('x =', x)
print("e^x = ", np.exp(x))
print("2^x = ", np.exp2(x))
print("3^x = ", np.power(3,x))

x = [1, 2, 3]
e^x =  [ 2.71828183  7.3890561  20.08553692]
2^x =  [2. 4. 8.]
3^x =  [ 3  9 27]


In [44]:
#Logarithms

x = [1, 2 , 4, 10]

print('x =', x)
print("ln(x) = ", np.log(x))
print("log2(x) = ", np.log2(x))
print("log10(x) = ", np.log10(x))

x = [1, 2, 4, 10]
ln(x) =  [0.         0.69314718 1.38629436 2.30258509]
log2(x) =  [0.         1.         2.         3.32192809]
log10(x) =  [0.         0.30103    0.60205999 1.        ]


## Aggregations: Min, Max, and Everything in Between

1. When we have a large amount of data, the first step is to compute the summary statistics for the data

2. The most common summary statistics are the mean, standard deviation, sum, product, median, minimum and maximum values, range, quantiles, etc.

In [47]:
#Minimum and Maximum

L = np.random.random(100)

In [48]:
min(L), max(L)

(0.011714084185001972, 0.9988470065678665)

In [49]:
np.min(L), np.max(L)

(0.011714084185001972, 0.9988470065678665)

In [52]:
big_array = np.random.rand(1000000)

In [53]:
%timeit min(big_array)
%timeit np.min(big_array)

91.6 ms ± 1.35 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
444 µs ± 22.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [54]:
#Sum
sum(L)

51.7284634402095

In [55]:
np.sum(L)

51.72846344020951

In [56]:
print(big_array.min(), big_array.max(), big_array.sum())

2.067514368597756e-07 0.9999992772471815 499754.2936987862


In [57]:
#Multidimensional Aggregates

M = np.random.random((3,4))
print(M)

[[0.62100371 0.78915521 0.63133028 0.78005231]
 [0.26660251 0.03428148 0.01311698 0.49702961]
 [0.19873074 0.18092282 0.59738671 0.77186924]]


In [58]:
M.sum()

5.381481605312544

In [59]:
M.min()

0.01311698435541564

In [60]:
M.max()

0.7891552134906494

In [61]:
M.min(axis = 0) # minimum of each column: axis 0 (the rows) is collapsed, leaving one value per column

array([0.19873074, 0.03428148, 0.01311698, 0.49702961])

In [62]:
M.min(axis = 1) # minimum of each row: axis 1 (the columns) is collapsed, leaving one value per row

array([0.62100371, 0.01311698, 0.18092282])

In [63]:
#np.sum()
#np.prod #Product of elements
#np.mean()
#np.std
#np.var
#np.min
#np.max
#np.median
#np.argmin  #index of the minimum element
#np.argmax

## Broadcasting

1. Broadcasting is a set of rules for applying **binary ufuncs** (addition, subtraction, multiplication, etc.) on arrays of different sizes

2. Another means of vectorizing operations is to use NumPy's broadcasting functionality

In [64]:
a = np.array([0,1,2])
b = np.array([5,5,5])

a + b

array([5, 6, 7])

In [65]:
a + 5 # adding a scalar: 5 is conceptually stretched to [5, 5, 5] before the addition

array([5, 6, 7])

In [66]:
M = np.ones((3,3))
M

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [67]:
M + a # Adding 1D to 2D

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

[0,1,2]
[0,1,2]
[0,1,2]

In [68]:
a = np.arange(3)
b = np.arange(3).reshape(3,1)

print(a)
print(b)

[0 1 2]
[[0]
 [1]
 [2]]


In [69]:
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [None]:
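# Both operands are stretched to a common (3, 3) shape before being added.
#
# a, shape (3,), is repeated down the rows:
# [0 1 2]
# [0 1 2]
# [0 1 2]
#
# b, shape (3, 1), is repeated across the columns:
# [[0] [0] [0]
#  [1] [1] [1]
#  [2] [2] [2]]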

## Rules of Broadcasting:

1. Rule 1: If the two arrays differ in their number of dimensions, the **shape** of the one with fewer dimensions is **padded** with **ones** on its leading (**left**) side.

2. Rule 2: If the shapes of the two arrays do not match in some dimension, the array whose size is 1 in that dimension is **stretched** to match the other shape.

3. Rule 3 : If in any dimension the sizes disagree and neither is equal to 1, an error is raised.


In [72]:
M = np.ones((2,3))
a = np.arange(3)

print(M)
print(a)

[[1. 1. 1.]
 [1. 1. 1.]]
[0 1 2]


In [71]:
print(M.shape)
print(a.shape)

(2, 3)
(3,)


In [None]:
# M, shape (2, 3):
# [[1. 1. 1.]
#  [1. 1. 1.]]
#
# a, stretched from shape (3,) to (2, 3):
# [[0 1 2]
#  [0 1 2]]

In [73]:
M + a

array([[1., 2., 3.],
       [1., 2., 3.]])

In [74]:
# M.shape -> (2,3)
#a.shape -> (1,3) #Rule 1 

In [75]:
# M.shape -> (2,3)
# a.shape -> (2,3) #Rule 2

In [76]:
a = np.arange(3).reshape((3,1))
b = np.arange(3)

print(a)
print(b)

[[0]
 [1]
 [2]]
[0 1 2]


In [77]:
print(a.shape)
print(b.shape)

(3, 1)
(3,)


In [None]:
#a.shape -> (3,1)
#b.shape -> (1,3) #Rule 1 

In [78]:
# a.shape -> (3,3)
#b.shape -> (3,3) #Rule 2 

In [79]:
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [81]:
M = np.ones((3,2))
a = np.arange(3)

print(M)
print(a)

[[1. 1.]
 [1. 1.]
 [1. 1.]]
[0 1 2]


In [82]:
print(M.shape)
print(a.shape)

(3, 2)
(3,)


In [None]:
# M.shape -> (3,2)
#a.shape -> (1,3) #Rule 1 

In [None]:
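# M.shape -> (3,2)
# a.shape -> (3,3) # Rule 2: a's first dimension is stretched from 1 to 3
# The last dimensions (2 vs 3) still disagree and neither is 1 -> Rule 3: an error is raised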

In [83]:
# Rule 3 in action: M has shape (3, 2) and a has shape (3,). After padding
# and stretching, a becomes (3, 3), which still disagrees with M in the last
# dimension (2 vs 3), so NumPy raises a ValueError. The error is caught and
# printed so the notebook can still be run top to bottom without stopping here.
try:
    M + a
except ValueError as err:
    print('ValueError:', err)

ValueError: operands could not be broadcast together with shapes (3,2) (3,) 