# Some ufuncs in numpy

In [37]:
import numpy as np

## Arithmetic functions

In [38]:
# A: 1-D array holding the integers 0..9, shape (10,).
A = np.arange(10)

# B: random integers drawn with randint(low, high, size); `low` is inclusive and
# `high` is exclusive, so the values fall in 0..9.
# size=(1, 10) asks for 1 row of 10 columns, so B is a 2-D array (ndim == 2), not a
# 1-D array. The double brackets in the printed output show the extra axis.
B = np.random.randint(0, 10, size=(1, 10))

print(f'The first array is: \n{A}\nThe second array is: \n{B}')

The first array is: 
[0 1 2 3 4 5 6 7 8 9]
The second array is: 
[[2 4 9 8 5 4 0 9 4 8]]


In [39]:
# Indexing with np.newaxis inserts a length-1 axis at that position, turning the
# 1-D array A into a 2-D array:
#   C - new axis placed last  -> A becomes a column
#   D - new axis placed first -> A becomes a row
C, D = A[:, np.newaxis], A[np.newaxis, :]

print(
    f'The shape of the array is: {C.shape}\n'
    f' The shape of D is {D.shape}'
)
print(
    f'C: \n{C}\n'
    f'D: \n{D}'
)

The shape of the array is: (10, 1)
 The shape of D is (1, 10)
C: 
[[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]
D: 
[[0 1 2 3 4 5 6 7 8 9]]


In [40]:
# The same element of A seen through both 2-D arrays: row 5 of the column C and
# column 5 of the row D. One index per axis, written as a single tuple index.
print(f'{C[5, 0]}')
print(f'{D[0, 5]}')

5
5


In [41]:
# Print all four arrays so we can refer to them when applying arithmetic operations.

print(
    f'A: \n{A}\n'
    f'B: \n{B}\n'
    f'C: \n{C}\n'
    f'D: \n{D}'
)

A: 
[0 1 2 3 4 5 6 7 8 9]
B: 
[[2 4 9 8 5 4 0 9 4 8]]
C: 
[[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]
D: 
[[0 1 2 3 4 5 6 7 8 9]]


In [42]:
# Add B to each of the other arrays. When the shapes differ, NumPy broadcasts the
# operands to a common shape before adding element by element:
#   B + C -> every element of the column C is added to every element of the row B,
#            producing a 2-D table
#   B + D -> both operands are single rows of the same length: plain element-wise sum
#   B + A -> the 1-D array A is treated as a single row first, then added like D
sum_B_and_C = B + C
sum_B_and_D = B + D
sum_B_and_A = B + A

print(
    f'The sum of B and C is: \n\n{sum_B_and_C}\n\n'
    f'The sum of B and D is: \n{sum_B_and_D}\n\n'
    f'The sum of B and A is: \n{sum_B_and_A}'
)

The sum of B and C is: 

[[ 2  4  9  8  5  4  0  9  4  8]
 [ 3  5 10  9  6  5  1 10  5  9]
 [ 4  6 11 10  7  6  2 11  6 10]
 [ 5  7 12 11  8  7  3 12  7 11]
 [ 6  8 13 12  9  8  4 13  8 12]
 [ 7  9 14 13 10  9  5 14  9 13]
 [ 8 10 15 14 11 10  6 15 10 14]
 [ 9 11 16 15 12 11  7 16 11 15]
 [10 12 17 16 13 12  8 17 12 16]
 [11 13 18 17 14 13  9 18 13 17]]

The sum of B and D is: 
[[ 2  5 11 11  9  9  6 16 12 17]]

The sum of B and A is: 
[[ 2  5 11 11  9  9  6 16 12 17]]


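Note that A is a 1-D array of 10 elements (shape `(10,)`) and B is a 2-D array (shape `(1, 10)`). When we add them, NumPy broadcasts A to a 2-D array of shape `(1, 10)` and then adds the two arrays element by element, so the result is also 2-D.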

In [43]:
# Number of dimensions of each array.
print(
    f'The ndim of A: {A.ndim}\n'
    f'The ndim of B: {B.ndim}\n'
    f'The ndim of C: {C.ndim}\n'
    f'The ndim of D: {D.ndim}\n'
)

# `==` on arrays compares element by element (after broadcasting), so each line
# below prints an array of booleans rather than a single True/False.
print(f'D is a new axis of A, so they are equal? {A == D}\n')
print(f'The sum of A and B is equal to the sum of D and B? {sum_B_and_A == sum_B_and_D}\n')

The ndim of A: 1
The ndim of B: 2
The ndim of C: 2
The ndim of D: 2

D is a new axis of A, so they are equal? [[ True  True  True  True  True  True  True  True  True  True]]

The sum of A and B is equal to the sum of D and B? [[ True  True  True  True  True  True  True  True  True  True]]



Note that A is a 1-D array and D is a 2-D array created from A by adding a new axis. When we compare them with `==`, NumPy broadcasts A to 2-D and compares the two arrays element by element, so the result is a 2-D boolean array rather than a single `True`/`False`.

To remove the extra axis from D, we can use the `squeeze()` method, which drops every axis of length 1.

In [44]:
# Drop the length-1 axis of D with squeeze().
# D is rebound to the squeezed array, so running this cell a second time without
# re-creating D reports the already-squeezed shape in both messages.

print(f'The shape before removing the extra dimension of D is: {D.shape}\n')

D = D.squeeze()  # removes every axis whose length is 1

print(f'The shape after removing the extra dimension of D is: {D.shape}\n')

The shape before removing the extra dimension of D is: (1, 10)

The shape after removing the extra dimension of D is: (10,)



### np.sum()

In [51]:
# np.sum() adds up the elements of an array.
#   - Without `axis`, every element is summed and the result is a single scalar.
#   - With `axis=k`, the sum runs along axis k: that axis is collapsed and the
#     result is an array with one dimension fewer, not a scalar.

print(f'The sum of all elements of A is: {np.sum(A)}\n') # 0+1+2+3+4+5+6+7+8+9 = 45
print(f'The sum of all elements of B is: {np.sum(B)}\n')

# Summing a 2-D array along axis=1 adds across the columns and leaves one total per
# row. B has a single row, so the result is a one-element array (printed with
# brackets) rather than a bare number.
print(f'The sum of B passing the axis parameter is: {np.sum(B, axis=1)}\n')

The sum of all elements of A is: 45

The sum of all elements of B is: 53

The sum of B passing the axis parameter is: [53]



In [60]:
# Rebind A to a 4-D array: the 80 integers 0..79 laid out with shape (2, 2, 5, 4).
A = np.arange(80).reshape((2, 2, 5, 4))

print(
    f'The ndim of A is: {A.ndim}\n'
    f'The shape of A is: {A.shape}\n'
)
print(f'\n{A}\n')

# One index per axis: first block, first sub-block, second row, first column.
print(A[0, 0, 1, 0])

The ndim of A is: 4
The shape of A is: (2, 2, 5, 4)


[[[[ 0  1  2  3]
   [ 4  5  6  7]
   [ 8  9 10 11]
   [12 13 14 15]
   [16 17 18 19]]

  [[20 21 22 23]
   [24 25 26 27]
   [28 29 30 31]
   [32 33 34 35]
   [36 37 38 39]]]


 [[[40 41 42 43]
   [44 45 46 47]
   [48 49 50 51]
   [52 53 54 55]
   [56 57 58 59]]

  [[60 61 62 63]
   [64 65 66 67]
   [68 69 70 71]
   [72 73 74 75]
   [76 77 78 79]]]]

4


In [67]:
# Without `axis`, all elements of A are added together into one number.
print(f'The sum of A is: {np.sum(A)}\n')

# axis=3 sums along the fourth (last) dimension of A. That axis is collapsed, so
# each innermost row of 4 values becomes a single number: 0+1+2+3 = 6,
# 4+5+6+7 = 22, and so on. The result has one dimension fewer than A.
print(f'The sum of A passing the axis parameter is: {np.sum(A, axis=3)}\n')

The sum of A is: 3160

The sum of A passing the axis parameter is: [[[  6  22  38  54  70]
  [ 86 102 118 134 150]]

 [[166 182 198 214 230]
  [246 262 278 294 310]]]



### np.add()

- np.add() is used to add two arrays, it is the same as the + operator but it can be used with the out parameter to store the result in a specific array

- The parameters are:

    - x1: The first array to be added
    - x2: The second array to be added
    - out: The array in which to store the output
    - where: A boolean array (broadcast against the inputs) that selects which positions are computed. Where it is True, the result is written to the output; where it is False, the output keeps whatever value it already had (which is uninitialized if no out array was supplied). If no where parameter is specified, all the elements are computed.
    - casting: Controls what kind of data type conversion is allowed when the inputs or the output have to be cast. The possible values are: ‘no’, ‘equiv’, ‘safe’, ‘same_kind’, ‘unsafe’. The default value is ‘same_kind’.
    - order: This specifies the memory layout of the output array. The possible values are ‘C’ (row-major), ‘F’ (column-major), ‘A’ (Fortran order if the inputs are Fortran-contiguous, C order otherwise) and ‘K’ (match the layout of the inputs as closely as possible). The default value is ‘K’.
    - dtype: This overrides the data type of the output array, which also fixes the precision used for the calculation. If this is not specified, the data type is determined from the input arrays.
    - subok: If this is True (the default), the output array may be a sub-class of ndarray, following the type of the inputs. If it is False, the output is always a base-class ndarray.
    - signature: Selects the specific type loop to use, given as a data type, a tuple of data types, or a signature string such as ‘dd->d’. It is rarely needed in everyday use.

In [72]:
A = np.arange(10)
# size=10 requests a 1-D array of 10 random integers in 0..9. (The original `(10)`
# was just the integer 10 in parentheses, not a tuple.)
B = np.random.randint(0, 10, size=10)

# Pre-allocated destination for the result. np.zeros() creates a float64 array by
# default, so the integer sums are stored as floats (9., 2., ...) once written here.
add_A_and_B = np.zeros(10)
print(f'Array A: {A}\nArray B: {B}\nThe array add_A_and_B is: {add_A_and_B}\n')



# The `out` parameter makes np.add() write the result into the existing array
# add_A_and_B instead of allocating a new array for it.
np.add(A, B, out=add_A_and_B)
print(f'The array add_A_and_B is: {add_A_and_B}\n')

Array A: [0 1 2 3 4 5 6 7 8 9]
Array B: [9 1 2 2 9 5 6 8 7 7]
The array add_A_and_B is: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

The array add_A_and_B is: [ 9.  2.  4.  5. 13. 10. 12. 15. 15. 16.]



In [74]:
# If np.add() does the same thing as the + operator, why use it? Not for speed: the
# two timings recorded below are the same within measurement noise. The reason to
# call np.add() directly is to pass keyword arguments such as `out` or `where`,
# which the operator form cannot accept.

n = 10000000
A = np.random.rand(n)
B = np.random.rand(n)

%timeit A+B
%timeit np.add(A,B)

46.8 ms ± 553 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
46.8 ms ± 326 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


### np.subtract()

- np.subtract() is used to subtract two arrays, it is the same as the - operator but it can be used with the out parameter to store the result in a specific array

- The parameters are:

    - x1: The first array to be subtracted from
    - x2: The second array to be subtracted by
    - out: The array in which to store the output
    - where: A boolean array (broadcast against the inputs) that selects which positions are computed. Where it is True, the result is written to the output; where it is False, the output keeps whatever value it already had (which is uninitialized if no out array was supplied). If no where parameter is specified, all the elements are computed.
    - casting: Controls what kind of data type conversion is allowed when the inputs or the output have to be cast. The possible values are: ‘no’, ‘equiv’, ‘safe’, ‘same_kind’, ‘unsafe’. The default value is ‘same_kind’.
    - order: This specifies the memory layout of the output array. The possible values are ‘C’ (row-major), ‘F’ (column-major), ‘A’ (Fortran order if the inputs are Fortran-contiguous, C order otherwise) and ‘K’ (match the layout of the inputs as closely as possible). The default value is ‘K’.
    - dtype: This overrides the data type of the output array, which also fixes the precision used for the calculation. If this is not specified, the data type is determined from the input arrays.
    - subok: If this is True (the default), the output array may be a sub-class of ndarray, following the type of the inputs. If it is False, the output is always a base-class ndarray.
    - signature: Selects the specific type loop to use, given as a data type, a tuple of data types, or a signature string such as ‘dd->d’. It is rarely needed in everyday use.

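The reason to use this function instead of the '-' operator is not speed: as the timings below show, the two run in about the same time. The function form is useful because it accepts keyword arguments such as out and where, which the operator cannot take.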

In [82]:
# Two independent samples of 10 random floats from np.random.rand(), each rounded
# to 2 decimal places so the printed arrays stay easy to read.
A = np.round(np.random.rand(10), decimals=2)
B = np.round(np.random.rand(10), decimals=2)

print(
    f'A: {A}\n'
    f'B: {B}\n'
)



A: [0.64 0.43 0.5  0.03 0.83 0.27 0.39 0.59 0.3  0.61]
B: [0.3  0.71 0.52 0.88 0.07 0.33 0.18 0.99 0.74 0.53]



In [83]:
# Element-wise difference computed two equivalent ways, to show that the - operator
# and np.subtract() give the same result.
C = A - B
a_subtract_b = np.subtract(A, B)

print(
    f'C: {C}\n'
    f'a_subtract_b: {a_subtract_b}\n'
)

C: [ 0.34 -0.28 -0.02 -0.85  0.76 -0.06  0.21 -0.4  -0.44  0.08]
a_subtract_b: [ 0.34 -0.28 -0.02 -0.85  0.76 -0.06  0.21 -0.4  -0.44  0.08]



In [84]:
# Time the - operator against np.subtract() on two arrays of 10 million random floats.
# The recorded means differ by less than the spread of the first measurement, so
# this run shows no meaningful speed difference between the two forms.
n = 10000000
A = np.random.rand(n)
B = np.random.rand(n)

%timeit A-B
%timeit np.subtract(A,B)

55.5 ms ± 5.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
51.5 ms ± 912 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


### np.multiply()

- np.multiply() is used to multiply two arrays, it is the same as the * operator but it can be used with the out parameter to store the result in a specific array

- The parameters are:

    - x1: The first array to be multiplied
    - x2: The second array to be multiplied
    - out: The array in which to store the output
    - where: A boolean array (broadcast against the inputs) that selects which positions are computed. Where it is True, the result is written to the output; where it is False, the output keeps whatever value it already had (which is uninitialized if no out array was supplied). If no where parameter is specified, all the elements are computed.
    - casting: Controls what kind of data type conversion is allowed when the inputs or the output have to be cast. The possible values are: ‘no’, ‘equiv’, ‘safe’, ‘same_kind’, ‘unsafe’. The default value is ‘same_kind’.
    - order: This specifies the memory layout of the output array. The possible values are ‘C’ (row-major), ‘F’ (column-major), ‘A’ (Fortran order if the inputs are Fortran-contiguous, C order otherwise) and ‘K’ (match the layout of the inputs as closely as possible). The default value is ‘K’.
    - dtype: This overrides the data type of the output array, which also fixes the precision used for the calculation. If this is not specified, the data type is determined from the input arrays.
    - subok: If this is True (the default), the output array may be a sub-class of ndarray, following the type of the inputs. If it is False, the output is always a base-class ndarray.
    - signature: Selects the specific type loop to use, given as a data type, a tuple of data types, or a signature string such as ‘dd->d’. It is rarely needed in everyday use.


In [85]:
# Fresh operands for the multiplication demo: 10 random floats each, rounded to
# 2 decimal places.
A = np.round(np.random.rand(10), decimals=2)
B = np.round(np.random.rand(10), decimals=2)

print(
    f'A: {A}\n'
    f'B: {B}\n'
)

A: [0.36 0.77 0.94 0.47 0.16 0.91 0.88 0.78 0.71 0.32]
B: [0.04 0.7  0.77 0.41 0.02 0.28 0.06 0.17 0.45 0.27]



In [86]:
# Element-wise product computed two equivalent ways: the * operator and
# np.multiply(). This is not a matrix product; each pair of elements is multiplied.
C = A * B
a_multiply_b = np.multiply(A, B)

print(
    f'C: {C}\n'
    f'a_multiply_b: {a_multiply_b}\n'
)

C: [0.0144 0.539  0.7238 0.1927 0.0032 0.2548 0.0528 0.1326 0.3195 0.0864]
a_multiply_b: [0.0144 0.539  0.7238 0.1927 0.0032 0.2548 0.0528 0.1326 0.3195 0.0864]



In [87]:
# Time the * operator against np.multiply() on two arrays of 10 million random floats.
# In the recorded run the first measurement has a very large spread and triggered
# the "slowest run" warning, so it should not be read as the operator being slower.
n = 10000000
A = np.random.rand(n)
B = np.random.rand(n)

%timeit A*B
%timeit np.multiply(A,B)


The slowest run took 4.30 times longer than the fastest. This could mean that an intermediate result is being cached.
82 ms ± 48.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
54.2 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


### np.power()

- np.power() is used to raise the elements of the first array to the power of the elements of the second array, it is the same as the ** operator but it can be used with the out parameter to store the result in a specific array

- The parameters are:

    - x1: The first array to be raised to a power
    - x2: The second array containing the powers to which the elements of the first array will be raised
    - out: The array in which to store the output
    - where: A boolean array (broadcast against the inputs) that selects which positions are computed. Where it is True, the result is written to the output; where it is False, the output keeps whatever value it already had (which is uninitialized if no out array was supplied). If no where parameter is specified, all the elements are computed.
    - casting: Controls what kind of data type conversion is allowed when the inputs or the output have to be cast. The possible values are: ‘no’, ‘equiv’, ‘safe’, ‘same_kind’, ‘unsafe’. The default value is ‘same_kind’.
    - order: This specifies the memory layout of the output array. The possible values are ‘C’ (row-major), ‘F’ (column-major), ‘A’ (Fortran order if the inputs are Fortran-contiguous, C order otherwise) and ‘K’ (match the layout of the inputs as closely as possible). The default value is ‘K’.
    - dtype: This overrides the data type of the output array, which also fixes the precision used for the calculation. If this is not specified, the data type is determined from the input arrays.
    - subok: If this is True (the default), the output array may be a sub-class of ndarray, following the type of the inputs. If it is False, the output is always a base-class ndarray.
    - signature: Selects the specific type loop to use, given as a data type, a tuple of data types, or a signature string such as ‘dd->d’. It is rarely needed in everyday use.

In [88]:
# Fresh operands for the power demo: A supplies the bases and B the exponents,
# 10 random floats each, rounded to 2 decimal places.
A = np.round(np.random.rand(10), decimals=2)
B = np.round(np.random.rand(10), decimals=2)

print(
    f'A: {A}\n'
    f'B: {B}\n'
)


A: [0.96 0.88 0.56 0.29 0.96 0.36 0.81 0.64 0.35 0.6 ]
B: [0.38 0.09 0.02 0.16 0.97 0.57 0.58 0.86 0.71 0.25]



In [89]:
# Square every element of A two equivalent ways: the ** operator and np.power().
# The scalar exponent 2 is applied to each element of A.
C = A ** 2
a_power_2 = np.power(A, 2)

print(
    f'C: {C}\n'
    f'a_power_2: {a_power_2}\n'
)

C: [0.9216 0.7744 0.3136 0.0841 0.9216 0.1296 0.6561 0.4096 0.1225 0.36  ]
a_power_2: [0.9216 0.7744 0.3136 0.0841 0.9216 0.1296 0.6561 0.4096 0.1225 0.36  ]



In [90]:
# With an array exponent, np.power() works element by element: each element of A
# is raised to the power given by the element of B at the same position.
a_power_b = np.power(A, B)

print(f'a_power_b: {a_power_b}\n')


a_power_b: [0.98460734 0.98856093 0.98847061 0.82032072 0.96117639 0.55858906
 0.88495525 0.68126296 0.47455692 0.88011174]



### Other functions

There are other functions, such as:
```python

np.divide() # divides two arrays element-wise; the same as the / operator

np.mod() # element-wise remainder of a division; the same as the % operator

np.mean() # get the mean of an array

np.std() # get the standard deviation of an array

np.var() # get the variance of an array
``` 

## Trigonometric functions

A: [0.37322484 0.06892315 0.29957257 0.00904907 0.24389589 0.56013346
 0.61641783 0.73890273 0.69757144 0.2005603 ]
B: [0.47217607 0.13734291 0.37875502 0.89335247 0.75939972 0.44053071
 0.43813432 0.27744128 0.93399749 0.7161539 ]



## Comparison functions

## Missing values (NaN-safe functions)

## Rounding Floating Point Numbers

## Output arguments