In [1]:
import numpy as np 

In [4]:
# The same kind of data in two containers: a NumPy array and a plain Python list.
# Note that the list holds five elements while the array holds four.
narray = np.array([1, 2, 3, 4])
alist = [1, 2, 3, 4, 5]

In [5]:
# Show the contents of both containers, one per line ...
print(alist, narray, sep='\n')

# ... and then the type of each one.
print(type(alist), type(narray), sep='\n')

[1, 2, 3, 4, 5]
[1 2 3 4]
<class 'list'>
<class 'numpy.ndarray'>


# Algebraic operations on NumPy arrays

In [9]:
# `+` adds NumPy arrays element by element ...
array_sum = narray + narray
# ... but concatenates Python lists.
list_concat = alist + alist

print(array_sum)
print(list_concat)

[2 4 6 8]
[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]


In [7]:
# Multiplying by a scalar scales every element of a NumPy array ...
scaled_array = narray * 3
# ... but repeats a Python list that many times.
repeated_list = alist * 3

print(scaled_array)
print(repeated_list)

[ 3  6  9 12]
[1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5]


In [10]:
# np.array builds a 2-D array from any rows of equal length,
# whether they are NumPy arrays, Python lists, or a mix of both.
npmatrix1 = np.array([narray] * 3)  # three rows, each a NumPy array
npmatrix2 = np.array([alist] * 3)  # three rows, each a Python list
npmatrix3 = np.array([narray, [1, 1, 1, 1], narray])  # both kinds of row

for matrix in (npmatrix1, npmatrix2, npmatrix3):
    print(matrix)

[[1 2 3 4]
 [1 2 3 4]
 [1 2 3 4]]
[[1 2 3 4 5]
 [1 2 3 4 5]
 [1 2 3 4 5]]
[[1 2 3 4]
 [1 1 1 1]
 [1 2 3 4]]


In [11]:
# Example 1: a well-formed matrix (every row has the same length)

okmatrix = np.array([[1, 2],
                     [3, 4]])  # 2 x 2
print(okmatrix)
print(2 * okmatrix)  # every element doubled

[[1 2]
 [3 4]]
[[2 4]
 [6 8]]


In [12]:
# Example 2: a malformed ("ragged") matrix
#
# The third row has 3 elements while the others have 2, so the rows cannot
# form a rectangular array. NumPy rejects this input with a ValueError; the
# error is caught and displayed here so the notebook still runs top to bottom.
# (Older NumPy releases are understood to have built an array of Python list
# objects instead, in which case `* 2` repeats each list rather than scaling it.)

ragged_rows = [[1, 2], [3, 4], [5, 6, 7]]

try:
    badmatrix = np.array(ragged_rows)
except ValueError as err:
    badmatrix = None  # no usable matrix could be built
    print(f"{type(err).__name__}: {err}")
else:
    print(badmatrix)  # Print the malformed matrix
    print(badmatrix * 2)  # Does NOT scale the values: each row list is repeated

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (3,) + inhomogeneous part.

In [13]:
# Scaling and translating matrices

In [14]:
# Scale the matrix by 2, then translate it by 1 unit.
# Both operations are applied to every element.
result = 2 * okmatrix + 1
print(result)

[[3 5]
 [7 9]]


In [15]:
# Matrices of the same shape are combined element by element.
result1 = okmatrix + okmatrix  # sum of two compatible matrices
result2 = okmatrix - okmatrix  # their difference (the "difference vector")

print(result1)
print(result2)

[[2 4]
 [6 8]]
[[0 0]
 [0 0]]


In [16]:
# `*` between two arrays of the same shape is the element-wise product,
# not matrix multiplication.
result = okmatrix * okmatrix # Multiply each element by itself
print(result)

[[ 1  4]
 [ 9 16]]


In [17]:
# `.T` swaps rows and columns: a 3 x 2 matrix becomes 2 x 3.
matrix3x2 = np.array([[1, 2],
                      [3, 4],
                      [5, 6]])
print('Original matrix 3 x 2', matrix3x2, sep='\n')
print('Transposed matrix 2 x 3', matrix3x2.T, sep='\n')

Original matrix 3 x 2
[[1 2]
 [3 4]
 [5 6]]
Transposed matrix 2 x 3
[[1 3 5]
 [2 4 6]]


In [18]:
# Transposing has no visible effect on a 1-D array: it has only one axis.
nparray = np.array([1, 2, 3, 4])
transposed = nparray.T

print('Original array')
print(nparray)
print('Transposed array')
print(transposed)

Original array
[1 2 3 4]
Transposed array
[1 2 3 4]


In [19]:
# To get a vector that can be transposed, define it as a 1 x 4 matrix
# (note the two levels of square brackets); its transpose is a 4 x 1 column.
nparray = np.array([[1, 2, 3, 4]])
print('Original array', nparray, sep='\n')
print('Transposed array', nparray.T, sep='\n')

Original array
[[1 2 3 4]]
Transposed array
[[1]
 [2]
 [3]
 [4]]


## Get the norm of a nparray or matrix

In linear algebra, the norm of an n-dimensional vector $\vec a$   is defined as:

$$ norm(\vec a) = ||\vec a|| = \sqrt {\sum_{i=1}^{n} a_i ^ 2}$$

Calculating the norm of a vector, or even of a matrix, is a common operation when dealing with data. NumPy has a set of functions for linear algebra in the subpackage **linalg**, including the **norm** function. Let us see how to get the norm of a given array or matrix:

In [21]:
# The same four numbers arranged as a 1-D array and as a 2 x 2 matrix
# (note the two levels of square brackets for the matrix).
nparray1 = np.array([1, 2, 3, 4])
nparray2 = np.array([[1, 2], [3, 4]])

# With no extra arguments, norm() is applied to the whole input.
norm1 = np.linalg.norm(nparray1)
norm2 = np.linalg.norm(nparray2)

print(norm1, norm2, sep='\n')

5.477225575051661
5.477225575051661


Note that without any other parameter, the norm function treats the matrix as being just a flat array of numbers (for a matrix, this is the Frobenius norm). However, it is possible to get the norm by rows or by columns. The axis parameter controls the form of the operation:

- axis=0 means get the norm of each column
- axis=1 means get the norm of each row.

In [22]:
nparray2 = np.array([[1, 1],
                     [2, 2],
                     [3, 3]])  # 3 x 2 matrix

# axis=0 collapses the rows: one norm per column (2 values).
normByCols = np.linalg.norm(nparray2, axis=0)
# axis=1 collapses the columns: one norm per row (3 values).
normByRows = np.linalg.norm(nparray2, axis=1)

print(normByCols, normByRows, sep='\n')

[3.74165739 3.74165739]
[1.41421356 2.82842712 4.24264069]


## The dot product between arrays: All the flavors

The dot product or scalar product or inner product between two vectors $\vec a$ and $\vec b$ of the same size is defined as:
$$\vec a \cdot \vec b = \sum_{i=1}^{n} a_i b_i$$

The dot product takes two vectors and returns a single number.

In [23]:
nparray1 = np.array([0, 1, 2, 3])
nparray2 = np.array([4, 5, 6, 7])

# Recommended way: the dedicated function.
flavor1 = np.dot(nparray1, nparray2)
print(flavor1)

# Ok way: element-wise product followed by a sum.
elementwise = nparray1 * nparray2
flavor2 = np.sum(elementwise)
print(flavor2)

# Geeks way: the matrix-multiplication operator.
flavor3 = nparray1 @ nparray2
print(flavor3)

# Noobs way: an explicit Python loop -- as you should never do it.
flavor4 = 0
for left, right in zip(nparray1, nparray2):
    flavor4 = flavor4 + left * right

print(flavor4)

38
38
38
38


In [24]:
# np.dot accepts NumPy arrays and plain Python lists alike.
# NOTE: despite their names, norm1 and norm2 hold dot products here, not norms.
left, right = [1, 2], [3, 4]

norm1 = np.dot(np.array(left), np.array(right))  # dot product on NumPy arrays
norm2 = np.dot(left, right)  # dot product on Python lists

print(norm1, '=', norm2)

11 = 11


Finally, note that the norm is the square root of the dot product of the vector with itself. That gives many options to write that function:

$$ norm(\vec a) = ||\vec a|| = \sqrt {\sum_{i=1}^{n} a_i ^ 2} = \sqrt {\vec a \cdot \vec a}$$


## Sums by rows or columns
Another general operation performed on matrices is the sum by rows or columns. Just as we did for the function norm, the axis parameter controls the form of the operation:

- axis=0 means to sum the elements of each column together.
- axis=1 means to sum the elements of each row together.

In [26]:
nparray2 = np.array([[1, -1],
                     [2, -2],
                     [3, -3]])  # 3 x 2 matrix

sumByCols = nparray2.sum(axis=0)  # one total per column (2 values)
sumByRows = nparray2.sum(axis=1)  # one total per row (3 values)

print('Sum by columns: ', sumByCols, sep='\n')
print('Sum by rows:', sumByRows, sep='\n')

Sum by columns: 
[ 6 -6]
Sum by rows:
[0 0 0]


## Get the mean by rows or columns

As with the sums, one can get the **mean** by rows or columns using the **axis** parameter. Just remember that the mean is the sum of the elements divided by the length of the vector:
$$ mean(\vec a) = \frac {{\sum_{i=1}^{n} a_i }}{n}$$

In [27]:
# A 3 x 2 matrix chosen so that its overall mean is 0.
nparray2 = np.array([[1, -1],
                     [2, -2],
                     [3, -3]])

mean = np.mean(nparray2)  # single value for the whole matrix
meanByCols = np.mean(nparray2, axis=0)  # one mean per column (2 values)
meanByRows = np.mean(nparray2, axis=1)  # one mean per row (3 values)

print('Matrix mean: ', mean, sep='\n')
print('Mean by columns: ', meanByCols, sep='\n')
print('Mean by rows:', meanByRows, sep='\n')

Matrix mean: 
0.0
Mean by columns: 
[ 2. -2.]
Mean by rows:
[0. 0. 0.]


## Center the columns of a matrix 
Centering the attributes of a data matrix is another essential preprocessing step. Centering a matrix means subtracting the column mean from every element of that column. The mean of each column of a centered matrix is always 0.

In [28]:
nparray2 = np.array([[1, 1],
                     [2, 2],
                     [3, 3]])  # 3 x 2 matrix

# The column means have one entry per column, so subtracting them
# broadcasts across every row and removes each column's mean.
columnMeans = np.mean(nparray2, axis=0)
nparrayCentered = nparray2 - columnMeans

print('Original matrix', nparray2, sep='\n')
print('Centered by columns matrix', nparrayCentered, sep='\n')

print('New mean by column', nparrayCentered.mean(axis=0), sep='\n')

Original matrix
[[1 1]
 [2 2]
 [3 3]]
Centered by columns matrix
[[-1. -1.]
 [ 0.  0.]
 [ 1.  1.]]
New mean by column
[0. 0.]


In [29]:
nparray2 = np.array([[1, 3], [2, 4], [3, 5]])  # 3 x 2 matrix

# Remove the mean of each row. keepdims=True keeps the row means as a
# 3 x 1 column, so they broadcast across the columns of nparray2 directly.
# This avoids transposing the data and then transposing the result back.
rowMeans = np.mean(nparray2, axis=1, keepdims=True)
nparrayCentered = nparray2 - rowMeans

print('Original matrix')
print(nparray2)
print('Centered by rows matrix')
print(nparrayCentered)

print('New mean by rows')
print(nparrayCentered.mean(axis=1))

Original matrix
[[1 3]
 [2 4]
 [3 5]]
Centered by rows matrix
[[-1.  1.]
 [-1.  1.]
 [-1.  1.]]
New mean by rows
[0. 0. 0.]


In [30]:
nparray2 = np.array([[1, 3],
                     [2, 4],
                     [3, 5]])  # 3 x 2 matrix

# The mean can be requested in two equivalent ways:
mean1 = np.mean(nparray2)  # "static" way: module-level function
mean2 = nparray2.mean()  # "dynamic" way: method on the array itself

print(mean1, ' == ', mean2)

3.0  ==  3.0
