In [1]:
import numpy as np

In [2]:
# Build a one-dimensional array (a vector) from a Python sequence and show it.
x = np.array((1, 2, 3, 4))
x

array([1, 2, 3, 4])

In [3]:
# Build a two-dimensional array (a matrix) with 3 rows and 2 columns.
A = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
A

array([[1, 2],
       [3, 4],
       [5, 6]])

In [4]:
# Dimensions of the matrix as a (rows, columns) tuple.
np.shape(A)

(3, 2)

In [5]:
# A one-dimensional array reports a single-element shape tuple.
np.shape(x)

(4,)

In [6]:
# Number of entries along the first axis of x (the same value len(x) returns).
x.shape[0]

4

In [7]:
# 3x2 matrix used to demonstrate transposition.
A = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
A

array([[1, 2],
       [3, 4],
       [5, 6]])

In [8]:
# Transpose: rows of A become the columns of A_t.
A_t = A.transpose()
A_t

array([[1, 3, 5],
       [2, 4, 6]])

We can check the dimensions of the matrices:

In [9]:
# Shape of the original matrix: (rows, columns).
np.shape(A)

(3, 2)

In [10]:
# Shape of the transpose: the two dimensions of A swapped.
np.shape(A_t)

(2, 3)

In [11]:
# First operand for the matrix-addition example (3x2).
A = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
A

array([[1, 2],
       [3, 4],
       [5, 6]])

In [12]:
# Second operand for the matrix-addition example (also 3x2).
B = np.array([
    [2, 5],
    [7, 4],
    [4, 3],
])
B

array([[2, 5],
       [7, 4],
       [4, 3]])

In [13]:
# Element-wise sum of A and B; both operands have the same shape.
C = np.add(A, B)
C

array([[ 3,  7],
       [10,  8],
       [ 9,  9]])

In [14]:
# Display the current contents of A.
A

array([[1, 2],
       [3, 4],
       [5, 6]])

In [15]:
# Example: add the scalar 4 to every element of the matrix A.
C = np.add(A, 4)
C

array([[ 5,  6],
       [ 7,  8],
       [ 9, 10]])

In [16]:
# 3x2 matrix for the broadcasting example.
A = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
A

array([[1, 2],
       [3, 4],
       [5, 6]])

In [17]:
# 3x1 column matrix: one value per row.
B = np.array([
    [2],
    [4],
    [6],
])
B

array([[2],
       [4],
       [6]])

In [18]:
# Broadcasting: the single column of B is stretched across the columns of A,
# so each row of A has the matching entry of B added to it.
C = np.add(A, B)
C

array([[ 3,  4],
       [ 7,  8],
       [11, 12]])

In [19]:
# Vector of the integers 0..3, followed by its shape.
x = np.arange(0, 4)
np.shape(x)

(4,)

In [20]:
# Append a new trailing axis, turning the 1-D vector into a single column.
x[..., np.newaxis]

array([[0],
       [1],
       [2],
       [3]])

In [21]:
# 4x3 matrix of draws from the standard normal distribution.
# NOTE(review): no seed is set in the visible cells, so the values change
# from run to run.
A = np.random.standard_normal(size=(4, 3))
A

array([[-1.2849334 ,  0.27762136, -0.84998547],
       [ 0.630019  , -0.55143338, -0.03857505],
       [-0.87915029, -0.91051153, -1.45149577],
       [ 0.44174042, -0.72012797,  0.03073409]])

In [22]:
# A has shape (4, 3) and x has shape (4,): the trailing dimensions (3 vs 4)
# do not match, so the subtraction raises ValueError. The error is caught
# and reported instead of stopping the notebook.
try:
    np.subtract(A, x)
except ValueError:
    print("Operation cannot be completed. Dimension mismatch")

Operation cannot be completed. Dimension mismatch


In [23]:
# But this works -- x is reshaped to a column, which is then subtracted
# from each column of A.
np.subtract(A, x[:, np.newaxis])

array([[-1.2849334 ,  0.27762136, -0.84998547],
       [-0.369981  , -1.55143338, -1.03857505],
       [-2.87915029, -2.91051153, -3.45149577],
       [-2.55825958, -3.72012797, -2.96926591]])

In [24]:
# Matrix product of a 3x2 matrix with a 2x1 column, giving a 3x1 result.
A = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
B = np.array([
    [2],
    [4],
])
C = A @ B
C

array([[10],
       [22],
       [34]])

In [25]:
# The same matrix product of A and B, spelled with a different NumPy call.
C = np.matmul(A, B)
C

array([[10],
       [22],
       [34]])

In [26]:
# 3x3 identity matrix: ones on the main diagonal, zeros elsewhere.
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [28]:
# Invert a square 3x3 matrix.
A = np.array([
    [3, 0, 2],
    [2, 0, -2],
    [0, 1, 1],
])
A_inv = np.linalg.inv(A)
A_inv

array([[ 0.2,  0.2,  0. ],
       [-0.2,  0.3,  1. ],
       [ 0.2, -0.3, -0. ]])

## Principal Component Analysis
Principal Component Analysis, or PCA for short, is a method for reducing the dimensionality of data. It can be thought of as a projection method where data with m columns (features) is projected into a subspace with m or fewer columns, whilst retaining the essence of the original data.

In [29]:
# principal component analysis
from numpy import array
from numpy import mean
from numpy import cov
from numpy.linalg import eig

# Toy data set: 3 samples (rows) by 2 features (columns).
A = array([
    [1, 2],
    [3, 4],
    [5, 6]
])

# column means
M = mean(A.T, axis=1)
print(M)
# center columns by subtracting column means
C = A - M
print(C)
# calculate covariance matrix of centered matrix
V = cov(C.T)
print(V)
# factorize covariance matrix
values, vectors = eig(V)
# eig() makes no promise about the order of the eigenvalues it returns, so
# sort the eigenpairs by decreasing eigenvalue. Column i of `vectors` belongs
# to values[i]; after sorting, the first column is the direction of greatest
# variance.
order = values.argsort()[::-1]
values = values[order]
vectors = vectors[:, order]
print(values)
print(vectors)
# project data onto the eigenvectors (one row per sample after the transpose)
P = vectors.T.dot(C.T)
print(P.T)

[3. 4.]
[[-2. -2.]
 [ 0.  0.]
 [ 2.  2.]]
[[4. 4.]
 [4. 4.]]
[8. 0.]
[[ 0.70710678 -0.70710678]
 [ 0.70710678  0.70710678]]
[[-2.82842712  0.        ]
 [ 0.          0.        ]
 [ 2.82842712  0.        ]]
