# Linear Algebra with examples using Numpy

In [1]:
# do this every time you write code, ever!
import numpy as np

## Vectors

A vector can be represented by an array of real numbers

$$\mathbf{x} = [x_1, x_2, \ldots, x_n]$$

Geometrically, a vector specifies the coordinates of the tip of the vector if the tail were placed at the origin

In [2]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [3]:
x.shape

(10,)

### Vector Indexing

In [4]:
# index into array
x[4]

4

In [5]:
# can multi-index into numpy array 
x[[2, 3, 5]]

array([2, 3, 5])

In [6]:
# if you aren't sure of how long an array is...
x[-1]

9

In [7]:
# can grab range of indices (slice)
x[:5]

array([0, 1, 2, 3, 4])

### Vector Norm

The norm of a vector $\mathbf{x}$ is defined by

$$||\boldsymbol{x}|| = \sqrt{x_1^2 + x_2^2 + \cdots + x_n^2}$$

In [8]:
print(np.sqrt(np.sum(x**2)))
print(np.linalg.norm(x))

16.881943016134134
16.881943016134134


In [9]:
# the norm is "L2" by default, but you can change that 
np.linalg.norm(x, ord=1)

45.0

In [12]:
np.abs(x).sum()

45

### Arithmetic Operations

Adding a constant to a vector adds the constant to each element


$$a + \boldsymbol{x} = [a + x_1, a + x_2, \ldots, a + x_n]$$

In [13]:
x + 1

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

Multiplying a vector by a constant multiplies each term by the constant.


$$a \boldsymbol{x} = [ax_1, ax_2, \ldots, ax_n]$$

In [14]:
x*3

array([ 0,  3,  6,  9, 12, 15, 18, 21, 24, 27])

In [15]:
x/10

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

In [16]:
x**2

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

### Linear Combinations of Vectors

If we have two vectors $\boldsymbol{x}$ and $\boldsymbol{y}$ of the same length $(n)$, then

$$\boldsymbol{x} + \boldsymbol{y} = [x_1+y_1, x_2+y_2, \ldots, x_n+y_n]$$

In [17]:
0.5*x**2 - 2*x

array([ 0. , -1.5, -2. , -1.5,  0. ,  2.5,  6. , 10.5, 16. , 22.5])

In [18]:
y = np.arange(len(x))

In [19]:
y

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [20]:
x.shape, y.shape

((10,), (10,))

In [21]:
x - y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

A _linear combination_ of a collection of vectors $(\boldsymbol{x}_1,
                                                    \boldsymbol{x}_2, \ldots,
                                                    \boldsymbol{x}_m)$ 
is a vector of the form

$$a_1 \cdot \boldsymbol{x}_1 + a_2 \cdot \boldsymbol{x}_2 + 
\cdots + a_m \cdot \boldsymbol{x}_m$$

In [22]:
a1=2
x1 = np.array([1,2,3,4])
print(a1*x1)
a2=4
x2 = np.array([5,6,7,8])
print(a2*x2)
print(a1*x1 + a2*x2)

[2 4 6 8]
[20 24 28 32]
[22 28 34 40]


### Vector Dot Product

If we have two vectors $\boldsymbol{x}$ and $\boldsymbol{y}$ of the same length $(n)$, then the _dot product_ is given by

$$\boldsymbol{x} \cdot \boldsymbol{y} = x_1y_1 + x_2y_2 + \cdots + x_ny_n$$

In [23]:
y = np.arange(len(x)+1, 1, -1)
print('x: ', x)
print('y: ', y)
np.dot(x,y)

x:  [0 1 2 3 4 5 6 7 8 9]
y:  [11 10  9  8  7  6  5  4  3  2]


210

In [24]:
# accumulate the dot product by hand: multiply matching elements and sum them.
# Lowercase loop names are used so the loop does not leave scalars bound to
# `X` and `Y`, names this notebook uses for matrices.
dot = 0
for x_i, y_i in zip(x, y):
    dot += x_i * y_i

print(dot)

210


If $\mathbf{x} \cdot \mathbf{y} = 0$ then $x$ and $y$ are *orthogonal* (aligns with the intuitive notion of perpendicular)

In [26]:
w = np.array([1, 2])
v = np.array([-2, 1])
np.dot(w,v)

0

The norm squared of a vector is just the vector dot product with itself
$$
||x||^2 = x \cdot x
$$

In [27]:
print(np.linalg.norm(x)**2)
print(np.dot(x,x))

285.00000000000006
285


The distance between two vectors is the norm of the difference.
$$
d(x,y) = ||x-y||
$$

In [28]:
np.linalg.norm(x-y)

19.235384061671343

In [29]:
((x - y)**2).sum()**0.5

19.235384061671343

### Cosine Similarity (a similarity measure rather than a distance)

_Cosine Similarity_ is the cosine of the angle between the two vectors, given by

$$\cos(\theta) = \frac{\boldsymbol{x} \cdot \boldsymbol{y}}{||\boldsymbol{x}|| \text{ } ||\boldsymbol{y}||}$$



In [30]:
a = np.array([1,2,3,4])
b = np.array([5,6,7,8])
np.dot(a,b)/(np.linalg.norm(a)*np.linalg.norm(b))

0.9688639316269662

In [31]:
# now we'll check the cosine similarity of a, and a vector with exactly twice the magnitude of a
np.dot(a,a*2)/(np.linalg.norm(a)*np.linalg.norm(a*2))

1.0

### Masking

You can also broadcast a boolean condition to an entire vector

In [32]:
# draw 20 random integers from -10 to 9 for each vector (the upper bound is exclusive)
# no seed is set, so the values shown in the outputs below change on every run
a = np.random.randint(-10, 10, size = (20,))
b = np.random.randint(-10, 10, size = (20,))

In [33]:
a

array([-3, -5, -3,  1,  3, -5, -3, -3,  2,  8, -3, -6, -5, -9, -5,  2,  5,
       -2, -7, -2])

In [34]:
b

array([  2,  -8,   3,   8,   6,   7,  -4,  -4, -10,  -5,   3,   8,  -5,
        -7,   1,  -8,   4,   9,   5,   4])

In [35]:
# this returns a boolean array
a >= 0

array([False, False, False,  True,  True, False, False, False,  True,
        True, False, False, False, False, False,  True,  True, False,
       False, False])

In [36]:
# this returns only the elements of b where the condition is True
b[a >= 0]

array([  8,   6, -10,  -5,  -8,   4])

In [37]:
# you can perform arithmetic operations on the boolean array (True counts as 1, False as 0)
print((a >= 0).sum()) # tells you how many elements are true
print((a >= 0).mean()) # gives you the fraction of elements that are true

6
0.3


In [38]:
b[(a >= 0) & (b >= 0)]

array([8, 6, 4])

# Matrices

An $n \times p$ matrix is an array of numbers with $n$ rows and $p$ columns:

$$
X =
  \begin{bmatrix}
    x_{11} & x_{12} & \cdots & x_{1p} \\
    x_{21} & x_{22} & \cdots & x_{2p} \\
    \vdots & \vdots & \ddots & \vdots \\
    x_{n1} & x_{n2} & \cdots & x_{np} 
  \end{bmatrix}
$$

$n$ = the number of subjects  
$p$ = the number of features

For the following $2 \times 3$ matrix
$$
X =
  \begin{bmatrix}
    1 & 2 & 3\\
    4 & 5 & 6
  \end{bmatrix}
$$

We can create it in Python using NumPy

In [40]:
X = np.array([[1,2,3],[4,5,6]])
print(X)
print(X.shape)

[[1 2 3]
 [4 5 6]]
(2, 3)


In [41]:
X[1]

array([4, 5, 6])

In [42]:
X[1, :]

array([4, 5, 6])

In [43]:
X[:, 1]

array([2, 5])

### Basic Properties
Let $X$ and $Y$ be matrices **of the same dimension $n \times p$**. Let $x_{ij}$ and $y_{ij}$, for $i=1,2,\ldots,n$ and $j=1,2,\ldots,p$, denote the entries of these matrices; then

1. $X+Y$ is the matrix whose $(i,j)^{th}$ entry is $x_{ij} + y_{ij}$
2. $X-Y$ is the matrix whose $(i,j)^{th}$ entry is $x_{ij} - y_{ij}$
3. $aX$, where $a$ is any real number, is the matrix whose $(i,j)^{th}$ entry is $ax_{ij}$ 

In [44]:
X = np.array([[1,2,3],[4,5,6]])
print(X)
Y = np.array([[7,8,9],[10,11,12]])
print(Y)
print(X+Y)

[[1 2 3]
 [4 5 6]]
[[ 7  8  9]
 [10 11 12]]
[[ 8 10 12]
 [14 16 18]]


In [45]:
X = np.array([[1,2,3],[4,5,6]])
print(X)
Y = np.array([[7,8,9],[10,11,12]])
print(Y)
print(X-Y)

[[1 2 3]
 [4 5 6]]
[[ 7  8  9]
 [10 11 12]]
[[-6 -6 -6]
 [-6 -6 -6]]


In [46]:
X = np.array([[1,2,3],[4,5,6]])
print(X)
a=5
print(a*X)

[[1 2 3]
 [4 5 6]]
[[ 5 10 15]
 [20 25 30]]


In order to multiply two matrices, they must be _conformable_ such that the number of columns of the first matrix must be the same as the number of rows of the second matrix.

Let $X$ be a matrix of dimension $n \times k$ and let $Y$ be a matrix of dimension $k \times p$, then the product $XY$ will be a matrix of dimension $n \times p$ whose $(i,j)^{th}$ element is given by the dot product of the $i^{th}$ row of $X$ and the $j^{th}$ column of $Y$

$$\sum_{s=1}^k x_{is}y_{sj} = x_{i1}y_{1j} + \cdots + x_{ik}y_{kj}$$




### Note: 

$$XY \neq YX$$

If $X$ and $Y$ are square matrices of the same dimension, then both products $XY$ and $YX$ exist; however, there is no guarantee the two products will be the same


In [47]:
X = np.array([[2,1,0], [-1,2,3]])
Y = np.array([[0,-2], [1,2], [1,1]])
X

array([[ 2,  1,  0],
       [-1,  2,  3]])

In [48]:
Y

array([[ 0, -2],
       [ 1,  2],
       [ 1,  1]])

In [57]:
X.shape, Y.shape

((2, 3), (3, 2))

In [54]:
X.dot(Y)

array([[ 1, -2],
       [ 5,  9]])

In [55]:
Y.dot(X)

array([[ 2, -4, -6],
       [ 0,  5,  6],
       [ 1,  3,  3]])

In [58]:
np.array_equal(X.dot(Y), Y.dot(X))

False

### Matrix Multiplication

In [59]:
A = np.array([[2, 2], [2, 2]])
B = np.array([[1, 0], [0, 1]])

print(A)
print(B)

[[2 2]
 [2 2]]
[[1 0]
 [0 1]]


In [60]:
np.eye(2)

array([[1., 0.],
       [0., 1.]])

In [61]:
# what do we think will happen here?
A*B

array([[2, 0],
       [0, 2]])

In [62]:
A.dot(B)

array([[2, 2],
       [2, 2]])

In [None]:
np.dot(A, B)

## For your reference


### Additional Properties of Matrices
1. If $X$ and $Y$ are both $n \times p$ matrices,
then $$X+Y = Y+X$$
2. If $X$, $Y$, and $Z$ are all $n \times p$ matrices,
then $$X+(Y+Z) = (X+Y)+Z$$
3. If $X$, $Y$, and $Z$ are all conformable,
then $$X(YZ) = (XY)Z$$
4. If $X$ is of dimension $n \times k$ and $Y$ and $Z$ are of dimension $k \times p$, then $$X(Y+Z) = XY + XZ$$
5. If $X$ is of dimension $p \times n$ and $Y$ and $Z$ are of dimension $k \times p$, then $$(Y+Z)X = YX + ZX$$
6. If $a$ and $b$ are real numbers, and $X$ is an $n \times p$ matrix,
then $$(a+b)X = aX+bX$$
7. If $a$ is a real number, and $X$ and $Y$ are both $n \times p$ matrices,
then $$a(X+Y) = aX+aY$$
8. If $a$ is a real number, and $X$ and $Y$ are conformable, then
$$X(aY) = a(XY)$$

### Matrix Transpose

The transpose of an $n \times p$ matrix is a $p \times n$ matrix with rows and columns interchanged

$$
X^T =
  \begin{bmatrix}
    x_{11} & x_{21} & \cdots & x_{n1} \\
    x_{12} & x_{22} & \cdots & x_{n2} \\
    \vdots & \vdots & \ddots & \vdots \\
    x_{1p} & x_{2p} & \cdots & x_{np} 
  \end{bmatrix}
$$



In [63]:
X

array([[ 2,  1,  0],
       [-1,  2,  3]])

In [64]:
X.T

array([[ 2, -1],
       [ 1,  2],
       [ 0,  3]])

In [65]:
A.T

array([[2, 2],
       [2, 2]])

### Properties of Transpose
1. Let $X$ be an $n \times p$ matrix and $c$ a real number, then 
$$(cX)^T = cX^T$$
2. Let $X$ and $Y$ be $n \times p$ matrices, then
$$(X \pm Y)^T = X^T \pm Y^T$$
3. Let $X$ be an $n \times k$ matrix and $Y$ be a $k \times p$ matrix, then
$$(XY)^T = Y^TX^T$$

### Vector in Matrix Form
A column vector is a matrix with $n$ rows and 1 column and to differentiate from a standard matrix $X$ of higher dimensions can be denoted as a bold lower case $\boldsymbol{x}$

$$
\boldsymbol{x} =
  \begin{bmatrix}
    x_{1}\\
    x_{2}\\
    \vdots\\
    x_{n}
  \end{bmatrix}
$$

In numpy, when we enter a vector, it will not normally have the second dimension, so we can reshape it

In [66]:
x = np.array([1,2,3,4])
print(x)
print(x.shape)

[1 2 3 4]
(4,)


In [67]:
y = np.array([[1,2,3,4]])
print(y)
print(y.shape)

[[1 2 3 4]]
(1, 4)


In [68]:
y = np.array([[1,2,3,4]]).T
print(y)
print(y.shape)

[[1]
 [2]
 [3]
 [4]]
(4, 1)


In [69]:
# this gives a column vector
np.arange(1, 5).reshape(-1, 1)

array([[1],
       [2],
       [3],
       [4]])

In [70]:
# this gives a row vector: one row, with -1 letting numpy infer the number of columns
np.arange(1, 5).reshape(1, -1)

array([[1, 2, 3, 4]])

In [73]:
b = np.arange(1, 5).reshape(-1,1)

b

array([[1],
       [2],
       [3],
       [4]])

In [74]:
x.T

array([1, 2, 3, 4])

In [75]:
x.reshape(-1, 1)

array([[1],
       [2],
       [3],
       [4]])

In [76]:
x.reshape(1, -1)

array([[1, 2, 3, 4]])

In [77]:
y = x.reshape(4, 1)
z = x[:, np.newaxis]

y.shape

(4, 1)

In [79]:
z

array([[1],
       [2],
       [3],
       [4]])

In [80]:
w = x.reshape(1, -1)
w

array([[1, 2, 3, 4]])

In [81]:
w.shape

(1, 4)

In [82]:
y

array([[1],
       [2],
       [3],
       [4]])

and a row vector is generally written as the transpose

$$\boldsymbol{x}^T = [x_1, x_2, \ldots, x_n]$$

In [83]:
z.shape

(4, 1)

In [84]:
A = np.arange(1, 10).reshape(3, 3)
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [85]:
b = np.array([[1, 1, 1]]).T
b.shape

(3, 1)

In [86]:
A.dot(b)

array([[ 6],
       [15],
       [24]])

If we have two vectors $\boldsymbol{x}$ and $\boldsymbol{y}$ of the same length $(n)$, then the _dot product_ is given by matrix multiplication

$$\boldsymbol{x}^T \boldsymbol{y} =   
    \begin{bmatrix} x_1& x_2 & \ldots & x_n \end{bmatrix}
    \begin{bmatrix}
    y_{1}\\
    y_{2}\\
    \vdots\\
    y_{n}
  \end{bmatrix}  =
  x_1y_1 + x_2y_2 + \cdots + x_ny_n$$

## Inverse of a Matrix

The inverse of a square $n \times n$ matrix $X$ is an $n \times n$ matrix $X^{-1}$ such that 

$$X^{-1}X = XX^{-1} = I$$

Where $I$ is the identity matrix, an $n \times n$ diagonal matrix with 1's along the diagonal. 

If such a matrix exists, then $X$ is said to be _invertible_ or _nonsingular_, otherwise $X$ is said to be _noninvertible_ or _singular_.

In [87]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [88]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [89]:
X = np.array([[1,2,3], [0,1,0], [-2, -1, 0]])
Y = np.linalg.inv(X)

In [90]:
Y

array([[-0.        , -0.5       , -0.5       ],
       [ 0.        ,  1.        ,  0.        ],
       [ 0.33333333, -0.5       ,  0.16666667]])

## Note: Inverse and Transpose are not the same thing!
(but in special cases, may be equal)

In [101]:
xx = np.random.random((4,4))
np.array_equal(xx.T, np.linalg.inv(xx))

False

In [102]:
# round before casting: astype(int) truncates toward zero, so a product entry
# such as 0.9999999999999999 would otherwise be displayed as 0 instead of 1
X.dot(Y).round().astype(int)

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])

### Properties of Inverse
1. If $X$ is invertible, then $X^{-1}$ is invertible and
$$(X^{-1})^{-1} = X$$
2. If $X$ and $Y$ are both $n \times n$ invertible matrices, then $XY$ is invertible and
$$(XY)^{-1} = Y^{-1}X^{-1}$$

#### Note the change from left-multiply to right-multiply

3. If $X$ is invertible, then $X^T$ is invertible and
$$(X^T)^{-1} = (X^{-1})^T$$

### Orthogonal Matrices

Let $X$ be an $n \times n$ matrix such that $X^TX = I$, then $X$ is said to be orthogonal which implies that $X^T=X^{-1}$

This is equivalent to saying that the columns of $X$ are all orthogonal to each other (and have unit length).

## Matrix Equations

A system of equations of the form:
\begin{align*}
    a_{11}x_1 + \cdots + a_{1n}x_n &= b_1 \\
    \vdots \hspace{1in} \vdots \\
    a_{m1}x_1 + \cdots + a_{mn}x_n &= b_m 
\end{align*}
can be written as a matrix equation:
$$
A\mathbf{x} = \mathbf{b}
$$
and, when $A$ is square ($m = n$) and invertible, has the unique solution
$$
\mathbf{x} = A^{-1}\mathbf{b}
$$

## Eigenvectors and Eigenvalues

Let $A$ be an $n \times n$ matrix and $\boldsymbol{x}$ be an $n \times 1$ nonzero vector. An _eigenvalue_ of $A$ is a number $\lambda$ such that

$$A \boldsymbol{x} = \lambda \boldsymbol{x}$$


A vector $\boldsymbol{x}$ satisfying this equation is called an eigenvector associated with $\lambda$

Eigenvectors and eigenvalues will play a huge role in matrix methods later in the course (PCA, SVD, NMF).

In [103]:
A = np.array([[1, 1], [1, 2]])
vals, vecs = np.linalg.eig(A)

In [104]:
vals

array([0.38196601, 2.61803399])

In [105]:
vecs

array([[-0.85065081, -0.52573111],
       [ 0.52573111, -0.85065081]])

In [106]:
vec0 = vecs[:, 0].reshape(-1, 1)

In [107]:
vec1 = vecs[:, 1].reshape(-1, 1)

In [108]:
vec0

array([[-0.85065081],
       [ 0.52573111]])

In [109]:
A.dot(vec0)

array([[-0.3249197 ],
       [ 0.20081142]])

In [110]:
vals[0]*vec0

array([[-0.3249197 ],
       [ 0.20081142]])

In [111]:
A.dot(vec1)

array([[-1.37638192],
       [-2.22703273]])

In [112]:
vals[1]*vec1

array([[-1.37638192],
       [-2.22703273]])

In [None]:
# square every element with an explicit Python for loop
def for_loop_square(ary):
    """Return a list holding the square of each element of `ary`, in order."""
    result = []
    for value in ary:
        squared = value ** 2
        result.append(squared)
    return result

# square every element with a list comprehension
def list_comp_square(ary):
    """Return a list holding the square of each element of `ary`, in order."""
    return [value ** 2 for value in ary]

# vectorized: a single ** applied to the whole array, with no Python-level loop
def numpy_rules(ary):
    """Return `ary ** 2`; for a NumPy array this squares every element at once."""
    return ary ** 2

In [None]:
test_ary = np.arange(1000)

In [None]:
timeit for_loop_square(test_ary)

In [None]:
timeit list_comp_square(test_ary)

In [None]:
timeit numpy_rules(test_ary)

### Matrix Breakout Solutions

In [None]:
# instantiate matrices
# seed the global generator so the random A (and every result in this section
# that is derived from it) is identical on each Restart & Run All
np.random.seed(42)
A = np.random.randint(1, 101, size=(10, 10))  # integers from 1 to 100 (upper bound exclusive)
B = np.identity(10)  # 10x10 identity matrix

In [None]:
# matrix multiplication #1
A.dot(B)

In [None]:
# confirm that multiplying A by the identity reproduces itself!
(A.dot(B) == A).all()

In [None]:
# matrix transpose #2
A.T

In [None]:
# inverses #3
A_inv = np.linalg.inv(A)

# confirm that A times its inverse gives the identity
A.dot(A_inv).round().astype(int)

In [None]:
# subsetting A #4
A_subset = A[:, :4]  # keep every row and the first four columns

print(A_subset)

# the column count of A_subset (4) does not match the row count of B (10),
# so we get an error here, as we should!
# Catch and print it so the demonstration does not stop "Run All" at this cell.
try:
    A_subset.dot(B)
except ValueError as err:
    print("ValueError:", err)

In [None]:
# B is (10, 10) and A_subset is (10, 4), so the inner dimensions match and
# this product works as expected, giving a (10, 4) result
B.dot(A_subset)