## Loading Data

In [60]:
# Number of observations (rows) in the built-in `swiss` data frame.
n <- nrow(swiss)
cat('n:', n)
# Predictor matrix: drop the first column and keep the remaining
# columns as a numeric matrix.
x <- as.matrix(swiss[, -1])
head(x)

n: 47

Unnamed: 0,Agriculture,Examination,Education,Catholic,Infant.Mortality
Courtelary,17.0,15,12,9.96,22.2
Delemont,45.1,6,9,84.84,22.2
Franches-Mnt,39.7,5,5,93.4,20.2
Moutier,36.5,12,7,33.77,20.3
Neuveville,43.5,17,15,5.16,20.6
Porrentruy,35.3,9,7,90.57,26.6


In [46]:
# Response vector: the Fertility column of `swiss`.
y <- swiss[["Fertility"]]

## Eigendecomposition

In [115]:
# Pairwise Pearson correlation matrix of the columns of x.
cor(x, method = "pearson")

Unnamed: 0,Agriculture,Examination,Education,Catholic,Infant.Mortality
Agriculture,1.0,-0.6865422,-0.63952252,0.4010951,-0.06085861
Examination,-0.68654221,1.0,0.6984153,-0.5727418,-0.1140216
Education,-0.63952252,0.6984153,1.0,-0.1538589,-0.09932185
Catholic,0.40109505,-0.5727418,-0.15385892,1.0,0.17549591
Infant.Mortality,-0.06085861,-0.1140216,-0.09932185,0.1754959,1.0


In [172]:
# Eigendecomposition of the correlation matrix of x:
# `$values` holds the eigenvalues, `$vectors` the eigenvectors (one per column).
decomp2 <- eigen(cor(x))
decomp2

eigen() decomposition
$values
[1] 2.6335008 1.0722340 0.8160316 0.3127902 0.1654433

$vectors
            [,1]        [,2]         [,3]       [,4]        [,5]
[1,]  0.52396452  0.25834215 -0.003003672  0.8090741  0.06411415
[2,] -0.57185792  0.01145981  0.039840522  0.4224580 -0.70198942
[3,] -0.49150243 -0.19028476 -0.539337412  0.3321615  0.56656945
[4,]  0.38530580 -0.36956307 -0.725888143 -0.1007965 -0.42176895
[5,]  0.09167606 -0.87197641  0.424976789  0.2154928  0.06488642


In [112]:
# Matrix whose columns are the eigenvectors from the decomposition above.
devecs <- decomp2[["vectors"]]

0
1


In [114]:
# Orthonormality check: V'V should equal the identity (rounded to hide
# floating-point noise).
round(t(devecs) %*% devecs, digits = 10)

0,1,2,3,4
1,0,0,0,0
0,1,0,0,0
0,0,1,0,0
0,0,0,1,0
0,0,0,0,1


## SVD

### Normalize X

In [138]:
# Standardize every column of x: subtract the column mean, then divide by
# the column's sample standard deviation.
xnorm <- apply(
  x,
  MARGIN = 2,
  FUN = function(col) {
    centered <- col - mean(col)
    centered / sd(col)
  }
)
head(xnorm)

Unnamed: 0,Agriculture,Examination,Education,Catholic,Infant.Mortality
Courtelary,-1.4820682,-0.18668632,0.1062125,-0.7477267,0.77503669
Delemont,-0.2447942,-1.31480509,-0.2057867,1.0477479,0.77503669
Franches-Mnt,-0.4825622,-1.44015162,-0.6217858,1.2529998,0.08838778
Moutier,-0.6234617,-0.56272591,-0.4137863,-0.1768099,0.12272023
Neuveville,-0.315244,0.06400674,0.4182118,-0.8628212,0.22571757
Porrentruy,-0.676299,-0.9387655,-0.4137863,1.185142,2.28566429


In [176]:
# Alternative route to the standardized matrix: centre the columns with the
# centering matrix I - J/n (J is the n x n matrix of ones), then divide each
# column by its sample standard deviation.
Jnn <- matrix(1, nrow = n, ncol = n)
xc <- (diag(n) - Jnn / n) %*% x
# Column standard deviations, taken from the diagonal of the covariance matrix.
sqrt(diag(var(x)))
cat('*** xnorm:')
head(sweep(xc, MARGIN = 2, STATS = sqrt(diag(var(x))), FUN = '/'))

*** xnorm:

Agriculture,Examination,Education,Catholic,Infant.Mortality
-1.4820682,-0.18668632,0.1062125,-0.7477267,0.77503669
-0.2447942,-1.31480509,-0.2057867,1.0477479,0.77503669
-0.4825622,-1.44015162,-0.6217858,1.2529998,0.08838778
-0.6234617,-0.56272591,-0.4137863,-0.1768099,0.12272023
-0.315244,0.06400674,0.4182118,-0.8628212,0.22571757
-0.676299,-0.9387655,-0.4137863,1.185142,2.28566429


In [175]:
# Singular value decomposition of the standardized matrix: xnorm = U D V'.
decomp3 <- svd(xnorm)
names(decomp3)
# Right singular vectors; compare with the eigenvectors of cor(x) shown
# earlier (same columns up to sign).
decomp3[["v"]]
cat('*** reconstruct x(norm):')
# Multiply the factors back together to recover xnorm.
head(with(decomp3, u %*% diag(d) %*% t(v)))

0,1,2,3,4
0.52396452,-0.25834215,0.003003672,-0.8090741,0.06411415
-0.57185792,-0.01145981,-0.039840522,-0.422458,-0.70198942
-0.49150243,0.19028476,0.539337412,-0.3321615,0.56656945
0.3853058,0.36956307,0.725888143,0.1007965,-0.42176895
0.09167606,0.87197641,-0.424976789,-0.2154928,0.06488642


*** reconstruct x(norm):

0,1,2,3,4
-1.4820682,-0.18668632,0.1062125,-0.7477267,0.77503669
-0.2447942,-1.31480509,-0.2057867,1.0477479,0.77503669
-0.4825622,-1.44015162,-0.6217858,1.2529998,0.08838778
-0.6234617,-0.56272591,-0.4137863,-0.1768099,0.12272023
-0.315244,0.06400674,0.4182118,-0.8628212,0.22571757
-0.676299,-0.9387655,-0.4137863,1.185142,2.28566429


## Correlation Checking
Correlation Formula:

\begin{align}
Cor(X, Y) = \frac{Cov(X, Y)}{\sigma_{x}\sigma_{y}}
\end{align}

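Also, writing $\tilde{X}$ and $\tilde{Y}$ for the mean-centered vectors, the sample covariance is:

\begin{align}
Cov(X, Y) = \frac{1}{n-1} \langle \tilde{X}, \tilde{Y} \rangle
\end{align}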

In [55]:
# The first two predictor columns, selected by name instead of position.
x1 <- x[, "Agriculture"]
x2 <- x[, "Examination"]

In [56]:
# Reference value from the built-in correlation function.
cor(x = x1, y = x2)

### Column Mean Centering

In [101]:
# n x 1 column vector of ones.
Jn <- matrix(1, nrow = n, ncol = 1)
t(Jn)
cat('dim:', dim(Jn))

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


dim: 47 1

In [83]:
# Centre x1 by subtracting its projection onto the ones vector:
# Jn (Jn'Jn)^{-1} Jn' x1.
x1c <- x1 -
  Jn %*% solve(t(Jn) %*% Jn) %*% t(Jn) %*% x1
t(x1c)
# Same result computed directly, for comparison with the line above.
t(unname(x1 - mean(x1)))

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
-33.65957,-5.559574,-10.95957,-14.15957,-7.159574,-15.35957,19.54043,17.14043,2.640426,-5.459574,...,12.44043,-12.25957,-42.95957,-33.95957,-33.05957,-13.05957,-31.95957,-49.45957,-4.059574,-22.95957


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
-33.65957,-5.559574,-10.95957,-14.15957,-7.159574,-15.35957,19.54043,17.14043,2.640426,-5.459574,...,12.44043,-12.25957,-42.95957,-33.95957,-33.05957,-13.05957,-31.95957,-49.45957,-4.059574,-22.95957


In [85]:
# Centre x2 by subtracting its projection onto the ones vector:
# Jn (Jn'Jn)^{-1} Jn' x2.
x2c <- x2 -
  Jn %*% solve(t(Jn) %*% Jn) %*% t(Jn) %*% x2
t(x2c)
# Same result computed directly, for comparison with the line above.
t(unname(x2 - mean(x2)))

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
-1.489362,-10.48936,-11.48936,-4.489362,0.5106383,-7.489362,-0.4893617,-2.489362,-4.489362,-0.4893617,...,-3.489362,9.510638,12.51064,5.510638,18.51064,-1.489362,8.510638,20.51064,-0.4893617,5.510638


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
-1.489362,-10.48936,-11.48936,-4.489362,0.5106383,-7.489362,-0.4893617,-2.489362,-4.489362,-0.4893617,...,-3.489362,9.510638,12.51064,5.510638,18.51064,-1.489362,8.510638,20.51064,-0.4893617,5.510638


### Covariance

In [92]:
# Built-in sample covariance ...
cov(x1, x2)

# ... and the same quantity as the inner product of the centered vectors,
# divided by (n - 1).
(t(x1c) %*% x2c) / (n - 1)

0
-124.3928


### Correlation

In [94]:
# Correlation by hand: sample covariance of the centered vectors divided by
# the product of the standard deviations.
((t(x1c) %*% x2c) / (n - 1)) / sqrt(var(x1) * var(x2))
# Built-in value for comparison.
cor(x = x1, y = x2)

0
-0.6865422
