# PCA on random matrices

af, started 22.10.26

## Import libraries

In [1]:
import timeit
import numpy as np
import matplotlib.pyplot as plt
import pydiodon as dio
import cppdiodon as cppdio

loading pydiodon - version 22.10.29


## General approach

### Select the dimension of the random matrix

Matrix $A$ has $m$ rows and $n$ columns.

In [2]:
# Shape of the random matrix A: m rows, n columns.
m, n = 1500, 1500

### Select the model for building a random matrix

Models are:
- `uniform` for a uniform law in $[-1,1]$
- `gauss` for a normal law $\mathcal{N}(0,1)$ with mean 0 and variance 1

In [3]:
# Model used to draw the entries of A; the cell that builds A tests for
# "uniform" and "gauss".
meth = "gauss"

### Building the random matrix

In [4]:
# Build the m x n random matrix A according to the selected model.
# Seed the global NumPy generator so that Restart & Run All draws the same A.
np.random.seed(0)
if meth == "uniform":
    # np.random.random samples [0, 1); rescale it to [-1, 1).
    A = -1 + 2*np.random.random((m, n))
elif meth == "gauss":
    # Independent standard normal entries.
    A = np.random.randn(m, n)
else:
    # Fail here rather than leaving A undefined (or stale from a previous run).
    raise ValueError(f"unknown model {meth!r}: expected 'uniform' or 'gauss'")

### Running the PCA of the random matrix

#### Python (`pydiodon`), method is SVD

In [5]:
# Wall-clock timing of the pydiodon PCA of A with meth="svd".
t_start = timeit.default_timer()
Ys, Ls, Vs = dio.pca(A, meth="svd")
t_stop = timeit.default_timer()
duration = t_stop - t_start
print("\nDuration with SVD is", duration)


-----------------------------------------
running pca(), version 21.05.05
Matrix A has 1500 rows and 1500 columns
rank is -1 (full rank if k = -1)
pretreatment is standard
method is svd
------------------------------------------


Duration with SVD is 1.2329449039998508


#### C++ (`cppdiodon`), method is SVD

In [None]:
# Wall-clock timing of the cppdiodon PCA of A with meth="svd".
# NOTE(review): this rebinds Ys, Ls, Vs, which the pydiodon SVD cell also
# assigns, so the later plotting cells use whichever of the two cells ran
# last -- confirm that this is intended.
# NOTE(review): the printed label is the same as in the pydiodon cell.
t_start   = timeit.default_timer()
Ys, Ls, Vs = cppdio.pca(A, meth="svd")
t_stop    = timeit.default_timer()
print("\nDuration with SVD is", t_stop-t_start)

#### Python (`pydiodon`), method is rSVD

In [None]:
# Rank requested from pca() (the pca() banner reports k = -1 as full rank).
k = 500
# Wall-clock timing of the pydiodon PCA of A with meth="grp".
t_start = timeit.default_timer()
Yg, Lg, Vg = dio.pca(A, meth="grp", k=k)
t_stop = timeit.default_timer()
duration = t_stop - t_start
print("\nDuration with rSVD is", duration)

### Comparison of singular values between SVD and rSVD

In [None]:
# Overlay the values returned by the SVD run (Ls) and the rSVD run (Lg).
# The legend and title let the figure stand alone, without relying on the
# markdown cell below to say which colour is which.
fig, ax = plt.subplots()
ax.plot(Ls, c="green", label="SVD")
ax.plot(Lg, c="blue", label="rSVD")
ax.set_xlabel("rank")
ax.set_ylabel("Singular value")
ax.set_title("Singular values: SVD vs rSVD")
ax.legend()
plt.show()

- Singular values computed with SVD are in green
- Singular values computed with rSVD are in blue

### Plotting the components

In [None]:
# Heatmap of Ys, the first value returned by the SVD-based pca() call,
# drawn with bins=64.
# NOTE(review): Ys is presumably the matrix of principal components -- confirm
# against the pydiodon documentation.
dio.plot_components_heatmap(Ys, bins=64)

## That's all folks!