In [3]:
import sys
import os
import numpy as np


# Make the project's sibling "code" directory importable from this notebook.
# A notebook kernel does not define __file__; the previous code passed the
# *string* "__file__" to abspath, which only resolved to the working directory
# by accident (dirname of "<cwd>/__file__"). Ask for the working directory
# explicitly instead.
# NOTE(review): assumes the kernel's working directory is the notebook's folder.
notebook_dir = os.getcwd()
src_path = os.path.join(notebook_dir, "..", "code")
if src_path not in sys.path:
    sys.path.append(src_path)

from PCA import PCA

## Generate and visualize random 2D Gaussian samples

In [4]:
import numpy as np
import plotly.express as px
import pandas as pd
from sklearn.decomposition import PCA as coucoujesuisunePCA

# Seed NumPy's global random state so the sample drawn below is reproducible.
np.random.seed(420)

# Parameters of the 2D Gaussian the points are drawn from.
num_points = 30
mean = [10, 6]
covariance = [
    [10, 7],
    [7, 10],
]

X = np.random.multivariate_normal(mean, covariance, size=num_points)

# Fit the project's PCA (not the scikit-learn alias imported above) with a
# single component, then keep its transform output and its components.
pca = PCA(nb_components=1)
pca.fit(X)
projections = pca.transform(X)
eigenvector = pca.components

print(X)

[[13.74847276  5.08181795]
 [11.91820087  7.15017311]
 [ 7.88636449  6.18530185]
 [11.89393124  7.41928072]
 [ 4.11344552  4.23165259]
 [13.17410666  7.30485709]
 [13.97330244  4.69363179]
 [ 8.03360077  4.96247084]
 [11.172389    8.90889083]
 [ 9.12122483  8.90786117]
 [ 9.97943791  2.83214487]
 [15.69821811 10.43715921]
 [11.01815129  2.8241422 ]
 [ 5.08341501  3.16990141]
 [12.05190567 10.81301992]
 [ 6.92484171  6.18305239]
 [10.55785897  1.66297971]
 [ 8.63689119  5.379031  ]
 [12.51536891  9.42100592]
 [11.45531941 10.32466602]
 [ 1.97858388  0.68639235]
 [10.7819467   8.51523819]
 [10.41370275  4.62385504]
 [12.27418954  6.81421035]
 [11.87563565  7.91868145]
 [13.56441947  7.46944675]
 [12.56235723  3.06725181]
 [10.61950239  3.85324597]
 [12.90531581  6.37146914]
 [16.21817166  7.64341072]]


In [5]:
# Put the sample in a DataFrame so the scatter plot can address columns by name.
df = pd.DataFrame(X, columns=["x", "y"])

fig = px.scatter(
    df,
    x="x",
    y="y",
    title="Visualisation des points d'une distribution gaussienne 2D",
    labels={"x": "Dimension X", "y": "Dimension Y"},
    template="plotly_white",
)

# Blue markers with a thin black outline.
fig.update_traces(
    marker={
        "size": 8,
        "color": "blue",
        "line": {"width": 1, "color": "black"},
    }
)

fig.update_layout(
    title_font_size=20,
    xaxis_title="Dimension X",
    yaxis_title="Dimension Y",
)

fig.show()


In [6]:
from utils_visualization import plot_fig_projections

# Per-column mean of the sample; added back after mapping to the 2D space.
mean_X = X.mean(axis=0)

# Multiply the PCA transform output by the transposed components and shift by
# the mean to obtain 2D points that can be drawn next to the original sample.
projections_2d = (projections @ eigenvector.T) + mean_X

plot_fig_projections(X, projections_2d)

## Obtain the same plot with PPCA: first with a given $\sigma^2$, then with classical PPCA

### Given sigma2:

In [7]:
from PCA import PPCA
from utils_stats import get_likelihood

# Noise variance handed to the model instead of letting it pick one.
sigma2 = 10

ppca_sigma_fixed = PPCA(nb_components=1, sigma2=sigma2)
ppca_sigma_fixed.fit(X)

projections_sigma_fixed = ppca_sigma_fixed.transform(X)
W = ppca_sigma_fixed.W
eigenvector = ppca_sigma_fixed.components

# Map the transform output back to 2D with W and shift by the mean of X.
projections_ppca_sigma_fixed_2d = (projections_sigma_fixed @ W.T) + mean_X
plot_fig_projections(X, projections_ppca_sigma_fixed_2d, line_scale=4.5)

# Likelihood value reported by the project helper for this W and sigma2.
print(get_likelihood(X, W, sigma2=sigma2))

-149.7067918595382


### With the $\sigma^2$ that maximizes the likelihood of the model

In [8]:
from PCA import PPCA

# No sigma2 is passed here; the fitted value is read back from ppca.sigma2.
ppca = PPCA(nb_components=1)
ppca.fit(X)

projections = ppca.transform(X)
W = ppca.W
eigenvector = ppca.components

# Map the transform output back to 2D with W and shift by the mean of X.
projections_ppca_2d = (projections @ W.T) + mean_X
plot_fig_projections(X, projections_ppca_2d, line_scale=1.9)

# Report the likelihood, the fitted sigma2 and the fitted W, in that order.
print(get_likelihood(X, W, sigma2=ppca.sigma2))
print(ppca.sigma2)
print(W)

-145.227545979291
4.120809684574525
[[-2.48437584]
 [-1.74277741]]


### Test of EM algorithm:

In [9]:
from PCA import EM_for_PPCA

nb_components = 1

# Starting point handed to EM: sigma2 = 15 and a W filled with -1.0, with one
# row per column of X and one column per component.
sigma2_0 = 15
W_0 = np.full((X.shape[1], nb_components), -1.0)

# NOTE(review): the third result is bound to `time`, which would shadow the
# standard-library module of that name if it were imported in this notebook.
W_EM, sigma2_EM, time = EM_for_PPCA(
    X,
    nb_components,
    W_0,
    sigma2_0,
    max_iter=1_000_000,
    plot_time=True,
)

Epoch 0
[[-1.]
 [-1.]]
15
Epoch 100000
[[-2.44261865]
 [-1.71348494]]
3.983449361755369
Epoch 200000
[[-2.44261865]
 [-1.71348494]]
3.983449361755369
Epoch 300000
[[-2.44261865]
 [-1.71348494]]
3.983449361755369
Epoch 400000
[[-2.44261865]
 [-1.71348494]]
3.983449361755369
Epoch 500000
[[-2.44261865]
 [-1.71348494]]
3.983449361755369
Epoch 600000
[[-2.44261865]
 [-1.71348494]]
3.983449361755369
Epoch 700000
[[-2.44261865]
 [-1.71348494]]
3.983449361755369
Epoch 800000
[[-2.44261865]
 [-1.71348494]]
3.983449361755369
Epoch 900000
[[-2.44261865]
 [-1.71348494]]
3.983449361755369


In [10]:
# Report the results of the EM run above: the sigma2 and W it returned, the
# value get_likelihood gives for them on X, and `time` rounded to two decimals.
print(sigma2_EM)
print(W_EM)
print(get_likelihood(X, W_EM, sigma2_EM))
print(round(time, 2))

3.983449361755369
[[-2.44261865]
 [-1.71348494]]
-145.24498218764123
14.42


### Test with slightly different distribution

In [11]:
# Same mean as the first sample, but a diagonal covariance: the two
# coordinates are now uncorrelated with equal variance.
mean = [10,6]
covariance = [[10,0], [0,10]]
num_points = 30

# Seed right before sampling so X_bis is reproducible on its own.
np.random.seed(650)
X_bis = np.random.multivariate_normal(mean, covariance, num_points)

# Mean of *this* sample. The previous version added mean_X (the mean of the
# first dataset X) when mapping back to 2D, which shifted the plotted
# projections away from the X_bis points they belong to.
mean_X_bis = np.mean(X_bis, axis=0)

ppca = PPCA(nb_components=1)

ppca.fit(X_bis)
projections = ppca.transform(X_bis)
W = ppca.W
eigenvector = ppca.components

# Map the transform output back to 2D with W and shift by the mean of X_bis.
projections_ppca_2d = projections @ W.T + mean_X_bis
plot_fig_projections(X_bis, projections_ppca_2d, line_scale = 3)

# Report the likelihood and the fitted sigma2 for this sample.
print(get_likelihood(X_bis, W, sigma2=ppca.sigma2))
print(ppca.sigma2)


-148.14738906698898
7.788223336216858


### Test of EM algorithm