In [1]:
import numpy as np
from numpy import linalg
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

In [2]:
# Toy data matrix: 3 samples (rows) x 2 features (columns).
# Each column already sums to zero, so the data is centred and the SVD of X
# can be compared directly with PCA (which centres its input before decomposing).
X = np.array(
    [
        [1, 1],
        [-1, 0],
        [0, -1],
    ]
)
X

array([[ 1,  1],
       [-1,  0],
       [ 0, -1]])

In [3]:
# Full singular value decomposition of the toy matrix: X = U @ diag(S) @ Vh.
# With the default settings U is square (3 x 3) while S holds the 2 singular values.
X_svd = linalg.svd(X)

In [4]:
# Inspect the decomposition: the left singular vectors U, the singular values S
# (largest first) and Vh, the transposed right singular vectors.
X_svd

SVDResult(U=array([[-8.16496581e-01, -1.85577521e-16,  5.77350269e-01],
       [ 4.08248290e-01, -7.07106781e-01,  5.77350269e-01],
       [ 4.08248290e-01,  7.07106781e-01,  5.77350269e-01]]), S=array([1.73205081, 1.        ]), Vh=array([[-0.70710678, -0.70710678],
       [ 0.70710678, -0.70710678]]))

In [5]:
# Vh is the last element of the SVD result; transposing it gives V, whose
# columns are the right singular vectors of X.
V_transpose = X_svd[-1]
V = np.transpose(V_transpose)
V

array([[-0.70710678,  0.70710678],
       [-0.70710678, -0.70710678]])

In [6]:
# Cross-check: fit scikit-learn's PCA on the same matrix so that its principal
# components can be compared against the columns of V obtained from the SVD.
pca = PCA(n_components=2).fit(X)
pca

In [7]:
# Each row of components_ is one principal axis. Compared with the SVD result,
# row i matches column i of V up to sign (in the recorded output the first
# axis is negated, the second is identical) -- the sign of a singular vector
# is arbitrary, so both describe the same directions.
pca.components_

array([[ 0.70710678,  0.70710678],
       [ 0.70710678, -0.70710678]])

In [8]:
# Project the toy samples onto the fitted principal axes; each row of Z holds
# one sample's coordinates in component space.
Z = pca.transform(X)
Z

array([[ 1.41421356e+00, -3.33066907e-16],
       [-7.07106781e-01, -7.07106781e-01],
       [-7.07106781e-01,  7.07106781e-01]])

In [26]:
# Apply PCA to the Iris data set, keeping the first two principal components.
# NOTE: X, pca and Z are rebound here, so the toy-example objects from the
# cells above are no longer reachable after this cell runs.
X, y = load_iris(return_X_y=True)
pca = PCA(n_components=2).fit(X)
Z = pca.transform(X)

In [27]:
# Principal axes of the Iris data: one row per retained component, one column
# per original feature (2 x 4 in the recorded output).
pca.components_

array([[ 0.36138659, -0.08452251,  0.85667061,  0.3582892 ],
       [ 0.65658877,  0.73016143, -0.17337266, -0.07548102]])

In [31]:
# Fraction of the total variance captured by each retained component; in the
# recorded output the two components together account for roughly 0.978.
pca.explained_variance_ratio_

array([0.92461872, 0.05306648])

In [32]:
# Fit a logistic-regression classifier on the 2-D PCA projection of Iris.
# NOTE(review): the model is scored on the very samples it was trained on
# (fit and predict both use Z), so the F1 below is a training-set score and
# is an optimistic estimate -- evaluate on held-out data before drawing
# conclusions about generalisation.
model_logistics = LogisticRegression(random_state=0, max_iter=1000)
model_logistics.fit(Z, y)

true_values = y
predictions = model_logistics.predict(Z)

# Weighted F1: per-class F1 averaged with class-support weights.
f1_score(y_true=true_values, y_pred=predictions, average='weighted')

0.9666633329999667