In [None]:
import numpy as np
# Print arrays with 2 decimal places and suppress scientific notation for small values.
np.set_printoptions(precision=2, suppress=True)

In [None]:
# 5x3 integer example matrix that the SVD cells below decompose.
A = np.array(
    [
        [1, 2, 0],
        [0, 0, 5],
        [3, -4, 2],
        [1, 6, 5],
        [0, 1, 0],
    ]
)
A

array([[ 1,  2,  0],
       [ 0,  0,  5],
       [ 3, -4,  2],
       [ 1,  6,  5],
       [ 0,  1,  0]])

In [None]:
# Full SVD of A; the third factor comes back already transposed, so call it W.
U, s, W = np.linalg.svd(A)

# Transpose W to obtain V
V = W.T

# s is only the list of diagonal entries; for clarity, place it on the main
# diagonal of a zero matrix with the same shape as A.
D = np.zeros(A.shape, dtype=float)
np.fill_diagonal(D, s)

$$A=UDV^{T}$$

In [None]:
# Multiply the three factors back together; the product should reproduce A.
U @ D @ V.T

array([[ 1.,  2.,  0.],
       [ 0., -0.,  5.],
       [ 3., -4.,  2.],
       [ 1.,  6.,  5.],
       [-0.,  1., -0.]])

In [None]:
# Show the three factors, one labelled block per matrix.
print(
    f'Матрица D:\n{D}',
    f'Матрица U:\n{U}',
    f'Матрица V:\n{V}',
    sep='\n',
)

Матрица D:
[[8.82 0.   0.  ]
 [0.   6.14 0.  ]
 [0.   0.   2.53]
 [0.   0.   0.  ]
 [0.   0.   0.  ]]
Матрица U:
[[ 0.17  0.16 -0.53 -0.8  -0.16]
 [ 0.39 -0.53  0.61 -0.43  0.03]
 [-0.14 -0.82 -0.52  0.14  0.07]
 [ 0.89  0.06 -0.25  0.38 -0.06]
 [ 0.08  0.11 -0.08 -0.11  0.98]]
Матрица V:
[[ 0.07 -0.37 -0.93]
 [ 0.72  0.67 -0.21]
 [ 0.69 -0.65  0.31]]


In [None]:
# Number of singular values vs. rank of A vs. the smaller dimension of A.
len(s) == np.linalg.matrix_rank(A) == min(A.shape)

True

$$\left \| A \right \|_{E}^{2}=\max_{x\neq 0}\frac{(Ax,Ax)}{(x,x)}=\max_{x\neq 0}\frac{(UDV^{T}x,UDV^{T}x)}{(V^{T}x,V^{T}x)}=\max_{z\neq 0}\frac{(Dz,Dz)}{(z,z)}.$$

Here $z=V^{T}x$; the equalities hold because $U$ and $V$ are orthogonal and therefore preserve the inner product.

$$\left \| A \right \|_{E}=\mu_{1},$$

where $\mu_{1}$ is the largest singular value of $A$.

In [None]:
# Largest singular value of A.
s.max()

8.824868854820444

$$\left \| A \right \|_{F}=\sqrt{\sum_{i=1}^{m}\sum_{j=1}^{n}a_{ij}^{2}}.$$

$$\left \| A \right \|_{F}=\sqrt{\sum_{k=1}^{r}\mu_{k}^{2}},$$

where $\mu_{1},\dots,\mu_{r}$ are the nonzero singular values of $A$ and $r$ is its rank.

In [None]:
# Print the singular values, then display the 2-norm of that vector,
# i.e. sqrt(sum(mu_k^2)).
print(s)
np.linalg.norm(s)

[8.82 6.14 2.53]


11.045361017187263

In [None]:
# Same quantity written out explicitly: square root of the sum of squared singular values.
np.sqrt(np.square(s).sum())

11.045361017187263

In [None]:
# Frobenius norm straight from the definition: square root of the sum of all squared entries of A.
np.sqrt((A ** 2).sum())

11.045361017187261

Playground

In [None]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.decomposition import PCA, TruncatedSVD, KernelPCA

In [None]:
class SVD(BaseEstimator, TransformerMixin):
    """Reduce dimensionality by projecting onto the top-k right singular vectors.

    The singular basis is learned once in ``fit`` and reused by ``transform``,
    so every data set passed through a fitted instance is expressed in the
    same coordinates. For the data given to ``fit`` the projection equals
    ``U[:, :k] @ diag(s[:k])`` (up to floating-point rounding).

    Parameters
    ----------
    k : int, default=2
        Number of singular directions to keep.

    Attributes
    ----------
    components_ : ndarray of shape (k, n_features)
        Top-k right singular vectors of the fitted data, one per row.
    singular_values_ : ndarray of shape (k,)
        Singular values that correspond to ``components_``.
    """

    def __init__(self, k=2):
        self.k = k

    def fit(self, X, y=None):
        """Learn the top-k right singular vectors of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data whose singular basis is learned.
        y : ignored
            Present so the estimator can be used inside a pipeline.

        Returns
        -------
        self : SVD
            The fitted transformer.

        Raises
        ------
        ValueError
            If ``k`` is larger than ``min(n_samples, n_features)``.
        """
        X_ = np.asarray(X, dtype=float)
        max_components = min(X_.shape)
        if self.k > max_components:
            raise ValueError(
                f"k={self.k} exceeds the number of singular values "
                f"available for data of shape {X_.shape} ({max_components})"
            )

        # Thin SVD: the square (n_samples x n_samples) U of the full
        # decomposition is never needed for a projection.
        U, s, W = np.linalg.svd(X_, full_matrices=False)

        # truncate() keeps the leading *rows* of its third argument, so it is
        # given W (= V^T), whose rows are the right singular vectors.
        _, self.singular_values_, self.components_ = self.truncate(U, s, W, self.k)
        return self

    def transform(self, X):
        """Project ``X`` onto the basis learned in ``fit``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to project; must have the same number of columns as the
            data passed to ``fit``.

        Returns
        -------
        pandas.DataFrame of shape (n_samples, k)
            Coordinates of every row of ``X`` in the learned basis.
        """
        # Local import so the class does not depend on an extra top-level import.
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, "components_")
        X_ = np.asarray(X, dtype=float)
        return pd.DataFrame(X_ @ self.components_.T)

    @staticmethod
    def truncate(U, S, V, k):
        """Keep the leading ``k`` components of an SVD factorization.

        Parameters
        ----------
        U : ndarray
            Left factor; its first ``k`` columns are kept.
        S : ndarray
            1-D array of singular values; its first ``k`` entries are kept.
        V : ndarray
            Right factor; its first ``k`` *rows* are kept. Pass the third
            return value of ``np.linalg.svd`` (i.e. V^T) so that the kept
            rows are right singular vectors.
        k : int
            Number of components to keep.

        Returns
        -------
        tuple of (ndarray, ndarray, ndarray)
            ``(U[:, :k], S[:k], V[:k, :])``.
        """
        U_trunc = U[:, :k]
        S_trunc = S[:k]
        V_trunc = V[:k, :]
        return U_trunc, S_trunc, V_trunc

In [None]:
# Load the iris dataset; as_frame=True requests pandas objects for the features and target.
data = load_iris(as_frame=True)

In [None]:
# Features and labels, then a shuffled, stratified 70/30 hold-out split with a fixed seed.
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=True,
    stratify=y,
    random_state=123,
)

In [None]:
# Peek at two random feature rows.
# NOTE(review): no random_state is passed, so the rows shown change on every run.
X.sample(2)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
17,5.1,3.5,1.4,0.3
92,5.8,2.6,4.0,1.2


In [None]:
# Class proportions in the training part of the split (the split was stratified on y).
y_train.value_counts(normalize=True)

1    0.333333
0    0.333333
2    0.333333
Name: target, dtype: float64

In [None]:
# Five pipelines with the same scaler and a logistic-regression classifier;
# they differ only in the dimensionality-reduction step (pipe_raw has none).
# NOTE(review): only pipe_one passes random_state to LogisticRegression.
pipe_one = make_pipeline(
    MinMaxScaler(),
    SVD(k=3),
    LogisticRegression(random_state=123),
)
pipe_two = make_pipeline(
    MinMaxScaler(),
    PCA(n_components=3),
    LogisticRegression(),
)
pipe_three = make_pipeline(
    MinMaxScaler(),
    TruncatedSVD(n_components=3, algorithm='arpack'),
    LogisticRegression(),
)
pipe_four = make_pipeline(
    MinMaxScaler(),
    KernelPCA(n_components=3),
    LogisticRegression(),
)
pipe_raw = make_pipeline(
    MinMaxScaler(),
    LogisticRegression(),
)

In [None]:
# Custom SVD transformer: train on the training split, report metrics on the hold-out split.
pipe_one.fit(X_train, y_train)
print(classification_report(y_true=y_test, y_pred=pipe_one.predict(X_test)))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.91      0.67      0.77        15
           2       0.74      0.93      0.82        15

    accuracy                           0.87        45
   macro avg       0.88      0.87      0.86        45
weighted avg       0.88      0.87      0.86        45



In [None]:
# PCA: train on the training split, report metrics on the hold-out split.
pipe_two.fit(X_train, y_train)
print(classification_report(y_true=y_test, y_pred=pipe_two.predict(X_test)))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.80      0.89        15
           2       0.83      1.00      0.91        15

    accuracy                           0.93        45
   macro avg       0.94      0.93      0.93        45
weighted avg       0.94      0.93      0.93        45



In [None]:
# TruncatedSVD: train on the training split, report metrics on the hold-out split.
pipe_three.fit(X_train, y_train)
print(classification_report(y_true=y_test, y_pred=pipe_three.predict(X_test)))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.80      0.89        15
           2       0.83      1.00      0.91        15

    accuracy                           0.93        45
   macro avg       0.94      0.93      0.93        45
weighted avg       0.94      0.93      0.93        45



In [None]:
# KernelPCA: train on the training split, report metrics on the hold-out split.
pipe_four.fit(X_train, y_train)
print(classification_report(y_true=y_test, y_pred=pipe_four.predict(X_test)))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.80      0.89        15
           2       0.83      1.00      0.91        15

    accuracy                           0.93        45
   macro avg       0.94      0.93      0.93        45
weighted avg       0.94      0.93      0.93        45



In [None]:
# Baseline without dimensionality reduction: train, then report metrics on the hold-out split.
pipe_raw.fit(X_train, y_train)
print(classification_report(y_true=y_test, y_pred=pipe_raw.predict(X_test)))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.80      0.89        15
           2       0.83      1.00      0.91        15

    accuracy                           0.93        45
   macro avg       0.94      0.93      0.93        45
weighted avg       0.94      0.93      0.93        45

