In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import random as sparse_random
from sklearn.random_projection import sparse_random_matrix

# Random sparse matrix in CSR format: 100 rows (samples) x 75 columns (features),
# with about 1% of the entries non-zero. The fixed random_state makes the
# generated matrix reproducible across runs.
X = sparse_random(m=100, n=75, density=0.01, format="csr", random_state=42)

Sample size is 100. Number of features is 75.

In [2]:
# Display the matrix dimensions as (rows, columns).
X.shape

(100, 75)

SVD fit and transform

In [3]:
# Truncated SVD configured to keep 5 components. random_state is pinned so
# repeated runs of the solver give the same result.
svd = TruncatedSVD(
    n_components=5,
    n_iter=7,
    random_state=42,
)

In [4]:
# Fit the SVD on X and, in the same call, return X expressed in the reduced
# component space.
X_reduced = svd.fit_transform(X)

The number of samples is still 100: the reduction changes only the number of columns, not the number of rows.

In [5]:
# Number of rows (samples) in the reduced matrix.
X_reduced.shape[0]

100

For each sample, there are 5 weights for 5 SVD components

In [6]:
# Reduced representation of the first sample: one value per SVD component.
X_reduced[0, :]

array([-1.44293544e-16, -2.54903102e-16,  2.68602235e-17, -2.80898219e-18,
       -1.14272628e-17])

There are 5 SVD components

In [7]:
# Number of component vectors (rows of components_).
svd.components_.shape[0]

5

For each SVD component, there are 75 weights for 75 features

In [8]:
# Length of each component vector (columns of components_): one weight per feature.
svd.components_.shape[1]

75

In [9]:
# Feature weights of the first SVD component.
svd.components_[0, :]

array([ 1.10520241e-17, -2.13416317e-06, -3.83098358e-09,  4.55625306e-18,
       -2.02097881e-06,  2.66380814e-07, -4.04028440e-06, -3.59644776e-07,
       -5.42389396e-20,  3.10111822e-12,  4.25265963e-20, -6.60190292e-06,
       -1.42922449e-21,  1.22031825e-21,  1.45607724e-06,  0.00000000e+00,
       -3.42857550e-05,  4.16477553e-06, -3.30406967e-07,  0.00000000e+00,
       -4.33346450e-07, -6.89358354e-09,  1.53275764e-01,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  6.56047529e-10,
        7.93026388e-01,  0.00000000e+00, -1.06433483e-07, -5.44788007e-05,
        1.24824313e-06, -3.71725145e-08, -1.68976301e-08,  1.67615601e-07,
        0.00000000e+00,  0.00000000e+00, -1.59310529e-17,  8.08972031e-12,
       -1.95304430e-08, -7.39766969e-07, -5.54259706e-06,  2.63720851e-16,
        9.61977151e-02,  2.16639041e-07,  0.00000000e+00, -6.00662420e-06,
        6.63034888e-12,  3.13210384e-07,  2.93393863e-07,  2.44278053e-05,
       -9.30234895e-08, -

<br>
<br>

Playground

Each component vector has unit length, so its squared entries should sum to 1 (checked here for the first component).

In [10]:
# Squared Euclidean norm of the first component, accumulated entry by entry.
sum(weight ** 2 for weight in svd.components_[0])

0.9999999999999992

Multiplying the components matrix by its transpose should give roughly the identity matrix, because the component vectors are orthonormal.

In [11]:
# Gram matrix of the components (components times their transpose), rounded
# to 10 decimals so floating-point noise does not clutter the display.
(svd.components_ @ svd.components_.T).round(10)

array([[ 1.,  0., -0., -0.,  0.],
       [ 0.,  1., -0., -0., -0.],
       [-0., -0.,  1., -0.,  0.],
       [-0., -0., -0.,  1.,  0.],
       [ 0., -0.,  0.,  0.,  1.]])

Singular values (the square roots of the eigenvalues of $X^\top X$) are sorted from largest to smallest.

In [12]:
# Singular values of the fitted decomposition, one per retained component.
svd.singular_values_

array([1.57665789, 1.56338171, 1.40235249, 1.40134898, 1.26083335])