In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import datasets

In [3]:
# Load the digits dataset; its feature matrix is what gets reduced below.
digits = datasets.load_digits()

# Standardize the features before handing them to PCA.
scaler = StandardScaler()
features = scaler.fit_transform(digits.data)

# A fractional n_components asks PCA to keep as many components as are
# needed to retain that share of the variance (99% here).
pca = PCA(whiten=True, n_components=0.99)
features_pca = pca.fit_transform(features)

# Compare the column counts before and after the reduction.
print(f'original features {features.shape[1]}')
print(f'reduced features {features_pca.shape[1]}')

original features 64
reduced features 54


In [5]:
# linearly inseparable data
from sklearn.decomposition import KernelPCA
from sklearn.datasets import make_circles

# Generate the seeded toy dataset; only the feature matrix is kept, the
# class labels returned alongside it are discarded.
features, _ = make_circles(
    n_samples=1000,
    noise=0.1,
    factor=0.1,
    random_state=1,
)

In [6]:
# Reduce the data to a single component with an RBF-kernel PCA.
# random_state is fixed because, for this many samples and this few
# components, scikit-learn's default 'auto' eigen-solver policy selects
# ARPACK, which starts from a random vector; seeding it keeps the cell
# reproducible and matches the seeded calls elsewhere in the notebook.
kpca = KernelPCA(kernel='rbf', gamma=15, n_components=1, random_state=1)
features_kpca = kpca.fit_transform(features)


# Compare the column counts before and after the reduction.
print(f'original features {features.shape[1]}')
print(f'reduced features {features_kpca.shape[1]}')

original features 2
reduced features 1


In [9]:
# linear discriminant
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Iris provides both a feature matrix and the class labels that LDA needs.
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Fit the discriminant model on the labelled data, then project the same
# features onto a single component.
lda = LinearDiscriminantAnalysis(n_components=1)
lda.fit(features, target)
features_lda = lda.transform(features)

In [10]:
# Refit without capping the number of components so that the explained
# variance ratio of every available component can be inspected.
lda = LinearDiscriminantAnalysis(n_components=None)
# fit() returns the estimator itself, not transformed data, so its result is
# deliberately not stored under a `features_*` name (doing so would replace
# any previously computed feature array with an estimator object).
lda.fit(features, target)

lda_var_ratios = lda.explained_variance_ratio_

def select_n_components(var_ratio, goal_var: float) -> int:
    """Count how many leading components are needed to reach a variance goal.

    Walks ``var_ratio`` in order, accumulating the explained-variance ratios,
    and stops at the first component whose running total is at least
    ``goal_var``.

    Args:
        var_ratio: Iterable of per-component explained-variance ratios, in
            the order the components should be considered.
        goal_var: Cumulative explained variance to reach.

    Returns:
        The number of components consumed when the goal was first met. If the
        goal is never met, the total number of components; 0 for an empty
        ``var_ratio``.
    """
    running_total = 0.0
    used = 0  # stays 0 when var_ratio yields nothing

    for used, ratio in enumerate(var_ratio, start=1):
        running_total += ratio
        if running_total >= goal_var:
            # Goal reached: `used` components were enough.
            return used

    # Goal never reached: every component was consumed.
    return used

In [11]:
# Number of LDA components needed for a cumulative explained variance of 0.95.
select_n_components(var_ratio=lda_var_ratios, goal_var=0.95)

1

In [12]:
# matrix factorization
from sklearn.decomposition import NMF

# Use the raw (unscaled) digits data here: non-negative matrix factorization
# expects non-negative input, which standardized features would not be.
features = digits.data

# Factorize into 10 components with a fixed seed.
nmf = NMF(random_state=1, n_components=10)
features_nmf = nmf.fit_transform(features)

# Compare the column counts before and after the reduction.
print(f'original features {features.shape[1]}')
print(f'reduced features {features_nmf.shape[1]}')

original features 64
reduced features 10


In [13]:
# sparse data - truncated singular value decomposition
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
import numpy as np

# Standardize the digits features, then store them in CSR sparse format so
# the reduction below runs on a sparse matrix.
features = StandardScaler().fit_transform(digits.data)

features_sparse = csr_matrix(features)

# TruncatedSVD's default solver is randomized, so the seed is fixed to make
# the projection reproducible across runs (consistent with the seeded
# make_circles and NMF calls in this notebook).
tsvd = TruncatedSVD(n_components=10, random_state=1)
features_sparse_tsvd = tsvd.fit(features_sparse).transform(features_sparse)

# Compare the column counts before and after the reduction.
print(f'original features {features.shape[1]}')
print(f'reduced features {features_sparse_tsvd.shape[1]}')

original features 64
reduced features 10
