# Dimensionality Reduction

#### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#### Import Dataset

In [2]:
df = pd.read_csv('Life Expectancy Data.csv')
df.head(3)

Unnamed: 0,Country,Year,Status,Life expectancy,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,...,Polio,Total expenditure,Diphtheria,HIV/AIDS,GDP,Population,thinness 1-19 years,thinness 5-9 years,Income composition of resources,Schooling
0,Afghanistan,2015,Developing,65.0,263.0,62,0.01,71.279624,65.0,1154,...,6.0,8.16,65.0,0.1,584.25921,33736494.0,17.2,17.3,0.479,10.1
1,Afghanistan,2014,Developing,59.9,271.0,64,0.01,73.523582,62.0,492,...,58.0,8.18,62.0,0.1,612.696514,327582.0,17.5,17.5,0.476,10.0
2,Afghanistan,2013,Developing,59.9,268.0,66,0.01,73.219243,64.0,430,...,62.0,8.13,64.0,0.1,631.744976,31731688.0,17.7,17.7,0.47,9.9


In [3]:
df.drop(columns=['Country', 'Status'], inplace=True)

#### Nulls Removing

In [4]:
import warnings
warnings.filterwarnings('ignore')
df.fillna(value=df.mean(), inplace = True)

In [5]:
df.shape

(2938, 20)

#### train_test_split

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns='Life expectancy '), df['Life expectancy '], test_size=0.2, random_state=70)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((2350, 19), (588, 19), (2350,), (588,))

#### Scaling

In [7]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

#### Model Building and training

In [8]:
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, hinge_loss, r2_score

svr = SVR()
svr.fit(X_train_scaled, y_train)

#### Model Evaluation

In [9]:
y_pred = svr.predict(X_test_scaled)
evaluations = pd.DataFrame(index = ['R-squared Score', 'MSE'], columns=['svm'])
evaluations.loc['R-squared Score', 'svm'] = svr.score(X_test_scaled, y_test)
evaluations.loc['MSE', 'svm'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm
R-squared Score,0.846243
MSE,13.544659


## Dimensionality Reduction Algorithms

#### 1. Linear Discriminant Analysis (LDA)

In [10]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis(n_components=3)
lda.get_params()

{'covariance_estimator': None,
 'n_components': 3,
 'priors': None,
 'shrinkage': None,
 'solver': 'svd',
 'store_covariance': False,
 'tol': 0.0001}

#### LDA is a supervised ml algo used with categorical data, and is not used with Continous data.

#### 2. Neighborhood Component Analysis (NCA)

In [11]:
from sklearn.neighbors import NeighborhoodComponentsAnalysis
nca = NeighborhoodComponentsAnalysis(n_components=3)
nca.get_params()

{'callback': None,
 'init': 'auto',
 'max_iter': 50,
 'n_components': 3,
 'random_state': None,
 'tol': 1e-05,
 'verbose': 0,
 'warm_start': False}

#### NCA is a supervised ml algo, used with categorical data.

#### 3. PLSRegression

In [12]:
from sklearn.cross_decomposition import PLSRegression
plsr = PLSRegression(n_components=3)
plsr.get_params()

{'copy': True, 'max_iter': 500, 'n_components': 3, 'scale': True, 'tol': 1e-06}

#### Dimensions Reduction with Cross-Decomposition (PLSRegression)

In [13]:
X_train_reduced = plsr.fit_transform(X_train_scaled, y_train)
X_test_reduced = plsr.transform(X_test_scaled)

In [14]:
X_train_reduced[0].shape, X_test_reduced[0].shape

((2350, 3), (3,))

#### SVM trained with data of reduced dimensions by plsregression

In [15]:
svr.fit(X_train_reduced[0], y_train)

#### Model Evaluation

In [16]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'pls'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'pls'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls
R-squared Score,0.846243,0.802769
MSE,13.544659,17.3744


#### PLSRegression is a supervised ml algo, it is fast, and Captured most of the features

#### 4. Isomap

In [17]:
from sklearn.manifold import Isomap
isomap = Isomap(n_components=3)
isomap.get_params()

{'eigen_solver': 'auto',
 'max_iter': None,
 'metric': 'minkowski',
 'metric_params': None,
 'n_components': 3,
 'n_jobs': None,
 'n_neighbors': 5,
 'neighbors_algorithm': 'auto',
 'p': 2,
 'path_method': 'auto',
 'radius': None,
 'tol': 0}

#### Dimension Reduction using Isomap

In [18]:
X_train_reduced = isomap.fit_transform(X_train_scaled)
X_test_reduced = isomap.transform(X_test_scaled)
X_train_reduced.shape, X_test_reduced.shape

((2350, 3), (588, 3))

#### SVM training with dimension reduced data

In [19]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [20]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'isomap'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'isomap'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap
R-squared Score,0.846243,0.802769,0.668215
MSE,13.544659,17.3744,29.227484


#### Isomap is an Unsupervised Method, Used in Non-linear Dimensionality reduction, and it gave good results by capturing imp features.

#### 5. Locally Linear Embedding (LLE)

In [21]:
from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_components=3)
lle.get_params()

{'eigen_solver': 'auto',
 'hessian_tol': 0.0001,
 'max_iter': 100,
 'method': 'standard',
 'modified_tol': 1e-12,
 'n_components': 3,
 'n_jobs': None,
 'n_neighbors': 5,
 'neighbors_algorithm': 'auto',
 'random_state': None,
 'reg': 0.001,
 'tol': 1e-06}

#### Dimension Reduction using LLE

In [22]:
X_train_reduced = lle.fit_transform(X_train_scaled)
X_test_reduced = lle.transform(X_test_scaled)
X_train_reduced.shape,X_test_reduced.shape

((2350, 3), (588, 3))

#### SVM trained with dimension reduced data

In [23]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [24]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'lle'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'lle'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap,lle
R-squared Score,0.846243,0.802769,0.668215,0.155191
MSE,13.544659,17.3744,29.227484,74.420566


#### LLE is an Unsupervised method, Used for dimensionality reduction which preserves distances within local neighbors, it gave poor result

#### 6. Spectral Embedding

In [25]:
from sklearn.manifold import SpectralEmbedding
spec_emb = SpectralEmbedding(n_components=3)
spec_emb.get_params()

{'affinity': 'nearest_neighbors',
 'eigen_solver': None,
 'eigen_tol': 'auto',
 'gamma': None,
 'n_components': 3,
 'n_jobs': None,
 'n_neighbors': None,
 'random_state': None}

#### Dimension Reduction using Spectral Embedding

In [26]:
X_train_reduced = spec_emb.fit_transform(X_train_scaled)
X_test_reduced = spec_emb.fit_transform(X_test_scaled)
X_train_reduced.shape, X_test_reduced.shape

((2350, 3), (588, 3))

#### SVM trained with dimension reduced data

In [27]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [28]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'spec_emb'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'spec_emb'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap,lle,spec_emb
R-squared Score,0.846243,0.802769,0.668215,0.155191,-0.107422
MSE,13.544659,17.3744,29.227484,74.420566,97.554612


#### Spectral embedding is an unsupervised method used for non-linear dimensionality reduction,it gave poor results

#### 7. t-Stochastic Neighbor Embedding (TSNE)

In [34]:
from sklearn.manifold import TSNE
tsne = TSNE(n_components=3)
tsne.get_params()

{'angle': 0.5,
 'early_exaggeration': 12.0,
 'init': 'pca',
 'learning_rate': 'auto',
 'method': 'barnes_hut',
 'metric': 'euclidean',
 'metric_params': None,
 'min_grad_norm': 1e-07,
 'n_components': 3,
 'n_iter': 1000,
 'n_iter_without_progress': 300,
 'n_jobs': None,
 'perplexity': 30.0,
 'random_state': None,
 'square_distances': 'deprecated',
 'verbose': 0}

#### Dimension Reduction using TSNE

In [35]:
X_train_reduced = tsne.fit_transform(X_train_scaled)
X_test_reduced = tsne.fit_transform(X_test_scaled)
X_train_reduced.shape,X_test_reduced.shape

((2350, 3), (588, 3))

#### Model trained with dimension reduced data

In [36]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [37]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'tsne'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'tsne'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap,lle,spec_emb,mds,tsne
R-squared Score,0.846243,0.802769,0.668215,0.155191,-0.107422,-1.493837,0.644331
MSE,13.544659,17.3744,29.227484,74.420566,97.554612,219.686111,31.331487


#### TSNE is an unsupervised method, it consumed more time and gave good results

#### 8. PCA

In [39]:
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
pca.get_params()

{'copy': True,
 'iterated_power': 'auto',
 'n_components': 3,
 'n_oversamples': 10,
 'power_iteration_normalizer': 'auto',
 'random_state': None,
 'svd_solver': 'auto',
 'tol': 0.0,
 'whiten': False}

#### Dimension Reduction using PCA

In [40]:
X_train_reduced = pca.fit_transform(X_train_scaled)
X_test_reduced = pca.transform(X_test_scaled)
X_train_reduced.shape,X_test_reduced.shape

((2350, 3), (588, 3))

#### Model trained with dimension reduced data

In [41]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [42]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'pca'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'pca'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap,lle,spec_emb,mds,tsne,pca
R-squared Score,0.846243,0.802769,0.668215,0.155191,-0.107422,-1.493837,0.644331,0.744923
MSE,13.544659,17.3744,29.227484,74.420566,97.554612,219.686111,31.331487,22.470131


#### PCA is an unsupervised method, It is very fast, and Gave better results

#### 9. IncrementalPCA 

In [43]:
from sklearn.decomposition import IncrementalPCA
ipca = IncrementalPCA(n_components=3)
ipca.get_params()

{'batch_size': None, 'copy': True, 'n_components': 3, 'whiten': False}

#### Dimension Reduction using Incremental PCA

In [44]:
X_train_reduced = ipca.fit_transform(X_train_scaled)
X_test_reduced = ipca.transform(X_test_scaled)
X_train_reduced.shape,X_test_reduced.shape

((2350, 3), (588, 3))

#### Model trained with dimension reduced data

In [45]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [46]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'inc-pca'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'inc-pca'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap,lle,spec_emb,mds,tsne,pca,inc-pca
R-squared Score,0.846243,0.802769,0.668215,0.155191,-0.107422,-1.493837,0.644331,0.744923,0.733979
MSE,13.544659,17.3744,29.227484,74.420566,97.554612,219.686111,31.331487,22.470131,23.434193


#### Incremental PCA is similar to PCA with more memory efficient than PCA, and it allows sparse Input

#### 10. SparsePCA

In [47]:
from sklearn.decomposition import SparsePCA
spca = SparsePCA(n_components=3)
spca.get_params()

{'U_init': None,
 'V_init': None,
 'alpha': 1,
 'max_iter': 1000,
 'method': 'lars',
 'n_components': 3,
 'n_jobs': None,
 'random_state': None,
 'ridge_alpha': 0.01,
 'tol': 1e-08,
 'verbose': False}

#### Dimension reduction using Sparse PCA

In [48]:
X_train_reduced = spca.fit_transform(X_train_scaled)
X_test_reduced = spca.transform(X_test_scaled)
X_train_reduced.shape,X_test_reduced.shape

((2350, 3), (588, 3))

#### Model trained with dimension reduced data

In [49]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [50]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'sparse-pca'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'sparse-pca'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap,lle,spec_emb,mds,tsne,pca,inc-pca,sparse-pca
R-squared Score,0.846243,0.802769,0.668215,0.155191,-0.107422,-1.493837,0.644331,0.744923,0.733979,0.743115
MSE,13.544659,17.3744,29.227484,74.420566,97.554612,219.686111,31.331487,22.470131,23.434193,22.629376


#### Sparse PCA finds the set of sparse components that can optionally reconstruct the data, and it Gave good results

#### 11. KernelPCA

In [51]:
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=3)
kpca.get_params()

{'alpha': 1.0,
 'coef0': 1,
 'copy_X': True,
 'degree': 3,
 'eigen_solver': 'auto',
 'fit_inverse_transform': False,
 'gamma': None,
 'iterated_power': 'auto',
 'kernel': 'linear',
 'kernel_params': None,
 'max_iter': None,
 'n_components': 3,
 'n_jobs': None,
 'random_state': None,
 'remove_zero_eig': False,
 'tol': 0}

#### Dimension reduction using Kernel PCA

In [52]:
X_train_reduced = kpca.fit_transform(X_train_scaled)
X_test_reduced = kpca.transform(X_test_scaled)
X_train_reduced.shape,X_test_reduced.shape

((2350, 3), (588, 3))

#### Model trained on dimension reduced data

In [53]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [54]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'kernel-pca'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'kernel-pca'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap,lle,spec_emb,mds,tsne,pca,inc-pca,sparse-pca,kernel-pca
R-squared Score,0.846243,0.802769,0.668215,0.155191,-0.107422,-1.493837,0.644331,0.744923,0.733979,0.743115,0.744921
MSE,13.544659,17.3744,29.227484,74.420566,97.554612,219.686111,31.331487,22.470131,23.434193,22.629376,22.470349


#### KernelPCA is similar to PCA, and it is used in Non-Linear Dimensionality Reduction

#### 12. TruncatedSVD

In [55]:
from sklearn.decomposition import TruncatedSVD
tsvd = TruncatedSVD(n_components=3)
tsvd.get_params()

{'algorithm': 'randomized',
 'n_components': 3,
 'n_iter': 5,
 'n_oversamples': 10,
 'power_iteration_normalizer': 'auto',
 'random_state': None,
 'tol': 0.0}

#### Dimension reduction using Truncated SVD

In [56]:
X_train_reduced = tsvd.fit_transform(X_train_scaled)
X_test_reduced = tsvd.transform(X_test_scaled)
X_train_reduced.shape,X_test_reduced.shape

((2350, 3), (588, 3))

#### Model trained on dimension reduced data

In [57]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [58]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'tsvd'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'tsvd'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap,lle,spec_emb,mds,tsne,pca,inc-pca,sparse-pca,kernel-pca,tsvd
R-squared Score,0.846243,0.802769,0.668215,0.155191,-0.107422,-1.493837,0.644331,0.744923,0.733979,0.743115,0.744921,0.744921
MSE,13.544659,17.3744,29.227484,74.420566,97.554612,219.686111,31.331487,22.470131,23.434193,22.629376,22.470349,22.470276


#### TruncatedSVD works efficiently with sparse matrices, and it Gave good Results

#### 13. FastICA (Independent Component Analysis)

In [59]:
from sklearn.decomposition import FastICA
fica = FastICA(n_components=3)
fica.get_params()

{'algorithm': 'parallel',
 'fun': 'logcosh',
 'fun_args': None,
 'max_iter': 200,
 'n_components': 3,
 'random_state': None,
 'tol': 0.0001,
 'w_init': None,
 'whiten': 'warn',
 'whiten_solver': 'svd'}

#### Dimension reduction using FastICA

In [60]:
X_train_reduced = fica.fit_transform(X_train_scaled)
X_test_reduced = fica.transform(X_test_scaled)
X_train_reduced.shape,X_test_reduced.shape

((2350, 3), (588, 3))

#### Model trained on dimension reduced data

In [61]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [62]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'fast-ica'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'fast-ica'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap,lle,spec_emb,mds,tsne,pca,inc-pca,sparse-pca,kernel-pca,tsvd,fast-ica
R-squared Score,0.846243,0.802769,0.668215,0.155191,-0.107422,-1.493837,0.644331,0.744923,0.733979,0.743115,0.744921,0.744921,0.742886
MSE,13.544659,17.3744,29.227484,74.420566,97.554612,219.686111,31.331487,22.470131,23.434193,22.629376,22.470349,22.470276,22.649623


#### FastICA Gave good results, and it is used for separating superimposed signals

#### 14. NMF (Non-Negative Matrix Factorization)

In [63]:
from sklearn.decomposition import NMF
nmf = NMF(n_components=3)
nmf.get_params()

{'alpha_H': 'same',
 'alpha_W': 0.0,
 'beta_loss': 'frobenius',
 'init': None,
 'l1_ratio': 0.0,
 'max_iter': 200,
 'n_components': 3,
 'random_state': None,
 'shuffle': False,
 'solver': 'cd',
 'tol': 0.0001,
 'verbose': 0}

#### Data Preprocessing

In [64]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

#### Dimension reduction using NMF

In [65]:
X_train_reduced = nmf.fit_transform(X_train_scaled)
X_test_reduced = nmf.transform(X_test_scaled)
X_train_reduced.shape,X_test_reduced.shape

((2350, 3), (588, 3))

#### Model trained on dimension reduced data

In [66]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [67]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'nmf'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'nmf'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap,lle,spec_emb,mds,tsne,pca,inc-pca,sparse-pca,kernel-pca,tsvd,fast-ica,nmf
R-squared Score,0.846243,0.802769,0.668215,0.155191,-0.107422,-1.493837,0.644331,0.744923,0.733979,0.743115,0.744921,0.744921,0.742886,0.652845
MSE,13.544659,17.3744,29.227484,74.420566,97.554612,219.686111,31.331487,22.470131,23.434193,22.629376,22.470349,22.470276,22.649623,30.581463


#### NMF gave good results, it is used for source separation, and topic extraction

#### 15. MiniBatch NMF

In [68]:
from sklearn.decomposition import MiniBatchNMF
mb_nmf = MiniBatchNMF(n_components=3)
mb_nmf.get_params()

{'alpha_H': 'same',
 'alpha_W': 0.0,
 'batch_size': 1024,
 'beta_loss': 'frobenius',
 'forget_factor': 0.7,
 'fresh_restarts': False,
 'fresh_restarts_max_iter': 30,
 'init': None,
 'l1_ratio': 0.0,
 'max_iter': 200,
 'max_no_improvement': 10,
 'n_components': 3,
 'random_state': None,
 'tol': 0.0001,
 'transform_max_iter': None,
 'verbose': 0}

#### Dimension reduction using minibatch NMF

In [69]:
X_train_reduced = mb_nmf.fit_transform(X_train_scaled)
X_test_reduced = mb_nmf.transform(X_test_scaled)
X_train_reduced.shape,X_test_reduced.shape

((2350, 3), (588, 3))

#### Model trained on dimension reduced data

In [70]:
svr.fit(X_train_reduced, y_train)

#### Model Evaluation

In [71]:
y_pred = svr.predict(X_test_reduced)
evaluations.loc['R-squared Score', 'mb-nmf'] = svr.score(X_test_reduced, y_test)
evaluations.loc['MSE', 'mb-nmf'] = mean_squared_error(y_test, y_pred)
evaluations

Unnamed: 0,svm,pls,isomap,lle,spec_emb,mds,tsne,pca,inc-pca,sparse-pca,kernel-pca,tsvd,fast-ica,nmf,mb-nmf
R-squared Score,0.846243,0.802769,0.668215,0.155191,-0.107422,-1.493837,0.644331,0.744923,0.733979,0.743115,0.744921,0.744921,0.742886,0.652845,0.63343
MSE,13.544659,17.3744,29.227484,74.420566,97.554612,219.686111,31.331487,22.470131,23.434193,22.629376,22.470349,22.470276,22.649623,30.581463,32.291778


#### MinibatchNMF gave good results

In [72]:
evaluations.T

Unnamed: 0,R-squared Score,MSE
svm,0.846243,13.544659
pls,0.802769,17.3744
isomap,0.668215,29.227484
lle,0.155191,74.420566
spec_emb,-0.107422,97.554612
mds,-1.493837,219.686111
tsne,0.644331,31.331487
pca,0.744923,22.470131
inc-pca,0.733979,23.434193
sparse-pca,0.743115,22.629376
