# Example 2

This example uses a sparse matrix $B$ in the model $f(x) = \overline{f}(Bx)$. It shows that PCA produces a dense estimate of the directions in $B$, whereas sparse PCA (sPCA) produces a sparse one.

In [1]:
import sys
import os.path as op
# Make the parent directory importable.
# NOTE(review): presumably this is where the project packages imported in
# later cells (e.g. `edrgp`) live -- confirm.
#
# The directory is placed exactly once, at the front of sys.path. Any existing
# occurrence is dropped first, so re-running this cell neither duplicates the
# entry nor lets the list grow.
parent_dir = op.abspath('../')
sys.path[:] = [parent_dir] + [entry for entry in sys.path if entry != parent_dir]

In [2]:
import regression
import seaborn as sns
# Apply seaborn's default theme to all subsequent matplotlib figures.
sns.set()
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [3]:
import pandas as pd
import numpy as np
import scipy

### Generate a toy dataset with a sparse matrix $B$

In [4]:
# Build the toy dataset for this example; B is the sparse matrix from
# f(x) = f_bar(Bx).
# NOTE(review): X is presumably the (n_samples, 8) feature matrix and y the
# response vector -- confirm against regression.data_2example.
# NOTE(review): no random seed is set in this notebook; if data_2example draws
# random samples, the saved outputs will not reproduce exactly -- confirm.
X, y, B = regression.data_2example()

In [5]:
# Pairwise correlation matrix of the columns of X (pandas' default method).
pd.DataFrame(X).corr()

Unnamed: 0,0,1,2,3,4,5,6,7
0,1.0,0.014039,0.549671,0.051855,0.164788,-0.244727,0.277482,0.242458
1,0.014039,1.0,0.20917,0.014535,-0.026456,0.193332,0.083563,-0.092884
2,0.549671,0.20917,1.0,-0.205494,-0.018576,-0.125941,0.129037,-0.187294
3,0.051855,0.014535,-0.205494,1.0,-0.020496,-0.231187,0.045784,-0.471113
4,0.164788,-0.026456,-0.018576,-0.020496,1.0,-0.43877,0.213312,-0.053341
5,-0.244727,0.193332,-0.125941,-0.231187,-0.43877,1.0,0.095091,0.054433
6,0.277482,0.083563,0.129037,0.045784,0.213312,0.095091,1.0,-0.016974
7,0.242458,-0.092884,-0.187294,-0.471113,-0.053341,0.054433,-0.016974,1.0


In [6]:
# Row and column labels derived from B; later cells reuse them so that every
# component table is labelled the same way.
index, columns = regression.index_columns(B)
# Reference table of the true matrix B, passed through regression.norm.
# NOTE(review): norm presumably rescales each column to unit Euclidean length
# (the displayed non-zero columns have unit norm) -- confirm.
pd.DataFrame(regression.norm(B), index=index, columns=columns)

Unnamed: 0,Combnation 1,Combnation 2,Combnation 3
Feature 1,0.0,0.0,0.0
Feature 2,0.0,0.0,0.0
Feature 3,0.0,0.0,0.0
Feature 4,0.501194,0.0,0.0
Feature 5,0.865335,0.0,0.0
Feature 6,0.0,0.0,0.0
Feature 7,0.0,0.0,1.0
Feature 8,0.0,1.0,0.0


## Apply Principal Component Analysis

In [7]:
from sklearn.decomposition import SparsePCA
from edrgp.utils import CustomPCA
from edrgp.gp_model import GaussianProcessRegressor
from edrgp import EffectiveDimensionalityReduction
# Effective dimensionality reduction built from a Gaussian-process regressor
# and a CustomPCA decomposition, keeping 3 components with normalize=True.
# NOTE(review): 3 matches the number of columns of B shown above; it is
# hard-coded here rather than derived from B.
PCA_edr = EffectiveDimensionalityReduction(GaussianProcessRegressor(),
                                           CustomPCA(), n_components=3,
                                           normalize=True)
PCA_edr.fit(X, y)

# Show the fitted components transposed, labelled like the table for B.
# NOTE(review): components_ is presumably (n_components, n_features), hence
# the transpose -- confirm against edrgp.
pd.DataFrame(PCA_edr.components_.T, index=index, columns=columns)

Unnamed: 0,Combnation 1,Combnation 2,Combnation 3
Feature 1,-0.001461,-0.00215,-0.036842
Feature 2,0.001391,0.004854,0.04902
Feature 3,0.001165,-0.009788,0.001309
Feature 4,-0.249124,0.370467,0.119812
Feature 5,-0.415969,0.582878,0.3443
Feature 6,0.007623,0.018629,0.051341
Feature 7,-0.326161,0.31261,-0.981653
Feature 8,-0.783241,-0.592321,0.071358


## Apply Sparse Principal Component Analysis

In [11]:
# Same GP regressor / CustomPCA pair as the PCA section, but constructed
# without n_components; the component count is given to SparsePCA below.
# NOTE(review): the saved execution count jumps from In [7] to In [11]; re-run
# with "Restart & Run All" to confirm the output still matches this code.
sPCA_edr = EffectiveDimensionalityReduction(GaussianProcessRegressor(),
                                            CustomPCA(),
                                            normalize=True)
sPCA_edr.fit(X, y)
# Refit with sparse PCA (3 components, alpha=1).
# NOTE(review): refit presumably re-estimates the directions with the given
# decomposition and stores them in refit_components_ -- confirm against edrgp.
sPCA_edr.refit(SparsePCA(n_components=3, alpha=1))
# Show the refit components, passed through regression.norm and labelled like
# the table for B. In the saved output the non-zero pattern matches B only up
# to column order and sign.
pd.DataFrame(regression.norm(sPCA_edr.refit_components_.T), index=index, columns=columns)

Unnamed: 0,Combnation 1,Combnation 2,Combnation 3
Feature 1,0.0,0.0,0.0
Feature 2,0.0,0.0,0.0
Feature 3,0.0,0.0,0.0
Feature 4,0.0,0.0,-0.339497
Feature 5,0.0,0.0,-0.940607
Feature 6,0.0,0.0,0.0
Feature 7,0.0,-1.0,0.0
Feature 8,-1.0,0.0,0.0
