# Example 2

This example uses a sparse matrix $B$, where $f(x) = \overline{f}(Bx)$, to show that PCA produces a dense estimate while sparse PCA (sPCA) produces a sparse one.

In [1]:
import sys
import os.path as op

# Make the parent directory importable; presumably this is where the local
# `regression` / `edrgp` modules imported in later cells live -- TODO confirm.
parent_dir = op.abspath('../')
# Put it at the front exactly once. Filtering out existing occurrences keeps
# the cell idempotent: re-running it does not keep growing sys.path, and the
# parent directory still takes priority during import resolution.
sys.path = [parent_dir] + [entry for entry in sys.path if entry != parent_dir]

In [2]:
# Helper module used by the cells below (data_2example, index_columns, norm);
# presumably resolved via the parent directory added to sys.path -- TODO confirm.
import regression
import seaborn as sns
# Apply seaborn's default plot styling. No plotting cell is visible in this
# part of the notebook, so this only matters if figures are drawn elsewhere.
sns.set()
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [3]:
import pandas as pd
import numpy as np
import scipy

### Generate a toy dataset with a sparse matrix $B$

In [4]:
# Build the toy problem. Judging by how the values are used in later cells,
# X is the input sample (wrapped in a DataFrame and passed to fit), y is the
# target passed to fit, and B is the sparse ground-truth matrix displayed
# further down -- TODO confirm against regression.data_2example.
X, y, B = regression.data_2example()

In [9]:
# Pairwise correlation matrix of the columns of X (pandas default: Pearson).
pd.DataFrame(X).corr()

Unnamed: 0,0,1,2,3,4,5,6,7
0,1.0,0.252892,-0.007687,-0.153986,-0.042288,0.311187,-0.023219,0.288986
1,0.252892,1.0,-0.231176,0.086859,-0.284753,0.242111,0.23461,-0.562033
2,-0.007687,-0.231176,1.0,0.334772,-0.244553,0.121942,-0.052018,-0.148811
3,-0.153986,0.086859,0.334772,1.0,-0.110237,-0.034457,0.209375,0.041901
4,-0.042288,-0.284753,-0.244553,-0.110237,1.0,0.168538,-0.18237,0.234197
5,0.311187,0.242111,0.121942,-0.034457,0.168538,1.0,0.119865,-0.198346
6,-0.023219,0.23461,-0.052018,0.209375,-0.18237,0.119865,1.0,-0.189481
7,0.288986,-0.562033,-0.148811,0.041901,0.234197,-0.198346,-0.189481,1.0


In [5]:
# Row / column labels for the coefficient tables; the PCA and sPCA cells
# below reuse `index` and `columns`.
index, columns = regression.index_columns(B)

# Ground-truth B after regression.norm (presumably a column-wise rescaling
# to unit norm -- TODO confirm against the helper).
pd.DataFrame(
    regression.norm(B),
    index=index,
    columns=columns,
)

Unnamed: 0,Combnation 1,Combnation 2,Combnation 3
Feature 1,0.0,0.0,0.0
Feature 2,0.0,0.0,0.0
Feature 3,0.0,0.0,0.0
Feature 4,0.501194,0.0,0.0
Feature 5,0.865335,0.0,0.0
Feature 6,0.0,0.0,0.0
Feature 7,0.0,0.0,1.0
Feature 8,0.0,1.0,0.0


## Apply Principal Component Analysis

In [6]:
from sklearn.decomposition import PCA, SparsePCA
from edrgp.regression import GaussianProcessRegressor
from edrgp.edr import EffectiveDimensionalityReduction

# Dense baseline: EDR wrapper around a Gaussian-process regressor, with plain
# PCA extracting three directions.
PCA_edr = EffectiveDimensionalityReduction(
    GaussianProcessRegressor(),
    PCA(n_components=3),
    normalize=True,
)
PCA_edr.fit(X, y)

# One row per input feature, one column per estimated direction.
# NOTE(review): unlike the ground-truth and SparsePCA tables, these components
# are not passed through regression.norm -- confirm whether that is intentional.
pd.DataFrame(
    PCA_edr.components_.T,
    index=index,
    columns=columns,
)

Unnamed: 0,Combnation 1,Combnation 2,Combnation 3
Feature 1,0.071642,-0.039048,-0.38033
Feature 2,-0.032693,0.028409,0.17963
Feature 3,0.068556,0.009638,-0.52708
Feature 4,-0.036075,-0.622345,0.535288
Feature 5,-0.002909,-0.851733,-0.296182
Feature 6,-0.014039,-0.020969,0.151764
Feature 7,-0.024345,-0.000176,-0.441612
Feature 8,-0.914174,0.015333,-0.062793


## Apply Sparse Principal Component Analysis

In [8]:
# Same EDR pipeline as the PCA cell, but with SparsePCA as the decomposition
# step; alpha is SparsePCA's sparsity (L1 penalty) strength.
# NOTE(review): no random_state is passed to SparsePCA, so the fit may not be
# exactly reproducible between runs -- confirm whether a seed should be fixed.
sPCA_edr = EffectiveDimensionalityReduction(
    GaussianProcessRegressor(),
    SparsePCA(n_components=3, alpha=1),
    normalize=True,
)
sPCA_edr.fit(X, y)

# Components are passed through regression.norm, as for the ground-truth B
# table, and use the same feature/combination labels.
pd.DataFrame(
    regression.norm(sPCA_edr.components_.T),
    index=index,
    columns=columns,
)

Unnamed: 0,Combnation 1,Combnation 2,Combnation 3
Feature 1,0.0,0.0,0.0
Feature 2,0.0,0.0,0.0
Feature 3,0.0,0.0,0.0
Feature 4,0.0,0.0,-0.577664
Feature 5,0.0,0.0,-0.816275
Feature 6,0.0,0.0,0.0
Feature 7,1.0,0.0,0.0
Feature 8,0.0,-1.0,0.0
