In [3]:
import numpy as np
np.random.seed(4)
m = 60
w1, w2 = 0.1, 0.3
noise = 0.1

angles = np.random.rand(m) * 3 * np.pi / 2 - 0.5
X = np.empty((m, 3))
X[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * np.random.randn(m) / 2
X[:, 1] = np.sin(angles) * 0.7 + noise * np.random.randn(m) / 2
X[:, 2] = X[:, 0] * w1 + X[:, 1] * w2 + noise * np.random.randn(m)

In [4]:
X_centered = X - X.mean(axis=0)
U, s, V = np.linalg.svd(X_centered)
c1 = V.T[:,0]
c2 = V.T[:,1]
print('c1: %n\t', c1 )
print('c2:', c2)

c1: %n	 [0.93636116 0.29854881 0.18465208]
c2: [-0.34027485  0.90119108  0.2684542 ]


In [5]:
Wd = V.T[:,:2]
Xd = X_centered.dot(Wd)
print(Xd)

[[-1.26203346 -0.42067648]
 [ 0.08001485  0.35272239]
 [-1.17545763 -0.36085729]
 [-0.89305601  0.30862856]
 [-0.73016287  0.25404049]
 [ 1.10436914 -0.20204953]
 [-1.27265808 -0.46781247]
 [ 0.44933007 -0.67736663]
 [ 1.09356195  0.04467792]
 [ 0.66177325  0.28651264]
 [-1.04466138  0.11244353]
 [ 1.05932502 -0.31189109]
 [-1.13761426 -0.14576655]
 [-1.16044117 -0.36481599]
 [ 1.00167625 -0.39422008]
 [-0.2750406   0.34391089]
 [ 0.45624787 -0.69707573]
 [ 0.79706574  0.26870969]
 [ 0.66924929 -0.65520024]
 [-1.30679728 -0.37671343]
 [ 0.6626586   0.32706423]
 [-1.25387588 -0.56043928]
 [-1.04046987  0.08727672]
 [-1.26047729 -0.1571074 ]
 [ 1.09786649 -0.38643428]
 [ 0.7130973  -0.64941523]
 [-0.17786909  0.43609071]
 [ 1.02975735 -0.33747452]
 [-0.94552283  0.22833268]
 [ 0.80994916  0.33810729]
 [ 0.20189175  0.3514758 ]
 [-1.34219411 -0.42415687]
 [ 0.13599883  0.37258632]
 [ 0.8206931  -0.55120835]
 [ 0.90818634 -0.31869127]
 [ 0.06703671  0.42486148]
 [ 0.13936893  0.41906961]
 

In [6]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
Xd = pca.fit_transform(X)
print(Xd)

[[ 1.26203346  0.42067648]
 [-0.08001485 -0.35272239]
 [ 1.17545763  0.36085729]
 [ 0.89305601 -0.30862856]
 [ 0.73016287 -0.25404049]
 [-1.10436914  0.20204953]
 [ 1.27265808  0.46781247]
 [-0.44933007  0.67736663]
 [-1.09356195 -0.04467792]
 [-0.66177325 -0.28651264]
 [ 1.04466138 -0.11244353]
 [-1.05932502  0.31189109]
 [ 1.13761426  0.14576655]
 [ 1.16044117  0.36481599]
 [-1.00167625  0.39422008]
 [ 0.2750406  -0.34391089]
 [-0.45624787  0.69707573]
 [-0.79706574 -0.26870969]
 [-0.66924929  0.65520024]
 [ 1.30679728  0.37671343]
 [-0.6626586  -0.32706423]
 [ 1.25387588  0.56043928]
 [ 1.04046987 -0.08727672]
 [ 1.26047729  0.1571074 ]
 [-1.09786649  0.38643428]
 [-0.7130973   0.64941523]
 [ 0.17786909 -0.43609071]
 [-1.02975735  0.33747452]
 [ 0.94552283 -0.22833268]
 [-0.80994916 -0.33810729]
 [-0.20189175 -0.3514758 ]
 [ 1.34219411  0.42415687]
 [-0.13599883 -0.37258632]
 [-0.8206931   0.55120835]
 [-0.90818634  0.31869127]
 [-0.06703671 -0.42486148]
 [-0.13936893 -0.41906961]
 

In [7]:
print('c1:', c1)
print('c2:', c2)
print(pca.components_)

c1: [0.93636116 0.29854881 0.18465208]
c2: [-0.34027485  0.90119108  0.2684542 ]
[[-0.93636116 -0.29854881 -0.18465208]
 [ 0.34027485 -0.90119108 -0.2684542 ]]


In [8]:
print(pca.explained_variance_ratio_)

[0.84248607 0.14631839]


In [None]:
from sklearn.decomposition import IncrementalPCA
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_mldata

mnist = fetch_mldata('MNIST original')
    
X = mnist["data"]
y = mnist["target"]

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
n_batches = 100
ica = IncrementalPCA(n_components=154)
for n_batch in np.array_split(X, n_batches):
    ica.partial_fit(n_batch)
    print('d')
X1 = ica.transform(X_train)
print(X1)

X0 = ica.inverse_transform(X1)

In [17]:
rnd_pca = PCA(n_components=2, svd_solver='randomized')
X_reduced = rnd_pca.fit_transform(X)
print(X_reduced)

[[ 1.26203346  0.42067648]
 [-0.08001485 -0.35272239]
 [ 1.17545763  0.36085729]
 [ 0.89305601 -0.30862856]
 [ 0.73016287 -0.25404049]
 [-1.10436914  0.20204953]
 [ 1.27265808  0.46781247]
 [-0.44933007  0.67736663]
 [-1.09356195 -0.04467792]
 [-0.66177325 -0.28651264]
 [ 1.04466138 -0.11244353]
 [-1.05932502  0.31189109]
 [ 1.13761426  0.14576655]
 [ 1.16044117  0.36481599]
 [-1.00167625  0.39422008]
 [ 0.2750406  -0.34391089]
 [-0.45624787  0.69707573]
 [-0.79706574 -0.26870969]
 [-0.66924929  0.65520024]
 [ 1.30679728  0.37671343]
 [-0.6626586  -0.32706423]
 [ 1.25387588  0.56043928]
 [ 1.04046987 -0.08727672]
 [ 1.26047729  0.1571074 ]
 [-1.09786649  0.38643428]
 [-0.7130973   0.64941523]
 [ 0.17786909 -0.43609071]
 [-1.02975735  0.33747452]
 [ 0.94552283 -0.22833268]
 [-0.80994916 -0.33810729]
 [-0.20189175 -0.3514758 ]
 [ 1.34219411  0.42415687]
 [-0.13599883 -0.37258632]
 [-0.8206931   0.55120835]
 [-0.90818634  0.31869127]
 [-0.06703671 -0.42486148]
 [-0.13936893 -0.41906961]
 

In [20]:
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(kernel='rbf', n_components=2, gamma=0.04)
X_reduced = kpca.fit_transform(X)
print(X_reduced)

[[ 0.33786316 -0.1192203 ]
 [-0.02280676  0.10683317]
 [ 0.31721226 -0.10159442]
 [ 0.24438835  0.08161247]
 [ 0.2014532   0.07147441]
 [-0.29439981 -0.06323396]
 [ 0.33975729 -0.13172112]
 [-0.12514161 -0.17742183]
 [-0.29085158  0.00190957]
 [-0.18139861  0.0800079 ]
 [ 0.2845517   0.02588877]
 [-0.28337959 -0.09060256]
 [ 0.30850419 -0.04450644]
 [ 0.31340384 -0.10239479]
 [-0.26928333 -0.11107034]
 [ 0.07636648  0.10376487]
 [-0.12688961 -0.18267852]
 [-0.2164886   0.07146554]
 [-0.18363457 -0.17417343]
 [ 0.34859223 -0.10897456]
 [-0.18120132  0.09026035]
 [ 0.33420115 -0.1545828 ]
 [ 0.28349312  0.01949245]
 [ 0.33855733 -0.0512129 ]
 [-0.29235179 -0.11176056]
 [-0.19508196 -0.17335683]
 [ 0.04920639  0.12874305]
 [-0.27638193 -0.09682663]
 [ 0.25851607  0.05924208]
 [-0.21908365  0.08897661]
 [-0.05668966  0.10564117]
 [ 0.35669695 -0.12205671]
 [-0.03834616  0.11178932]
 [-0.22337811 -0.14910536]
 [-0.24642626 -0.08900101]
 [-0.01897407  0.12547079]
 [-0.03911441  0.12401794]
 

In [22]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

clf = Pipeline(
    ('clf_pca', KernelPCA(n_components=2)),
    ('clf_reg', LogisticRegression(solver="liblinear"))
)

para = [{
    'kpca_gamma':np.linspace(0.03, 0.05, 10),
    'kpca_kernel':['rbf','sigmoid']
}]

para_grid = GridSearchCV(clf, para, cv=3)
para_grid = fit(X,y)

print(para_grid.best_params_)

TypeError: zip argument #2 must support iteration

In [23]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

clf = Pipeline([
        ("kpca", KernelPCA(n_components=2)),
        ("log_reg", LogisticRegression(solver="liblinear"))
    ])

param_grid = [{
        "kpca__gamma": np.linspace(0.03, 0.05, 10),
        "kpca__kernel": ["rbf", "sigmoid"]
    }]

grid_search = GridSearchCV(clf, param_grid, cv=3)
grid_search.fit(X, y)

NameError: name 'y' is not defined