forked from amueller/introduction_to_ml_with_python
-
Notifications
You must be signed in to change notification settings - Fork 250
/
plot_pca.py
134 lines (110 loc) · 4.72 KB
/
plot_pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np
from joblib import Memory
# On-disk cache (directory "cache") used by the @memory.cache decorator below.
# joblib deprecated the `cachedir` keyword in 0.12 in favor of `location` and
# later removed it, so `location` is required on current joblib releases.
memory = Memory(location="cache")
def plot_pca_illustration():
    """Draw a 2x2 figure that walks through PCA on a synthetic 2-D blob.

    Panels, in order: the original data with the two component directions
    drawn as arrows (scaled by the per-component standard deviation), the
    PCA-transformed data, the transformed data with the second component
    dropped, and the reconstruction obtained from the first component only.
    """
    # Fixed seed so the generated blob (and therefore the figure) is stable.
    rng = np.random.RandomState(5)
    base = rng.normal(size=(300, 2))
    mixing = rng.normal(size=(2, 2))
    offset = rng.normal(size=2)
    X_blob = np.dot(base, mixing) + offset

    full_pca = PCA()
    full_pca.fit(X_blob)
    X_pca = full_pca.transform(X_blob)
    spread = X_pca.std(axis=0)

    _, axes = plt.subplots(2, 2, figsize=(10, 10))
    ax_orig, ax_rot, ax_drop, ax_back = axes.ravel()

    # Every scatter is coloured by the first principal component so points
    # can be followed from panel to panel.
    point_style = dict(c=X_pca[:, 0], linewidths=0, s=60, cmap='viridis')
    arrow_style = dict(width=.1, head_width=.3, color='k')

    def _frame(ax, low, high):
        # Equal aspect plus a square viewing window.
        ax.set_aspect('equal')
        ax.set_xlim(low, high)
        ax.set_ylim(low, high)

    # Panel 1: original data with the component directions.
    ax_orig.set_title("원본 데이터")
    ax_orig.scatter(X_blob[:, 0], X_blob[:, 1], **point_style)
    ax_orig.set_xlabel("특성 1")
    ax_orig.set_ylabel("특성 2")
    mean_x, mean_y = full_pca.mean_[0], full_pca.mean_[1]
    for scale, direction in zip(spread, full_pca.components_):
        ax_orig.arrow(mean_x, mean_y, scale * direction[0],
                      scale * direction[1], **arrow_style)
    ax_orig.text(-1.2, -.3, "성분 2", size=14)
    ax_orig.text(-3, -3.5, "성분 1", size=14)
    _frame(ax_orig, -8, 4)

    # Panel 2: data expressed in the principal-component basis.
    ax_rot.set_title("변환된 데이터")
    ax_rot.scatter(X_pca[:, 0], X_pca[:, 1], **point_style)
    ax_rot.set_xlabel("첫번째 주성분")
    ax_rot.set_ylabel("두번째 주성분")
    _frame(ax_rot, -8, 8)

    # Keep only the first component, then map back to feature space.
    single_pca = PCA(n_components=1)
    single_pca.fit(X_blob)
    X_inverse = single_pca.inverse_transform(single_pca.transform(X_blob))

    # Panel 3: first component only, second coordinate flattened to zero.
    ax_drop.set_title("두번째 주성분을 제거한 변환된 데이터")
    ax_drop.scatter(X_pca[:, 0], np.zeros(len(X_pca)), **point_style)
    ax_drop.set_xlabel("첫번째 주성분")
    _frame(ax_drop, -8, 8)

    # Panel 4: reconstruction in the original feature space.
    ax_back.set_title("첫번째 주성분만 사용하여 회전 복원")
    ax_back.scatter(X_inverse[:, 0], X_inverse[:, 1], **point_style)
    ax_back.set_xlabel("특성 1")
    ax_back.set_ylabel("특성 2")
    _frame(ax_back, -8, 4)
def plot_pca_whitening():
    """Plot a synthetic 2-D blob next to its whitened PCA projection.

    The left panel shows the original data and the right panel shows the
    output of ``PCA(whiten=True)`` on the same data.
    """
    # Fixed seed so the generated blob (and therefore the figure) is stable.
    rng = np.random.RandomState(5)
    base = rng.normal(size=(300, 2))
    mixing = rng.normal(size=(2, 2))
    offset = rng.normal(size=2)
    X_blob = np.dot(base, mixing) + offset

    whitener = PCA(whiten=True)
    whitener.fit(X_blob)
    X_pca = whitener.transform(X_blob)

    _, axes = plt.subplots(1, 2, figsize=(10, 10))
    ax_raw, ax_white = axes.ravel()

    # Both scatters share the colouring by first whitened component.
    point_style = dict(c=X_pca[:, 0], linewidths=0, s=60, cmap='viridis')

    ax_raw.set_title("원본 데이터")
    ax_raw.scatter(X_blob[:, 0], X_blob[:, 1], **point_style)
    ax_raw.set_xlabel("특성 1")
    ax_raw.set_ylabel("특성 2")
    ax_raw.set_aspect('equal')

    ax_white.set_title("화이트닝된 데이터")
    ax_white.scatter(X_pca[:, 0], X_pca[:, 1], **point_style)
    ax_white.set_xlabel("첫번째 주성분")
    ax_white.set_ylabel("두번째 주성분")
    ax_white.set_aspect('equal')
    ax_white.set_xlim(-3, 4)
@memory.cache
def pca_faces(X_train, X_test):
    """Reconstruct the test data through PCA models of increasing size.

    For each of 10, 50, 100 and 500 components a PCA is fit on ``X_train``;
    ``X_test`` is projected onto it and then mapped back into the original
    feature space.

    Returns a list holding one back-transformed copy of ``X_test`` per
    component count, in that order. Calls go through ``memory.cache``.
    """
    # TODO: per the original note, this mirrors an NMF counterpart and is a
    # candidate for refactoring into shared code.

    def _round_trip(n_components):
        # Fit on the training data only, then project the test data down to
        # n_components dimensions and back up to the original space.
        model = PCA(n_components=n_components)
        model.fit(X_train)
        projected = model.transform(X_test)
        return model.inverse_transform(projected)

    return [_round_trip(n_components) for n_components in (10, 50, 100, 500)]
def plot_pca_faces(X_train, X_test, image_shape):
    """Show three test images beside their PCA reconstructions.

    Draws a 3x5 grid: column 0 holds the original test image and columns
    1-4 hold the reconstructions returned by ``pca_faces`` (one per
    component count).

    Parameters
    ----------
    X_train, X_test
        Passed unchanged to ``pca_faces``; rows of ``X_test`` are also
        reshaped to ``image_shape`` for display.
    image_shape
        Shape each flattened row is reshaped to before ``imshow``.
    """
    # Must match the component counts used inside pca_faces, since the
    # column titles are paired with its results positionally.
    component_counts = [10, 50, 100, 500]

    reduced_images = pca_faces(X_train, X_test)

    # One row per test image (the first three), ticks suppressed.
    fig, axes = plt.subplots(3, 5, figsize=(15, 12),
                             subplot_kw={'xticks': (), 'yticks': ()})
    for i, row in enumerate(axes):
        # Original image in the first column.
        # NOTE(review): vmin/vmax assume pixel values in [0, 1] - confirm.
        row[0].imshow(X_test[i].reshape(image_shape), vmin=0, vmax=1)
        # The back-transformed images fill the remaining columns.
        for ax, X_test_back in zip(row[1:], reduced_images):
            ax.imshow(X_test_back[i].reshape(image_shape), vmin=0, vmax=1)

    # Label the top row.
    axes[0, 0].set_title("원본 이미지")
    for ax, n_components in zip(axes[0, 1:], component_counts):
        ax.set_title("성분 %d개" % n_components)