Skip to content

Commit

Permalink
EXA Fix bad data visualisation in "Importance of Feature Scaling" (#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
benjastudio authored and jnothman committed Oct 15, 2018
1 parent 215d0bc commit 7442005
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions examples/preprocessing/plot_scaling_importance.py
Expand Up @@ -93,24 +93,27 @@
print('\nPC 1 without scaling:\n', pca.components_[0])
print('\nPC 1 with scaling:\n', pca_std.components_[0])

# Scale and use PCA on X_train data for visualization.
# Use PCA without and with scale on X_train data for visualization.
X_train_transformed = pca.transform(X_train)
scaler = std_clf.named_steps['standardscaler']
X_train_std = pca_std.transform(scaler.transform(X_train))
X_train_std_transformed = pca_std.transform(scaler.transform(X_train))

# visualize standardized vs. untouched dataset with PCA performed
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=FIG_SIZE)


for l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):
ax1.scatter(X_train[y_train == l, 0], X_train[y_train == l, 1],
ax1.scatter(X_train_transformed[y_train == l, 0],
X_train_transformed[y_train == l, 1],
color=c,
label='class %s' % l,
alpha=0.5,
marker=m
)

for l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):
ax2.scatter(X_train_std[y_train == l, 0], X_train_std[y_train == l, 1],
ax2.scatter(X_train_std_transformed[y_train == l, 0],
X_train_std_transformed[y_train == l, 1],
color=c,
label='class %s' % l,
alpha=0.5,
Expand Down

0 comments on commit 7442005

Please sign in to comment.