In [1]:
from __future__ import print_function
import keras
from keras import backend as K
from keras.datasets import cifar10
import numpy as np
import os
from sklearn.decomposition import PCA

# Training parameters
batch_size = 256  # orig paper trained all networks with batch_size=128
epochs = 20
data_augmentation = False
num_classes = 10
subtract_pixel_mean = True
# Load the CIFAR10 data.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Input image dimensions.
input_shape = x_train.shape[1:]

# Normalize data.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# If subtract pixel mean is enabled
if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean
print('Original Data Size:' + '-'*30)
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

part_num = 50
x_train = x_train[:part_num]
y_train = y_train[:part_num]
x_train = np.reshape(x_train,(part_num,-1))
print('After reshape Data Size:' + '-'*30)
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
# reduced_data = PCA(n_components=2).fit_transform(x_train)
# kmeans.fit(reduced_data)

# pca = PCA(n_components=100)
# pca = PCA(n_components='mle',svd_solver='full')
pca = PCA(n_components=0.99,svd_solver='full')


reduced_data = pca.fit_transform(x_train)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


Original Data Size:------------------------------
x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
y_train shape: (50000, 1)
After reshape Data Size:------------------------------
x_train shape: (50, 3072)
50 train samples


### PCA result

In [3]:
print(pca.explained_variance_ratio_)
print(pca.explained_variance_ratio_.max())
print(pca.explained_variance_ratio_.shape)

[0.27336046 0.13460453 0.08721387 0.06283692 0.04239827 0.03685037
 0.03212363 0.02511067 0.02182806 0.02106212 0.02006221 0.01705204
 0.01544581 0.0147708  0.01373287 0.01283195 0.01223379 0.01153102
 0.00981067 0.00957404 0.00931643 0.00873205 0.00811996 0.00755211
 0.0069366  0.00653193 0.00612945 0.00566806 0.00532946 0.00520345
 0.00493816 0.00468588 0.00418686 0.00398793 0.00359682 0.00350649
 0.00340684 0.00336123 0.00304037 0.00287496 0.00280883 0.00261611
 0.00234626 0.00225002]
0.27336046
(44,)


### PCA Elements

In [2]:
print(reduced_data.shape)
reduced_data[1]

(50, 44)


array([ 0.586811  , -2.7966866 , -6.5749774 , -2.9056726 , -0.3026667 ,
        1.7615906 ,  3.572931  ,  1.237919  ,  0.429713  ,  5.586577  ,
        3.6647234 , -0.45530373, -2.7605317 ,  5.124715  ,  0.40955803,
       -2.946985  , -0.91104746,  1.262489  ,  2.5640774 , -1.0910196 ,
        0.4584935 ,  0.2755828 , -1.5907949 ,  0.443775  , -1.2450556 ,
        0.42605388, -0.5726096 ,  0.0796786 , -0.28614712,  0.87537265,
        0.48068228,  0.99402815,  0.05110855, -0.30548093,  0.14032829,
        0.6684467 , -0.20254947, -0.06749538,  0.47563627,  0.2025691 ,
       -0.5228675 ,  0.04039957,  0.26350164,  0.11637776], dtype=float32)

### Simple Demo

In [26]:
import numpy as np
from sklearn.decomposition import PCA
# X = np.array([[1, -1], [2, -2], [3, -4], [4, -8], [5, -16], [6, -32]])
X = np.random.randn(6,2)
print(X.shape)
# pca = PCA(n_components='mle')
pca = PCA(n_components=2)
pca.fit(X)
print(X)
newX=pca.fit_transform(X)
print(newX)
print(pca.explained_variance_ratio_)


(6, 2)
[[-0.5896555   1.2734815 ]
 [ 1.1668102   0.43326799]
 [ 0.41645143  2.24184278]
 [ 0.6985449   0.26939814]
 [ 1.93799576  0.05153553]
 [ 0.65975364 -0.46664824]]
[[ 1.29248242 -0.66388833]
 [-0.43078836  0.24246055]
 [ 1.467769    0.72148142]
 [-0.28238034 -0.23093229]
 [-1.19768897  0.63273007]
 [-0.84939374 -0.70185143]]
[0.7605655 0.2394345]
