# Summary

Here we only apply the encoders trained previously (see: 'dimensionality_reduction.ipynb') to the whole data set, reducing each sample to a 20-dimensional representation. We then use the corresponding decoders to reconstruct the images from this representation, and finally store both the encoded vectors and the reconstructed images in new files.

In [0]:
# Mount Google Drive inside the Colab runtime at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

In [0]:
# Drive folder that every load()/load_model()/dump() call in this notebook reads from or writes to.
path = '/content/gdrive/My Drive/Colab Notebooks/Fashion_MNIST'

In [3]:
import os
from joblib import dump, load
import numpy as np
import matplotlib.pyplot as plt

from keras.datasets import fashion_mnist

from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline

import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import load_model

Using TensorFlow backend.


In [0]:
# Load Fashion-MNIST, then hold out 10% of the training portion as a validation set.
# The seed is fixed (random_state=42) so the split is reproducible; it presumably has to
# match the split used when the encoders were trained -- TODO confirm against
# 'dimensionality_reduction.ipynb'.
(X_trainval, y_trainval), (X_test, y_test) = fashion_mnist.load_data()
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.1, random_state=42)

In [0]:
# boilerplate from the 'dimensionality_reduction.ipynb'

from sklearn.base import BaseEstimator, TransformerMixin

class Flattener(BaseEstimator, TransformerMixin):
    """Turn an array of samples into a 2-D float matrix with one flat row per sample.

    Attributes set by ``fit``:
        img_dims: list holding every dimension of the fitted array except the first.
        pixel_count: product of ``img_dims`` (number of values per sample).
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Remember the per-sample shape of ``X``; ``y`` is ignored. Returns ``self``."""
        _n_samples, *sample_shape = X.shape
        self.img_dims = sample_shape
        # np.prod of an empty list is 1, matching a running product that starts at 1.
        self.pixel_count = int(np.prod(sample_shape))
        return self

    def transform(self, X):
        """Return ``X`` reshaped to ``(-1, pixel_count)`` and cast to float64."""
        flat = X.reshape((-1, self.pixel_count))
        return flat.astype('float64')

    def inverse_transform(self, X):
        """Reshape flat rows back to ``(-1, *img_dims)``; the dtype is left untouched."""
        restored_shape = (-1, *self.img_dims)
        return X.reshape(restored_shape)
      
from sklearn.base import BaseEstimator, TransformerMixin

class GreyScale(BaseEstimator, TransformerMixin):
    """Add (and later remove) a trailing axis of length 1 on every sample.

    Presumably this is the single channel axis the convolutional model
    expects -- confirm against the model definition.

    Attributes set by ``fit``:
        img_dims: list holding every dimension of the fitted array except the first.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Remember the per-sample shape of ``X``; ``y`` is ignored. Returns ``self``."""
        _n_samples, *sample_shape = X.shape
        self.img_dims = sample_shape
        return self

    def transform(self, X):
        """Return ``X`` reshaped to ``(-1, *img_dims, 1)``."""
        with_channel = (-1, *self.img_dims, 1)
        return X.reshape(with_channel)

    def inverse_transform(self, X):
        """Return ``X`` reshaped to ``(-1, *img_dims)``, dropping the trailing axis."""
        without_channel = (-1, *self.img_dims)
        return X.reshape(without_channel)
      
class TruncatedSVD_my(BaseEstimator, TransformerMixin):
    """Linear projection onto the first ``n`` rows of ``Vh`` from an SVD of the fitted matrix.

    No centering or scaling is done here; the input is used as given.

    Attributes set by ``fit``:
        s: singular values returned by ``np.linalg.svd``.
        Vh_trunc: the first ``n`` rows of ``Vh``.
    """

    def __init__(self, n):
        self.n = n

    def fit(self, X, y=None):
        """Decompose ``X`` (reduced SVD) and keep the leading ``n`` rows of ``Vh``."""
        decomposition = np.linalg.svd(X, full_matrices=False)
        self.s = decomposition[1]
        self.Vh_trunc = decomposition[2][:self.n]
        return self

    def transform(self, X):
        """Project rows of ``X`` onto the kept vectors: ``(Vh_trunc @ X.T).T``."""
        projection = self.Vh_trunc.T
        return X @ projection

    def inverse_transform(self, X):
        """Map projected rows back to the input feature space: ``(Vh_trunc.T @ X.T).T``."""
        basis = self.Vh_trunc
        return X @ basis

class AutoEncoder(BaseEstimator, TransformerMixin):
    """Wrap the two halves of an already trained Keras autoencoder as a transformer.

    The model must contain a layer named ``'20dim_out'`` (its output is the
    code returned by ``transform``) and a layer named ``'20dim_in'`` (its input
    is where ``inverse_transform`` feeds the code back in).

    Parameters
    ----------
    model : Keras model
        The trained autoencoder. It is stored as ``self.model`` because
        scikit-learn's ``BaseEstimator`` reads every ``__init__`` argument back
        from an attribute of the same name in ``get_params()``, ``clone()`` and
        the estimator repr; without it those calls fail.

    Attributes
    ----------
    model_in : backend function from the first layer's input to the output of ``'20dim_out'``.
    model_out : backend function from the input of ``'20dim_in'`` to the last layer's output.
    """

    def __init__(self, model):
        self.model = model
        self.model_in = K.function([model.layers[0].input], [model.get_layer('20dim_out').output])
        self.model_out = K.function([model.get_layer('20dim_in').input], [model.layers[-1].output])

    def fit(self, X, y=None):
        """No-op: the wrapped model is already trained. Returns ``self``."""
        return self

    def transform(self, X):
        """Encode ``X`` with the first half of the model (whole array in one call)."""
        return self.model_in([X])[0]

    def inverse_transform(self, X):
        """Decode ``X`` with the second half of the model and clip the result to [0, 255]."""
        return np.clip(self.model_out([X])[0], 0, 255)

      
# Fit the two reshaping helpers on the training images so they record the per-sample shape.
flattener = Flattener().fit(X_train)
greyscale = GreyScale().fit(X_train)

In [0]:
# Restore the SVD-based encoder from its joblib file. Unpickling presumably requires the
# helper classes defined in this notebook to exist under the same names -- TODO confirm.
encoder20_SVD_my = load(os.path.join(path,'encoder20_SVD_my.joblib'))

# Encoder built from model1: images are flattened first, then fed to the loaded Keras model.
model1 = load_model(os.path.join(path,'model1.h5'))
encoder20_Dense = make_pipeline(flattener, AutoEncoder(model1))

In [0]:
# Encoder built from model2: images get a trailing length-1 axis first, then go to the loaded Keras model.
model2 = load_model(os.path.join(path,'model2.h5'))
encoder20_Conv = make_pipeline(greyscale, AutoEncoder(model2))

---
Make sure that we are pushing the data through the 20-dimensional space.

Store two classes of files:
* the raw 20-feature vectors (names: "rawX20...")
* images decoded from the 20-dim vectors by the corresponding decoders (names: "X20...")

In [7]:
# Encode the train / validation / test images with the SVD encoder (suffix "a").
X20a_train, X20a_val, X20a_test = (
    encoder20_SVD_my.transform(images) for images in (X_train, X_val, X_test)
)

# Display the three resulting shapes as the cell output.
tuple(encoded.shape for encoded in (X20a_train, X20a_val, X20a_test))

((54000, 20), (6000, 20), (10000, 20))

In [8]:
# Persist the SVD codes themselves ("rawX20a_*"); the cell output is the return value of the last dump.
dump(X20a_train, os.path.join(path,'rawX20a_train.joblib'))
dump(X20a_val, os.path.join(path,'rawX20a_val.joblib'))
dump(X20a_test, os.path.join(path,'rawX20a_test.joblib'))

['/content/gdrive/My Drive/Colab Notebooks/Fashion_MNIST/rawX20a_test.joblib']

In [0]:
# Decode the SVD codes back through the encoder's inverse_transform and persist the results ("X20a_*").
dump(encoder20_SVD_my.inverse_transform(X20a_train), os.path.join(path,'X20a_train.joblib'))
dump(encoder20_SVD_my.inverse_transform(X20a_val), os.path.join(path,'X20a_val.joblib'))
dump(encoder20_SVD_my.inverse_transform(X20a_test), os.path.join(path,'X20a_test.joblib'))

['/content/gdrive/My Drive/Colab Notebooks/Fashion_MNIST/X20a_test.joblib']

In [10]:
# Encode the train / validation / test images with the model1-based encoder (suffix "b").
X20b_train, X20b_val, X20b_test = (
    encoder20_Dense.transform(images) for images in (X_train, X_val, X_test)
)

# Display the three resulting shapes as the cell output.
tuple(encoded.shape for encoded in (X20b_train, X20b_val, X20b_test))

((54000, 20), (6000, 20), (10000, 20))

In [11]:
# Persist the model1-based codes themselves ("rawX20b_*"); the cell output is the return value of the last dump.
dump(X20b_train, os.path.join(path,'rawX20b_train.joblib'))
dump(X20b_val, os.path.join(path,'rawX20b_val.joblib'))
dump(X20b_test, os.path.join(path,'rawX20b_test.joblib'))

['/content/gdrive/My Drive/Colab Notebooks/Fashion_MNIST/rawX20b_test.joblib']

In [0]:
# Decode the model1-based codes back through the pipeline's inverse_transform and persist the results ("X20b_*").
dump(encoder20_Dense.inverse_transform(X20b_train), os.path.join(path,'X20b_train.joblib'))
dump(encoder20_Dense.inverse_transform(X20b_val), os.path.join(path,'X20b_val.joblib'))
dump(encoder20_Dense.inverse_transform(X20b_test), os.path.join(path,'X20b_test.joblib'))

['/content/gdrive/My Drive/Colab Notebooks/Fashion_MNIST/X20b_test.joblib']

In [7]:
# model2-based encoder (suffix "c"), training samples 0..9,999 (chunk A): encode, save the
# code, then save its decoded reconstruction. The training set is handled in 10,000-sample
# slices, presumably to limit memory use -- TODO confirm.
X20c_train_A = encoder20_Conv.transform(X_train[:10_000])
dump(X20c_train_A, os.path.join(path,'rawX20c_train_A.joblib'))
dump(encoder20_Conv.inverse_transform(X20c_train_A), os.path.join(path,'X20c_train_A.joblib'))

# The code is stored unflattened: the recorded output shows (n, 2, 2, 5), i.e. 20 values per sample.
X20c_train_A.shape

(10000, 2, 2, 5)

In [7]:
# model2-based encoder, training samples 10,000..19,999 (chunk B): encode, save the code,
# then save its decoded reconstruction.
X20c_train_B = encoder20_Conv.transform(X_train[10_000:20_000])
dump(X20c_train_B, os.path.join(path,'rawX20c_train_B.joblib'))
dump(encoder20_Conv.inverse_transform(X20c_train_B), os.path.join(path,'X20c_train_B.joblib'))

# Shape of the saved code for this chunk.
X20c_train_B.shape

(10000, 2, 2, 5)

In [7]:
# model2-based encoder, training samples 20,000..29,999 (chunk C): encode, save the code,
# then save its decoded reconstruction.
X20c_train_C = encoder20_Conv.transform(X_train[20_000:30_000])
dump(X20c_train_C, os.path.join(path,'rawX20c_train_C.joblib'))
dump(encoder20_Conv.inverse_transform(X20c_train_C), os.path.join(path,'X20c_train_C.joblib'))

# Shape of the saved code for this chunk.
X20c_train_C.shape

(10000, 2, 2, 5)

In [7]:
# model2-based encoder, training samples 30,000..39,999 (chunk D): encode, save the code,
# then save its decoded reconstruction.
X20c_train_D = encoder20_Conv.transform(X_train[30_000:40_000])
dump(X20c_train_D, os.path.join(path,'rawX20c_train_D.joblib'))
dump(encoder20_Conv.inverse_transform(X20c_train_D), os.path.join(path,'X20c_train_D.joblib'))

# Shape of the saved code for this chunk.
X20c_train_D.shape

(10000, 2, 2, 5)

In [7]:
# model2-based encoder, training samples 40,000..49,999 (chunk E): encode, save the code,
# then save its decoded reconstruction.
X20c_train_E = encoder20_Conv.transform(X_train[40_000:50_000])
dump(X20c_train_E, os.path.join(path,'rawX20c_train_E.joblib'))
dump(encoder20_Conv.inverse_transform(X20c_train_E), os.path.join(path,'X20c_train_E.joblib'))

# Shape of the saved code for this chunk.
X20c_train_E.shape

(10000, 2, 2, 5)

In [7]:
# model2-based encoder: the remaining training samples (50,000 onward, chunk F) plus the
# full validation and test sets.
X20c_train_F = encoder20_Conv.transform(X_train[50_000:])
X20c_val = encoder20_Conv.transform(X_val)
X20c_test = encoder20_Conv.transform(X_test)

# Persist the codes themselves ("rawX20c_*").
dump(X20c_train_F, os.path.join(path,'rawX20c_train_F.joblib'))
dump(X20c_val, os.path.join(path,'rawX20c_val.joblib'))
dump(X20c_test, os.path.join(path,'rawX20c_test.joblib'))

# Persist the reconstructions decoded from those codes ("X20c_*").
dump(encoder20_Conv.inverse_transform(X20c_train_F), os.path.join(path,'X20c_train_F.joblib'))
dump(encoder20_Conv.inverse_transform(X20c_val), os.path.join(path,'X20c_val.joblib'))
dump(encoder20_Conv.inverse_transform(X20c_test), os.path.join(path,'X20c_test.joblib'))

# Display the three code shapes as the cell output.
X20c_train_F.shape, X20c_val.shape, X20c_test.shape

((4000, 2, 2, 5), (6000, 2, 2, 5), (10000, 2, 2, 5))

---
## End