## Coding Exercise #0903a

### 1. Dimensionality reduction with PCA:

In [None]:
# Import TensorFlow and display the installed version as the cell output.
import tensorflow as tf
tf.__version__

In [None]:
# Run TensorFlow 1.x-style code under TensorFlow 2.x without rewriting it
# (the contrib module is the exception and is not covered by this shim).
# `tf` is rebound to the compat.v1 API and the v2 behavior is switched off.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior() 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline

#### 1.1. Read in data and preprocess:

In [None]:
# Load the Iris dataset bundled with scikit-learn.
data_raw = load_iris()
# Feature matrix exactly as provided by the dataset.
X = data_raw.data
# Integer class labels, reshaped into a single column.
y = data_raw.target.reshape((-1, 1))

In [None]:
# Min-max scaling: learn the per-feature range from X, then rescale X with it.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Wrap the scaled features in a labelled DataFrame and preview the first rows.
header = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
]  # Column names, one per feature.
df = pd.DataFrame(data=X_scaled, columns=header)
df.head(n=3)

#### 1.2. Display the PCA result:

In [None]:
# Reduce the scaled features to 2 dimensions with PCA.
# `pca` keeps the fitted model; `X_pca` holds the 2-column projection that is plotted below.
pca = PCA(n_components = 2)
X_pca = pca.fit_transform(X_scaled) 

In [None]:
# Translate each integer class label into a plotting color:
# 0 -> 'red', 1 -> 'green', any other label -> 'blue'.
mycolor = [
    'red' if label == 0 else 'green' if label == 1 else 'blue'
    for label in y
]

In [None]:
# Scatter plot of the two PCA components, colored by class label.
# The title and axis labels let the figure be read on its own and match the
# labelling used for the PCA panel in the PCA-vs-AutoEncoder comparison.
plt.scatter(X_pca[:, 0], X_pca[:, 1], marker="o", alpha=0.7, s=10, c=mycolor)
plt.xlabel('X')
plt.ylabel('Y')
plt.title('PCA')
plt.show()

### 2. Dimensionality reduction with AutoEncoder:

#### 2.1. Define an AutoEncoder model:

In [None]:
# Hyperparameters of the AutoEncoder.
n_input = 4            # One input node per feature (variable).
n_hidden = 2           # Width of the hidden (bottleneck) layer: the target number of dimensions.
n_output = n_input     # The output layer has as many nodes as the input layer.
learn_rate = 1e-4      # Learning rate handed to the optimizer.
n_epochs = 10001       # Number of training steps; the last step index is 10000.

In [None]:
# Trainable parameters of the AutoEncoder.
# Set the graph-level random seed BEFORE any random op is created so that the
# initial weights (and hence the learned 2-D representation) come out the same
# after every "Restart Kernel -> Run All".
tf.set_random_seed(42)
initializer = tf.variance_scaling_initializer()
W1 = tf.Variable(initializer([n_input, n_hidden]), dtype=tf.float32)    # Weights: input -> hidden.
W2 = tf.Variable(initializer([n_hidden, n_output]), dtype=tf.float32)   # Weights: hidden -> output.
b1 = tf.Variable(tf.zeros(n_hidden))                                    # Hidden-layer bias, starts at zero.
b2 = tf.Variable(tf.zeros(n_output))                                    # Output-layer bias, starts at zero.

In [None]:
# Input placeholder: any number of rows, n_input columns of float32.
X_ph = tf.placeholder(dtype=tf.float32, shape=(None, n_input))

In [None]:
# Linear AutoEncoder: project the input onto the hidden layer, then map it back.
hidden_layer = X_ph @ W1 + b1           # Hidden layer; no activation is applied.
y_model = hidden_layer @ W2 + b2        # Output layer, same width as the input.

In [None]:
# Mean squared reconstruction error: the input 'X' itself plays the role of the target 'y'.
reconstruction_error = X_ph - y_model
loss = tf.reduce_mean(tf.square(reconstruction_error))

In [None]:
# Adam optimizer configured with the learning rate from the hyperparameters cell.
optimizer = tf.train.AdamOptimizer(learning_rate = learn_rate) 

In [None]:
# Training op: running it in a session asks the optimizer to minimize `loss`.
train = optimizer.minimize(loss)

In [None]:
# Op that initializes all global variables; it is run first inside the training session.
init = tf.global_variables_initializer()

#### 2.2. AutoEncoder training:

In [None]:
# Train the AutoEncoder on the full scaled dataset, then extract the
# reduced-dimension representation.
# The feed dict never changes between steps, so it is built once up front;
# this also keeps it defined for the final fetch when n_epochs is 0.
my_feed = {X_ph: X_scaled}
with tf.Session() as sess:
    sess.run(init)
    for i in range(n_epochs):
        sess.run(train, feed_dict=my_feed)
        if i % 1000 == 0:
            # Progress report: loss evaluated after this step's update.
            mse = sess.run(loss, feed_dict=my_feed)
            print("Step : {}    ,    MSE : {}".format(i, mse))
    # The hidden-layer values are the reduced-dimension representation.
    X_auto = sess.run(hidden_layer, feed_dict=my_feed)

#### 2.3. Compare the results of PCA vs AutoEncoder:

In [None]:
# Side-by-side scatter plots of the PCA and AutoEncoder 2-D representations.
fig = plt.figure(figsize=(10, 5), dpi=80)      # figsize = (width, height); DPI set explicitly.
axes1 = fig.add_axes([0, 0, 0.4, 1])           # [left, bottom, width, height]
axes2 = fig.add_axes([0.5, 0, 0.4, 1])         # [left, bottom, width, height]

# Both panels share the same styling; only the data and the title differ.
for ax, embedding, title in ((axes1, X_pca, 'PCA'), (axes2, X_auto, 'AutoEncoder')):
    ax.scatter(embedding[:, 0], embedding[:, 1], marker="o", alpha=0.7, s=10, c=mycolor)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_title(title)
plt.show()