# Autoencoders

In [14]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import datasets

## Define autoencoder

In [23]:
def create_layer(x, input_size, output_size, name):
    """Build one fully connected (affine) layer and return its output tensor.

    Args:
        x: Input tensor that is matrix-multiplied by the layer weights.
        input_size: Number of rows of the weight matrix.
        output_size: Number of columns of the weight matrix and length of
            the bias vector.
        name: Name scope under which the layer's ops and variables are created.

    Returns:
        The tensor ``x @ weights + biases``; no activation is applied here.
    """
    with tf.name_scope(name):
        weight_shape = [input_size, output_size]
        # NOTE(review): 'weigths' is misspelled, but the variable name is
        # presumably part of the checkpoint keys written by tf.train.Saver,
        # so it is left untouched here -- confirm before renaming.
        weights = tf.Variable(
            tf.random_normal(weight_shape, dtype=tf.float32),
            name='weigths',
        )
        biases = tf.Variable(tf.zeros([output_size]), name='biases')
        affine_output = tf.matmul(x, weights) + biases
    return affine_output

    
class Autoencoder(object):
    """Autoencoder with one tanh hidden layer and a linear decoder.

    The graph is built in ``__init__`` using the TensorFlow 1.x graph API.
    ``train`` fits the model one sample at a time and writes a checkpoint;
    ``test`` restores that checkpoint and reconstructs the given samples.
    """

    def __init__(self, input_size, hidden_size, epoch=250, learning_rate=0.001,
                 checkpoint_path='../models/autoencoder1.ckpt'):
        """Build the computation graph.

        Args:
            input_size: Number of features per sample.
            hidden_size: Width of the encoded (bottleneck) representation.
            epoch: Number of passes over the training data.
            learning_rate: Learning rate passed to the RMSProp optimizer.
            checkpoint_path: Where ``train`` saves and ``test`` restores the
                model variables.
        """
        self.epoch = epoch
        self.learning_rate = learning_rate
        self.checkpoint_path = checkpoint_path
        self.x = tf.placeholder(dtype=tf.float32, shape=[None, input_size])
        # Use the requested bottleneck width (it was previously hard-coded
        # to 1, silently ignoring ``hidden_size``).
        self.encoder = tf.nn.tanh(
            create_layer(self.x, input_size, hidden_size, 'encoder'))
        self.decoder = create_layer(
            self.encoder, hidden_size, input_size, 'decoder')
        # Root-mean-square reconstruction error over all fed elements.
        self.loss = tf.sqrt(
            tf.reduce_mean(tf.square(tf.subtract(self.x, self.decoder))))
        self.train_op = tf.train.RMSPropOptimizer(learning_rate).minimize(
            self.loss)
        self.saver = tf.train.Saver()

    def train(self, data):
        """Train on ``data`` one sample per step and save a checkpoint.

        Progress is printed (and an intermediate checkpoint saved) roughly
        ten times over the run; a final checkpoint is always written.

        Args:
            data: Sequence of samples, each of length ``input_size``.

        Raises:
            ValueError: If ``data`` contains no samples.
        """
        if len(data) == 0:
            raise ValueError('data must contain at least one sample')
        # max(1, ...) avoids a modulo-by-zero when epoch < 10.
        report_every = max(1, self.epoch // 10)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(self.epoch):
                for sample in data:
                    # Wrap the sample in a list to feed a batch of size one.
                    l, _ = sess.run([self.loss, self.train_op],
                                    feed_dict={self.x: [sample]})
                if i % report_every == 0:
                    # The reported loss is that of the epoch's last sample.
                    print('epoch {}, loss={}'.format(i, l))
                    self.saver.save(sess, self.checkpoint_path)
            self.saver.save(sess, self.checkpoint_path)

    def test(self, data):
        """Restore the saved checkpoint and reconstruct ``data``.

        Prints the input, the encoded representation and the reconstruction.

        Args:
            data: Batch of samples, each of length ``input_size``.

        Returns:
            The decoder output evaluated on ``data``.
        """
        with tf.Session() as sess:
            self.saver.restore(sess, self.checkpoint_path)
            hidden, output = sess.run([self.encoder, self.decoder],
                                      feed_dict={self.x: data})
        print('input', data)
        print('compressed', hidden)
        print('output', output)
        return output

## Train autoencoder on Iris dataset

In [28]:
# Load the Iris feature matrix: one row per sample, one column per feature.
data = datasets.load_iris().data

# Model dimensions: compress every sample down to a single hidden value.
hidden_size = 1
input_size = data.shape[1]

# Build the model and fit it on the whole dataset.
ae = Autoencoder(input_size=input_size, hidden_size=hidden_size)
ae.train(data)

epoch 0, loss=3.7631583213806152
epoch 25, loss=0.5827294588088989
epoch 50, loss=0.4093671143054962
epoch 75, loss=0.4083506762981415
epoch 100, loss=0.4083426892757416
epoch 125, loss=0.40822848677635193
epoch 150, loss=0.3968602120876312
epoch 175, loss=0.3912559151649475
epoch 200, loss=0.37086042761802673
epoch 225, loss=0.33666616678237915


## Test the autoencoder on sample data

In [29]:
# Restore the saved checkpoint and reconstruct two Iris samples (rows 8 and 4);
# the call prints the input, the compressed code and the reconstruction.
ae.test(data[[8, 4]])

INFO:tensorflow:Restoring parameters from ../models/autoencoder1.ckpt
input [[ 4.4  2.9  1.4  0.2]
 [ 5.   3.6  1.4  0.2]]
compressed [[ 0.16788286]
 [ 0.12539166]]
output [[ 4.85602427  3.21752739  2.28904295  0.55845821]
 [ 4.76693153  3.23697138  2.14088035  0.49484342]]


array([[ 4.85602427,  3.21752739,  2.28904295,  0.55845821],
       [ 4.76693153,  3.23697138,  2.14088035,  0.49484342]], dtype=float32)