## Optimizing the fully connected codes

Using the fully connected layer codes (the pre-softmax activations of the output layer) rather than the softmax codes, and then applying the standard cloning procedure.

In [1]:
%matplotlib inline
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [2]:
from sklearn import datasets

### Data handling

In [3]:
# Load the Iris dataset bundled with scikit-learn; the cells below read its
# 'data', 'target' and 'feature_names' entries.
data = datasets.load_iris()

In [4]:
# Pull the feature matrix and the integer class labels out of the loaded dataset,
# then print both shapes as a quick sanity check.
X, y = data['data'], data['target']
print(X.shape)
print(y.shape)

(150, 4)
(150,)


In [5]:
from sklearn.preprocessing import StandardScaler
# Standardise the features, then print the standard deviation and mean taken over
# the whole matrix (all features pooled) to confirm the rescaling took effect.
X = StandardScaler().fit(X).transform(X)
print(np.std(X))
print(np.mean(X))

1.0
-1.32634644009e-15


In [6]:
# One-hot encode the integer class labels so they match the [None, NUM_CLASSES]
# `labels` placeholder used by the model.
# The encoding is derived from data['target'] rather than from `y` itself so that
# this cell is idempotent: re-running it never feeds an already one-hot encoded
# 2-D array back into get_dummies.
y = pd.get_dummies(data['target']).values
print(y.shape)

(150, 3)


In [7]:
# Display the names of the input features shipped with the dataset.
data['feature_names']

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [8]:
# Report the smallest and largest standardised value of every feature; these
# bounds are what the sampling ranges of the random dataset are chosen from.
for idx, feature in enumerate(data['feature_names']):
    column = X[:, idx]
    print('Feature name:{}\nLow: {}\tHigh: {}'.format(feature, np.min(column), np.max(column)))

Feature name:sepal length (cm)
Low: -1.87002413385	High: 2.49201920212
Feature name:sepal width (cm)
Low: -2.43898725249	High: 3.11468391068
Feature name:petal length (cm)
Low: -1.56873522072	High: 1.78634131465
Feature name:petal width (cm)
Low: -1.44444969728	High: 1.71090158319


In [9]:
## Generating random dataset
# Uniformly sampled synthetic inputs. The model is later queried on these points
# and the clone is fitted to the resulting codes.
data_size = 1000

# Dedicated, seeded generator so the synthetic inputs (and everything fitted to
# them) are reproducible on a fresh kernel without touching numpy's global state.
RANDOM_SEED = 0
rng = np.random.RandomState(RANDOM_SEED)

# (column name, low, high), listed in the same order as the columns of X.
# random_dataset.values is fed to the network positionally, so the column order
# must not depend on dict iteration order.
feature_ranges = [
    ('sepal_length(cm)', -2, 3),
    ('sepal_width(cm)', -3, 4),
    ('petal_length(cm)', -2, 2),
    ('petal_width(cm)', -2, 2),
]

df = {}
column_order = []
for name, low, high in feature_ranges:
    df[name] = rng.uniform(low=low, high=high, size=data_size)
    column_order.append(name)

random_dataset = pd.DataFrame(data=df, columns=column_order)
print(random_dataset.head(3))

   sepal_length(cm)  sepal_width(cm)  petal_length(cm)  petal_width(cm)
0         -0.760405        -0.640145         -0.782428        -0.113954
1          2.956555        -1.620762          0.234114         0.969245
2          2.809062         1.532508         -0.800492        -0.532507


In [10]:
# Confirm the synthetic dataset has one row per sample and one column per feature.
print(random_dataset.shape)

(1000, 4)


### Constructing the model

In [27]:
# Hyper-parameters shared by the graph definition and the training loops below.
# Width of the input placeholder (one entry per Iris feature).
NUM_FEATURES = 4
# Width of the label placeholder and of the output layer.
NUM_CLASSES = 3
# Number of units in the single hidden layer.
NUM_HIDDEN = 128
# Number of full-batch optimisation steps run by the training loop.
N_EPOCHS = 2000
# Step size handed to the Adam optimizers.
LEARNING_RATE = 1e-4
# The loss is printed every SKIP_STEP epochs.
SKIP_STEP = 500

In [28]:
# Clear the default graph so that re-running the model cells below does not keep
# adding duplicate ops and variables to an existing graph.
tf.reset_default_graph()

In [29]:
# Graph inputs: a batch of feature rows and the matching one-hot label rows.
# The leading dimension is left as None so any batch size can be fed.
with tf.variable_scope("Placeholder") as scope:
    input_data = tf.placeholder(tf.float32, [None, NUM_FEATURES], "input_data")
    labels = tf.placeholder(tf.float32, [None, NUM_CLASSES], "labels")

In [30]:
# Hidden layer: affine map from the input features to NUM_HIDDEN units, followed
# by a ReLU. Only `hidden` is consumed by the output layer.
with tf.variable_scope("hidden_layer") as scope:
    # Layer-specific Python names are used on purpose. The output layer binds
    # `w`, `b` and `activation`, and later cells read `activation` as the
    # output-layer logits. Sharing those names here would make their meaning
    # depend on which cell happened to run last.
    hidden_w = tf.Variable(initial_value=tf.random_normal(shape=[NUM_FEATURES, NUM_HIDDEN]), name="weights")
    hidden_b = tf.Variable(initial_value=tf.random_normal(shape=[NUM_HIDDEN]), name="biases")
    hidden_activation = tf.matmul(input_data, hidden_w) + hidden_b
    hidden = tf.nn.relu(hidden_activation)

In [31]:
# Output layer: affine read-out of the hidden units into NUM_CLASSES scores.
# `activation` holds the raw (pre-softmax) scores; `softmax` holds the class
# probabilities derived from them. Both names are used by later cells.
with tf.variable_scope("output_layer") as scope:
    w = tf.Variable(tf.random_normal([NUM_HIDDEN, NUM_CLASSES]), name="weights")
    b = tf.Variable(tf.random_normal([NUM_CLASSES]), name="biases")
    activation = tf.add(tf.matmul(hidden, w), b)
    softmax = tf.nn.softmax(activation)

In [16]:
# Mean cross-entropy between the one-hot labels and the predicted distribution.
# It is computed from the output-layer logits (`activation`) with the fused op
# rather than as -sum(labels * log(softmax)). The hand-written form yields
# -inf / NaN as soon as a predicted probability underflows to exactly 0, which
# the unscaled random-normal initialisation makes likely. The fused op is the
# same quantity evaluated in a numerically stable way.
with tf.variable_scope("loss") as scope:
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=activation)
    loss = tf.reduce_mean(per_example_loss)

In [17]:
# Training op: one Adam update of the trainable variables against the
# classification loss.
with tf.variable_scope("optimizer") as scope:
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(loss)

In [19]:
# Train the classifier on the full Iris set (one full-batch step per epoch), then
# record its output-layer codes for every row of the random dataset.
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    # The same full batch is fed on every step.
    train_feed = {input_data: X, labels: y}
    for i in range(1, N_EPOCHS + 1):
        _, l = sess.run([optimizer, loss], feed_dict=train_feed)
        if i % SKIP_STEP != 0:
            continue
        print('Epoch: {}\n Loss: {}'.format(i, l))

    ## Prediction of the model on the random dataset
    # sess.run is given a one-element fetch list, so `codes` is a one-element
    # list whose first entry is the array of codes.
    codes = sess.run([activation], feed_dict={input_data: random_dataset.values})
    print(type(codes))

Epoch: 500
 Loss: 2.45589637756
Epoch: 1000
 Loss: 0.367829591036
Epoch: 1500
 Loss: 0.149226397276
Epoch: 2000
 Loss: 0.107043899596
<type 'list'>


In [24]:
# One code vector (one value per class) for every row of the random dataset.
print(codes[0].shape)

(1000, 3)


### White box

In [32]:
# Regression targets for the clone: the codes recorded from the trained model,
# one NUM_CLASSES-wide row per input row.
with tf.variable_scope("codes") as scope:
    fc_codes = tf.placeholder(tf.float32, [None, NUM_CLASSES], "fc_codes")

In [36]:
# Cloning objective: squared distance between the recorded codes and the
# network's current output-layer activation, summed over the class dimension
# and averaged over the batch.
with tf.variable_scope("new_loss") as scope:
    squared_error = tf.squared_difference(fc_codes, activation)
    per_example_error = tf.reduce_sum(squared_error, reduction_indices=[1])
    new_loss = tf.reduce_mean(per_example_error)

In [34]:
# Training op for the clone: one Adam update against the code-matching loss.
with tf.variable_scope("new_optimizer") as scope:
    new_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(new_loss)

In [39]:
# Use a longer schedule for fitting the clone than for the original classifier.
# NOTE(review): this rebinds the N_EPOCHS constant from the hyper-parameter cell,
# so re-running the first training cell after this one would also train for 5000
# epochs. Consider a separate constant for the cloning stage.
N_EPOCHS = 5000

In [40]:
# Fit the network to the recorded codes on the random dataset, then evaluate the
# resulting clone on the original Iris inputs. A new session is opened and every
# variable is re-initialised, so the fit starts from fresh random weights.
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    # Inputs are the random points; targets are the codes recorded for them.
    clone_feed = {input_data: random_dataset.values, fc_codes: codes[0]}
    for i in range(1, N_EPOCHS + 1):
        _, l = sess.run([new_optimizer, new_loss], feed_dict=clone_feed)
        if (i % SKIP_STEP) != 0:
            continue
        print('Epoch: {}\nLoss: {}'.format(i, l))

    ## Predicting output for the original dataset
    # One-element fetch list, so `op` is a one-element list holding the
    # class-probability array.
    op = sess.run([softmax], feed_dict={input_data: X})

Epoch: 500
Loss: 1776.35705566
Epoch: 1000
Loss: 934.065979004
Epoch: 1500
Loss: 526.511657715
Epoch: 2000
Loss: 316.689697266
Epoch: 2500
Loss: 201.941390991
Epoch: 3000
Loss: 136.450576782
Epoch: 3500
Loss: 97.4627685547
Epoch: 4000
Loss: 73.0879745483
Epoch: 4500
Loss: 57.2642326355
Epoch: 5000
Loss: 46.6475868225


In [44]:
# Accuracy of the clone on the original data: the fraction of rows whose most
# probable predicted class matches the class marked in the one-hot labels.
predicted_class = np.argmax(op[0], axis=1)
true_class = np.argmax(y, axis=1)
print(np.mean(predicted_class == true_class))

0.886666666667


**TODO:** The accuracy changed, but I don't remember whether it increased or decreased — this needs to be checked. As far as I can remember, the results were still not that good.