## Spiral toy tutorial

In [5]:
%matplotlib inline
import numpy as np
from datetime import datetime
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
np.random.seed(7)

## Define parameters

In [6]:
no_of_layers = 4 # number of hidden layers except input and output layer
input_layer = 7
layer1 = 100
layer2 = 100
layer3 = 100
layer4 = 100
output_layer = 1
output_class = 4 # Number of classes
N = 100 # number of training examples
learning_rate = 0.001
training_epochs = 150
batch_size = 10
display_step = 1
keep_prob = 1.0

## Data generation
In this section we will generate simple spiral toy data. The function is written in such a way that it can have different number of classes based on the give input.

In [7]:
def get_stress_data(no_of_data):
    data = pd.read_csv('stress_data_shuffled.csv')
    data = data.values
    data = data[0:no_of_data, :]
    X = data[:no_of_data, :7] # [it looks like it is 1 indexed]
    Y = data[:no_of_data, 9] # shear stress [it looks like it is 0 indexed]
    return X, Y

# get_stress_data(1000)


## Get data visualize

In [8]:
X, Y = get_stress_data(10000)
X, Y = shuffle(X, Y, random_state=0)
X = preprocessing.MinMaxScaler(feature_range=(0, 1), copy=True).fit_transform(X)
Y = preprocessing.MinMaxScaler(feature_range=(0, 1), copy=True).fit_transform(Y.T)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
total_len_train = X_train.shape[0]
total_len_test = X_test.shape[0]



In [9]:
def weight_variable(layer1_dim, layer2_dim):
    return tf.Variable(tf.truncated_normal([layer1_dim, layer2_dim], 0, 0.01))

def bias_variable(layer_dim):
    return tf.Variable(tf.truncated_normal([layer_dim], 0, 0.01))

def variable_summaries(var, name):
  """Attach a lot of summaries to a Tensor."""
  with tf.name_scope('summaries'):
    mean = tf.reduce_mean(var)
    tf.summary.scalar('mean/' + name, mean)
    with tf.name_scope('stddev'):
      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
    tf.summary.scalar('stddev/' + name, stddev)
    tf.summary.scalar('max/' + name, tf.reduce_max(var))
    tf.summary.scalar('min/' + name, tf.reduce_min(var))
    tf.summary.histogram(name, var)

def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
  """Reusable code for making a simple neural net layer.

  It does a matrix multiply, bias add, and then uses relu to nonlinearize.
  It also sets up name scoping so that the resultant graph is easy to read,
  and adds a number of summary ops.
  """
  # Adding a name scope ensures logical grouping of the layers in the graph.
  with tf.name_scope(layer_name):
    # This Variable will hold the state of the weights for the layer
    with tf.name_scope('weights'):
      weights = weight_variable(input_dim, output_dim)
      variable_summaries(weights, layer_name + '/weights')
    with tf.name_scope('biases'):
      biases = bias_variable(output_dim)
      variable_summaries(biases, layer_name + '/biases')
    with tf.name_scope('Wx_plus_b'):
      preactivate = tf.matmul(input_tensor, weights) + biases
      tf.summary.histogram(layer_name + '/pre_activations', preactivate)
    
    if act == None:
        activations = preactivate
    else:
        activations = act(preactivate, 'activation')
    
    tf.summary.histogram(layer_name + '/activations', activations)
  return activations

In [10]:

graph1 = tf.Graph()
with graph1.as_default():
    # %% tf.placeholders for the input and output of the network. Placeholders are
    # variables which we need to fill in when we are ready to compute the graph.
    X_placeholder = tf.placeholder(tf.float32, [None, input_layer])
    Y_placeholder = tf.placeholder(tf.float32, [None])
#     keep_prob = tf.placeholder(tf.float32)
    
    # hidden layers computation  
    hidden1 = nn_layer(X_placeholder, input_layer, layer1, 'layer1')
    hidden1 = tf.nn.dropout(hidden1, keep_prob)
    
    hidden2 = nn_layer(hidden1, layer1, layer2, 'layer2')
    hidden2 = tf.nn.dropout(hidden2, keep_prob)
    
    hidden3 = nn_layer(hidden2, layer2, layer3, 'layer3')
    hidden3 = tf.nn.dropout(hidden3, keep_prob)
    
    hidden4 = nn_layer(hidden3, layer3, layer4, 'layer4')
    logits  = nn_layer(hidden4, layer4, output_layer, 'output', None)
    pred = tf.transpose(logits)
    
    #  cost : squred mean   
    with tf.name_scope('cost'):         
        cost = tf.reduce_mean(tf.square(pred - Y_placeholder))
        tf.summary.scalar('cost', cost)
        
    #  Accuracy : squred mean   
    with tf.name_scope('accuracy'):
        correct_prediction = tf.subtract(tf.cast(1, 'float'), tf.reduce_mean(tf.subtract(pred, Y_placeholder)))
        accuracy = tf.cast(correct_prediction, "float")
        tf.summary.scalar('accuracy', accuracy)
                       
    #optimizer
    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
        

In [13]:
def feed_dict(train, i):
  """Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
  if (train):
    xs = X_train[i*batch_size:(i+1)*batch_size]
    ys = Y_train[i*batch_size:(i+1)*batch_size]
  else:
    xs = X_test[i*batch_size:(i+1)*batch_size]
    ys = Y_test[i*batch_size:(i+1)*batch_size]
  return {X_placeholder: xs, Y_placeholder: ys}


# %% We create a session to use the graph
with tf.Session(graph=graph1) as sess:
    
    # save all the varibales and weights of this graph
    saver = tf.train.Saver()
    chkpt_path = "./checkpoints/spiral"
    
    # check if the there is an existing chkpt files
#     ckpt = tf.train.get_checkpoint_state("./checkpoints")
#     saver.restore(sess, tf.train.latest_checkpoint('./checkpoints'))
#     print("checkpoint: ", ckpt)
        
    # -----prepare tensor board visualization variables/weights --------
    # Merge all the summaries and write them out to output dir
    now = datetime.now()
    train_sub = 'train_' + now.strftime("%Y%m%d-%H%M")+'_drop_' + str(keep_prob)+'_layer_'+ str(no_of_layers) +'_data_'+ str(N)+ '_neuron_' + str(layer1) 
    test_sub = 'test_' + now.strftime("%Y%m%d-%H%M")+'_drop_' + str(keep_prob)+'_layer_'+ str(no_of_layers) +'_data_'+ str(N)+ '_neuron_' + str(layer1)
    
    train_path = './train/' + str(train_sub)
    test_path = './test/' + str(test_sub)
    
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(train_path, sess.graph)
    test_writer = tf.summary.FileWriter(test_path, sess.graph)
    # --------------- ######### --------------
    
    sess.run(tf.global_variables_initializer())
    
    trainng_loss = []
    epoch1 = []
    testing_loss = []
    epoch2 = []
    
    for epoch in range(training_epochs):
        
        ## Training      
        total_batch = int(total_len_train/batch_size)
        for i in range(total_batch):
            summary_train, _, a_train, loss_train, p_train = sess.run([merged, optimizer, accuracy, cost, pred], feed_dict = feed_dict(True, i))
        trainng_loss.append(loss_train)
        epoch1.append(epoch)
        train_writer.add_summary(summary_train, epoch)
        
        ## Testing
        if (epoch%1 == 0):
            total_batch = int(total_len_test/batch_size)
            p_test = []
            for i in range(total_batch):
                summary_test, _, a_test, loss_test, p_test = sess.run([merged, optimizer, accuracy, cost, pred], feed_dict = feed_dict(False, i))
                p_test
            testing_loss.append(loss_test)
            epoch2.append(epoch)
            test_writer.add_summary(summary_test, epoch)
        
        # printing  loss  
        if (epoch%1 == 0):
            print("traning epoch : ", epoch, "train loss : ",  loss_train, "test loss :", loss_test)
        
        # saving the entire graph and its variables
        if (epoch > (training_epochs-5)):
            saver.save(sess, chkpt_path, global_step=epoch)


('traning epoch : ', 0, 'train loss : ', 0.005384245, 'test loss :', 0.0045885616)
('traning epoch : ', 1, 'train loss : ', 0.0050126957, 'test loss :', 0.0046040635)
('traning epoch : ', 2, 'train loss : ', 0.0042492221, 'test loss :', 0.0044569196)
('traning epoch : ', 3, 'train loss : ', 0.0042396327, 'test loss :', 0.0046626953)
('traning epoch : ', 4, 'train loss : ', 0.0043349485, 'test loss :', 0.0047025373)
('traning epoch : ', 5, 'train loss : ', 0.0044457437, 'test loss :', 0.0046986202)
('traning epoch : ', 6, 'train loss : ', 0.0045018164, 'test loss :', 0.0045455857)
('traning epoch : ', 7, 'train loss : ', 0.0044925041, 'test loss :', 0.0044398201)
('traning epoch : ', 8, 'train loss : ', 0.0044024251, 'test loss :', 0.0043852022)
('traning epoch : ', 9, 'train loss : ', 0.0043473402, 'test loss :', 0.0044378005)
('traning epoch : ', 10, 'train loss : ', 0.0042155152, 'test loss :', 0.0045892051)
('traning epoch : ', 11, 'train loss : ', 0.0040659062, 'test loss :', 0.004

('traning epoch : ', 97, 'train loss : ', 0.0036712307, 'test loss :', 0.0049706423)
('traning epoch : ', 98, 'train loss : ', 0.0037552014, 'test loss :', 0.0050330386)
('traning epoch : ', 99, 'train loss : ', 0.0040082787, 'test loss :', 0.0053487546)
('traning epoch : ', 100, 'train loss : ', 0.0039977464, 'test loss :', 0.0050052088)
('traning epoch : ', 101, 'train loss : ', 0.0040713912, 'test loss :', 0.0051832986)
('traning epoch : ', 102, 'train loss : ', 0.004032976, 'test loss :', 0.0050850683)
('traning epoch : ', 103, 'train loss : ', 0.0040120063, 'test loss :', 0.0050800084)
('traning epoch : ', 104, 'train loss : ', 0.0039643054, 'test loss :', 0.004893356)
('traning epoch : ', 105, 'train loss : ', 0.0039492869, 'test loss :', 0.0049383836)
('traning epoch : ', 106, 'train loss : ', 0.0038998569, 'test loss :', 0.0050191358)
('traning epoch : ', 107, 'train loss : ', 0.004044889, 'test loss :', 0.0047242679)
('traning epoch : ', 108, 'train loss : ', 0.0039800964, 'te

ValueError: Parent directory of ./checkpoints/spiral doesn't exist, can't save.

## Some conclusions
####  Three class problems :
Overfitting can be solved by introducing dropout rules, however it does not work always as it is. For example if we have only two classes and four hidden layers and each layer has 200 neurons, 0.5 keep_prob trainng drop out does not help. If we decrease number of neurons to 100 overfitting reduced gives better result.

#### Four class problems :
1. Since we have more classes, we have to have to little more neurons in each layers to learn more while keeping the same number of hidden layer. 
2. 0.30 keep_prob makes underfitting
3. 0.35 keep_prob makes almost perfect fitting
4. 0.40 keep_prob makes almost the same fitting
5. 0.40 with 1000 trainning epoch makes the tesing loss curve underneath the training curve.
6. To overcome this problem we can try increasing keep_prob or number of layers or may be both.
7. We increased keep_prob to 0.6, not sure if its improved the result
lets try with decreasing no. of neurons to 150 in each layer keeping all the parameters same.
8. No. of neurons 250, validation curve is less
Still no good results. May be we have to increase number of layers.

9. 0.6 keep_prob and 250 neurons in each layer makes the both curve nice and adjustable, althouth test curve is little low than the traiing curve

10. increase number of datasets N= 4000 from 500. keep_prob=1.0

### possible reasons :
This is almost always due to the small size of test set.
