In [1]:
import tensorflow as tf
import numpy as np 
from sklearn import datasets
from sklearn import model_selection
import shutil
print(tf.__version__)

1.2.0


## Generate a Classification Dataset

In [2]:
n = 10000
f = 20
c = 4
X, y = datasets.make_classification(n_classes=c,
                                    n_features=f,
                                    n_clusters_per_class=2,
                                    n_informative=15,
                                    n_samples=n,
                                    random_state=250785)
X_train, X_valid, y_train, y_valid = model_selection.train_test_split(X,y, test_size=0.75)
print("Training set size: {}".format(len(X_train)))
print("Validation set sie: {}".format(len(X_valid)))

Training set size: 2500
Validation set sie: 7500


## Batch Generation Function

In [3]:
def get_batch(batch_size, batch_index, X,y):
    start_index = batch_size * (batch_index-1) 
    end_index = batch_size*batch_index
    X_batch = X[start_index:end_index,:]
    y_batch = y[start_index:end_index]
    return X_batch, y_batch

## Direcotry Config

In [4]:
from datetime import datetime 
model_dir = "gs://ksalama-gcs-cloudml/ml-models/demo-ann_classifier"
now = datetime.utcnow().strftime("%Y%m%d%H%M%S") 
root_logdir = model_dir+"/logs"
logdir = "{}/run-{}/".format(root_logdir, now)

In [5]:
%%bash

gsutil -m rm -r gs://ksalama-gcs-cloudml/ml-models/demo-ann_classifier/logs

Removing gs://ksalama-gcs-cloudml/ml-models/demo-ann_classifier/logs/#1508074040237725...
Removing gs://ksalama-gcs-cloudml/ml-models/demo-ann_classifier/logs/run-20171015132718/#1508074040383910...
Removing gs://ksalama-gcs-cloudml/ml-models/demo-ann_classifier/logs/run-20171015132718/events.out.tfevents.1508074040.e507a7e75e04#1508074040873444...
/ [1/3 objects]  33% Done                                                       / [2/3 objects]  66% Done                                                       / [3/3 objects] 100% Done                                                       
Operation completed over 3 objects.                                              


## Model Training Function

In [6]:
def train_model(resume=True, epoch_count=10000, batch_size=500, learning_rate=0.001):
  
    print("run-{}".format(now))
    print("")
          
    batch_count = int(np.ceil(n / batch_size))
    
    X = tf.placeholder(shape=(None,f),dtype=tf.float32,name="X")
    y = tf.placeholder(shape=(None),dtype=tf.int32,name="y")
    
    with tf.name_scope("model"): 
        hidden1 = tf.layers.dense(X, 16, name ="hidden1", activation = tf.nn.relu) 
        hidden2 = tf.layers.dense(hidden1, 8, name ="hidden2", activation = tf.nn.relu) 
        logits = tf.layers.dense(hidden2, c, name ="logits")

    with tf.name_scope("loss"): 
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits) 
        loss = tf.reduce_mean(xentropy, name="loss")

    with tf.name_scope("training"): 
        optimizer = tf.train.GradientDescentOptimizer(learning_rate) 
        training = optimizer.minimize(loss)

    with tf.name_scope("accuracy"): 
        correct = tf.nn.in_top_k(logits, y, 1) 
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
      
    with tf.name_scope("summary"): 
        accuracy_summary = tf.summary.scalar('ACCURACY', accuracy)
        xentropy_summary = tf.summary.scalar('XENTROPY', loss)
        summary = tf.summary.merge_all()
    
    file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

    init = tf.global_variables_initializer()
    saver = tf.train.Saver() 
    
    # start tensorflow session
    with tf.Session() as session:
        if resume:
            saver.restore(session, model_dir+"/model_final.ckpt")
        else:
            init.run()
            
        # print initial accuracy measures for training and validation sets
        train_init_accuray =  accuracy.eval(feed_dict={X: X_train, y:y_train})
        print("Training Set Initial Accuray:{}{}".format(round(float(train_init_accuray*100),2),"%"))
        valid_init_accuray =  accuracy.eval(feed_dict={X: X_valid, y:y_valid})
        print("Validation Set Initial Accuray:{}{}".format(round(float(valid_init_accuray*100),2),"%"))
        print("")
        
        # strat training iterations
        for epoch in range(1,epoch_count):
          
            for batch in range(1,batch_count):
                X_batch, y_batch = get_batch(batch_size,batch, X_train, y_train)
                session.run(training, feed_dict={X: X_batch, y:y_batch})
          
            if epoch % 500 == 0:
          
                step = (epoch*batch_count)+batch
          
                # print train and valid accuracy values at the current step
                print("Step:{}".format(step))
                tain_accuracy_value =  accuracy.eval(feed_dict={X:X_train, y:y_train})
                print("-- Tain Accuracy:{}{}".format(round(float(tain_accuracy_value)*100,2),"%"))
                valid_accuracy_value =  accuracy.eval(feed_dict={X:X_valid, y:y_valid})
                print("-- Valid Accuracy:{}{}".format(round(float(valid_accuracy_value)*100,2),"%"))
                print("")
          
                # write summary for tensorboard
                summary_values = summary.eval(feed_dict={X:X_train, y:y_train})
                file_writer.add_summary(summary_values, step)
          
                # save model checkpoint
                #saver.save(session,model_dir+"/model.ckpt".format(epoch))
        
        # print final train and validation accuracy values
        train_final_accuray =  accuracy.eval(feed_dict={X: X_train, y:y_train})
        print("Training Set Final Accuray:{}{}".format(round(float(train_final_accuray)*100,2),"%"))
        valid_final_accuray =  accuracy.eval(feed_dict={X: X_valid, y:y_valid})
        print("Validation Set Final Accuray:{}{}".format(round(float(valid_final_accuray)*100,2),"%"))
        print("")
       
        
        # save final model
        save_path = saver.save(session, model_dir+"/model_final.ckpt")

    file_writer.close()
    print("Final model was saved in {}".format(save_path))

In [7]:
train_model(resume=False)

run-20171015132814

Training Set Initial Accuray:26.72%
Validation Set Initial Accuray:29.24%

Step:10019
-- Tain Accuracy:51.24%
-- Valid Accuracy:50.4%

Step:20019
-- Tain Accuracy:59.28%
-- Valid Accuracy:57.91%

Step:30019
-- Tain Accuracy:63.52%
-- Valid Accuracy:61.84%

Step:40019
-- Tain Accuracy:65.96%
-- Valid Accuracy:63.92%

Step:50019
-- Tain Accuracy:68.52%
-- Valid Accuracy:65.56%

Step:60019
-- Tain Accuracy:69.8%
-- Valid Accuracy:67.41%

Step:70019
-- Tain Accuracy:71.72%
-- Valid Accuracy:68.87%

Step:80019
-- Tain Accuracy:72.52%
-- Valid Accuracy:70.05%

Step:90019
-- Tain Accuracy:74.08%
-- Valid Accuracy:71.25%

Step:100019
-- Tain Accuracy:75.12%
-- Valid Accuracy:72.23%

Step:110019
-- Tain Accuracy:75.96%
-- Valid Accuracy:73.09%

Step:120019
-- Tain Accuracy:76.88%
-- Valid Accuracy:73.57%

Step:130019
-- Tain Accuracy:77.56%
-- Valid Accuracy:74.21%

Step:140019
-- Tain Accuracy:78.08%
-- Valid Accuracy:74.92%

Step:150019
-- Tain Accuracy:79.08%
-- Valid Acc