In [144]:
# %%bash
# pip install datalab
# pip install tensorflow-gpu

In [145]:
#Import required libraries
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
import numpy as np
import os

# from keras.models import Sequential
# from nltk.tokenize import word_tokenize
# from sklearn.metrics.pairwise import cosine_similarity
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.model_selection import train_test_split, cross_val_score
# from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
# from sklearn.naive_bayes import GaussianNB
# from xgboost.sklearn import XGBClassifier
# https://machinelearningmastery.com/clean-text-machine-learning-python/
print(tf.__version__)

1.10.0


In [146]:
# from google.colab import drive
# drive.mount('/content/drive')

In [147]:
# List the compute devices TensorFlow can see on this machine
# (the printed device_type entries show which kinds are available).
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 18074285931112581377
]


In [148]:
# Load the labelled sentences from train_data.csv and preview the first rows.
# The preview below shows a free-text `text` column and an integer `class` column.
all_data = pd.read_csv('train_data.csv')
all_data.head()

Unnamed: 0,text,class
0,never talk to me again,3
1,I am proud of your achievements,2
2,It is the worst day in my life,3
3,Miss you so much,0
4,food is life,4


In [149]:
#Create one hot encoding of labels
def create_yoh_list(v_classes, num_classes):
    """Build a one-hot matrix from a sequence of integer class labels.

    Args:
        v_classes: Iterable of integer class ids, each in ``[0, num_classes)``.
        num_classes: Width of the encoding (number of distinct classes).

    Returns:
        A float numpy array with one row per label and ``num_classes`` columns;
        each row holds a single 1 in the column given by its label.

    Raises:
        IndexError: If any label lies outside ``[0, num_classes)``.
    """
    # Materialise once so generators and other one-shot iterables work too.
    labels = list(v_classes)
    yoh_class = np.zeros((len(labels), num_classes))

    for ix, v_cl in enumerate(labels):
        # numpy treats negative indices as "count from the end", so a label of
        # -1 would silently mark the last class. Reject it explicitly, using
        # the same exception type numpy raises for labels that are too large.
        if not 0 <= v_cl < num_classes:
            raise IndexError(
                "class label {!r} at position {} is outside [0, {})".format(
                    v_cl, ix, num_classes))
        yoh_class[ix, v_cl] = 1

    return yoh_class

In [150]:
# Smoke-test the one-hot encoder: every output row should contain a single 1,
# located in the column that matches the corresponding input label.
create_yoh_list([1,3,4,2],num_classes = 5)

array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0.]])

In [151]:
#Def class to text
def class_to_text(cls):
    """Translate a numeric class id into its human-readable label.

    Ids 0-4 map to 'love', 'playful', 'happy', 'sad' and 'foodie' respectively.
    Any other id raises KeyError.
    """
    label_names = ('love', 'playful', 'happy', 'sad', 'foodie')
    # Position in the tuple is the class id.
    id_to_label = dict(enumerate(label_names))
    return id_to_label[cls]

class_to_text(2)

'happy'

In [152]:
#Random split dataset into Train and Validation
# Draw 95% of the rows (fixed seed, so the split is repeatable) for training;
# whatever was not drawn becomes the validation set.
train_data = all_data.sample(frac=0.95, random_state=1)
valid_data = all_data.drop(index=train_data.index)

# Print a summary of each split.
train_data.info()
valid_data.info()

# Persist both splits without header or index column; these files are what the
# TensorFlow Dataset-based input pipeline reads.
train_data.to_csv("data_train.csv", header=False, index=False)
valid_data.to_csv("data_eval.csv", header=False, index=False)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 179 entries, 31 to 134
Data columns (total 2 columns):
text     179 non-null object
class    179 non-null int64
dtypes: int64(1), object(1)
memory usage: 4.2+ KB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 9 entries, 37 to 144
Data columns (total 2 columns):
text     9 non-null object
class    9 non-null int64
dtypes: int64(1), object(1)
memory usage: 216.0+ bytes


In [153]:
def add_engineered(inp):
    """Feature-engineering hook; currently hands the features back unchanged."""
    return inp

In [154]:
# Define your feature columns
def create_feature_cols():
    """Return the list of feature columns used by the estimator.

    The list holds one TF-Hub text-embedding column that reads the "text"
    feature and embeds it with the module at the URL below, with
    trainable=False so the module is not fine-tuned.
    """
    nnlm_module_url = "https://tfhub.dev/google/nnlm-en-dim128/1"
    text_column = hub.text_embedding_column(
        key="text",
        module_spec=nnlm_module_url,
        trainable=False,
    )
    return [text_column]

In [155]:
# Column names, in file order, of the header-less CSVs; zipped with the decoded
# CSV fields inside read_dataset to build the feature dict.
CSV_COLUMNS = train_data.columns  
# Name of the column that read_dataset pops out of the features as the label.
LABEL_COLUMN = 'class'
# Per-column defaults passed to tf.decode_csv as record_defaults:
# 'no' for the first column and 0 for the second.
DEFAULTS = [['no'],[0]]

def read_dataset(filename, mode, batch_size = 512):
    """Create an input function that streams batches from header-less CSV files.

    Args:
        filename: File path or glob pattern of the CSV file(s) to read.
        mode: A tf.estimator.ModeKeys value. In TRAIN mode the rows are
            shuffled and repeated indefinitely; in any other mode the data is
            read exactly once.
        batch_size: Number of rows per batch.

    Returns:
        A callable ``_input_fn(v_test=False)`` that builds the dataset and
        returns a ``(batch_features, batch_labels)`` pair of tensors. Calling
        it with ``v_test=True`` additionally prints one evaluated batch of
        features, for manual inspection.

    Raises:
        FileNotFoundError: Raised when the returned function is called and
            ``filename`` matches no file.
    """
    def _input_fn(v_test=False):
        def decode_csv(value_column):
            # Split one CSV line into typed fields, name them, and separate
            # the label from the remaining features.
            columns = tf.decode_csv(value_column, record_defaults = DEFAULTS)
            features = dict(zip(CSV_COLUMNS, columns))
            label = features.pop(LABEL_COLUMN)
            return add_engineered(features), label

        # Create list of files that match pattern
        file_list = tf.gfile.Glob(filename)
        if not file_list:
            # Fail fast: an empty file list would otherwise yield an empty
            # dataset and a run that processes no data without saying why.
            raise FileNotFoundError(
                "No files match pattern: {!r}".format(filename))

        # Create dataset from file list
        dataset = tf.data.TextLineDataset(file_list).map(decode_csv)

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None # indefinitely
            dataset = dataset.shuffle(buffer_size = 10 * batch_size)
        else:
            num_epochs = 1 # end-of-input after this

        dataset = dataset.repeat(num_epochs).batch(batch_size)
        batch_features, batch_labels = dataset.make_one_shot_iterator().get_next()

        # Debug aid: evaluate and print one batch of features. Only runs when
        # the caller explicitly passes v_test=True.
        if v_test:
            with tf.Session() as sess:
                print(sess.run(batch_features))

        return batch_features, batch_labels
    return _input_fn

In [156]:
#Test dataset read function
# Build the path with os.path.join so it is portable and does not rely on the
# invalid "\d" escape sequence inside a string literal.
# NOTE: despite its name, `eval_file` points at the training CSV in this cell;
# the name is kept because it is a notebook-level variable.
eval_file = os.path.join(os.getcwd(), "data_train.csv")
fn_d = read_dataset(filename = eval_file,
                    mode = tf.estimator.ModeKeys.EVAL,
                    batch_size = 20)

# v_test=True makes the input function print one evaluated batch of features.
fn_d(v_test=True)

{'text': array([b'How dare you ask that', b'I want chinese food',
       b'you did not do your homework',
       b'She is the cutest person I have ever seen',
       b'I am at the baseball game', b'I love taking breaks', b'you suck',
       b'Good joke', b'You are not qualified for this position',
       b'she is happy', b'you are a loser',
       b'Sounds like a fun plan ha ha',
       b'I was waiting for her for two hours ',
       b'I worked during my birthday', b'I want to eat',
       b'cookies are good', b'where is the stadium', b'we made it',
       b'food is life', b'this joke is killing me haha'], dtype=object)}


({'text': <tf.Tensor 'IteratorGetNext_5:0' shape=(?,) dtype=string>},
 <tf.Tensor 'IteratorGetNext_5:1' shape=(?,) dtype=int32>)

In [157]:
# # Create pandas input function for training
# def make_input_fn(df_f, num_epochs):
#   return tf.estimator.inputs.pandas_input_fn(
#     x = df_f,
#     y = df_f['label'],
#     batch_size = 128,
#     num_epochs = num_epochs,
#     shuffle = True,
#     queue_capacity = 200,
#     num_threads = 1
#   )

# #Input Function for Train and Validation Set
# def get_validation_input_fn(data_set, num_epochs=1, shuffle=False):
#   return tf.estimator.inputs.pandas_input_fn(
#       x = data_set,
#       y = data_set['label'],
#       num_epochs=num_epochs,
#       shuffle=shuffle)

# Serving function for external call
def serving_fn():
    """Build the serving input receiver used when exporting the model.

    Requests supply a 1-D batch of strings under the key 'text'. Each received
    tensor gets a trailing dimension added before being handed to the model as
    a feature. No add_engineered step is applied here.
    """
    receiver_tensors = {'text': tf.placeholder(tf.string, [None])}

    # Features with transformation logic: append a trailing axis to every input.
    features = {}
    for key, tensor in receiver_tensors.items():
        features[key] = tf.expand_dims(tensor, -1)

    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

In [164]:
# Create estimator train and evaluate function
def train_and_evaluate_dataset(output_dir, num_train_steps, train_file, eval_file):
    """Train and periodically evaluate the 5-class DNN text classifier.

    Args:
        output_dir: Directory for checkpoints, summaries and exports. When
            None, the estimator falls back to a temporary directory (visible as
            `_model_dir` in the "Using config" log line).
        num_train_steps: Value passed as `max_steps` to the training spec.
        train_file: Path or glob pattern of the training CSV file(s).
        eval_file: Path or glob pattern of the evaluation CSV file(s).

    Returns:
        None. Training, evaluation and model export happen as side effects of
        tf.estimator.train_and_evaluate.
    """
    # Checkpoint every 40 seconds and keep only the 3 most recent checkpoints.
    # model_dir forwards output_dir, which was previously accepted but ignored.
    run_config = tf.estimator.RunConfig(model_dir = output_dir,
                                        save_checkpoints_secs = 40,
                                        keep_checkpoint_max = 3)

    # Canned DNN classifier on top of the feature columns from create_feature_cols.
    estimator = tf.estimator.DNNClassifier(feature_columns=create_feature_cols(),
                                          n_classes=5,
                                          hidden_units=[256,128,64,32],
                                          dropout = 0.3,
                                          optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
                                          config = run_config)

    train_spec = tf.estimator.TrainSpec(input_fn = read_dataset(
                                                filename = train_file,
                                                mode = tf.estimator.ModeKeys.TRAIN,
                                                batch_size = 128),
                                      max_steps = num_train_steps)

    # Export a servable model under the "decision" export name after evaluations.
    exp = tf.estimator.LatestExporter("decision", serving_fn)
    eval_spec = tf.estimator.EvalSpec(input_fn = read_dataset(
                                                filename = eval_file,
                                                mode = tf.estimator.ModeKeys.EVAL,
                                                batch_size = 128),
                                    steps = None, # evaluate until the input is exhausted
                                    exporters = exp,
                                    start_delay_secs = 1, # start evaluating after N seconds
                                    throttle_secs = 20)  # evaluate every N seconds
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

In [165]:
# Build the CSV paths with os.path.join: the previous "\data_train.csv" style
# literals relied on invalid escape sequences and only worked on Windows.
train_file = os.path.join(os.getcwd(), "data_train.csv")
eval_file = os.path.join(os.getcwd(), "data_eval.csv")

# Arguments: output directory (None here), max training steps, train CSV, eval CSV.
train_and_evaluate_dataset(None, 10000,train_file,eval_file)

INFO:tensorflow:Using config: {'_service': None, '_evaluation_master': '', '_device_fn': None, '_master': '', '_log_step_count_steps': 100, '_keep_checkpoint_max': 3, '_save_checkpoints_secs': 40, '_keep_checkpoint_every_n_hours': 10000, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001A42329DD30>, '_train_distribute': None, '_save_checkpoints_steps': None, '_save_summary_steps': 100, '_task_type': 'worker', '_task_id': 0, '_model_dir': 'C:\\Users\\hrafiq\\AppData\\Local\\Temp\\tmpok2g_4vx', '_tf_random_seed': None, '_global_id_in_cluster': 0, '_session_config': None, '_num_ps_replicas': 0, '_is_chief': True, '_num_worker_replicas': 1}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 40.
INFO:tensorflow:Calling mo

INFO:tensorflow:Assets written to: C:\Users\hrafiq\AppData\Local\Temp\tmpok2g_4vx\export\decision\temp-b'1543944698'\assets
INFO:tensorflow:SavedModel written to: C:\Users\hrafiq\AppData\Local\Temp\tmpok2g_4vx\export\decision\temp-b'1543944698'\saved_model.pb
INFO:tensorflow:global_step/sec: 13.4851
INFO:tensorflow:loss = 0.7721937, step = 5001 (7.416 sec)
INFO:tensorflow:global_step/sec: 140.665
INFO:tensorflow:loss = 1.4177265, step = 5101 (0.711 sec)
INFO:tensorflow:global_step/sec: 143.087
INFO:tensorflow:loss = 6.8073373, step = 5201 (0.699 sec)
INFO:tensorflow:global_step/sec: 146.02
INFO:tensorflow:loss = 1.8989053, step = 5301 (0.685 sec)
INFO:tensorflow:global_step/sec: 123.277
INFO:tensorflow:loss = 1.503239, step = 5401 (0.811 sec)
INFO:tensorflow:global_step/sec: 121.475
INFO:tensorflow:loss = 0.7474178, step = 5501 (0.823 sec)
INFO:tensorflow:global_step/sec: 102.184
INFO:tensorflow:loss = 0.20886523, step = 5601 (0.980 sec)
INFO:tensorflow:global_step/sec: 116.237
INFO:te

KeyboardInterrupt: 

In [0]:
# Start TensorBoard on a log directory using the Google Datalab helper.
from google.datalab.ml import TensorBoard
# NOTE(review): this path is hard-coded, and the training log in this notebook
# reports a different model_dir (a Windows temp folder), so it may not point at
# the run above. Confirm before relying on it.
OUTDIR='/tmp/tmp5qwzs4y8'
print(OUTDIR)
TensorBoard().start(OUTDIR)