#### Useful Links: 
  * [Watson Nao Robot Notebook](https://github.com/IBM/watson-nao-robot/blob/master/Notebook/Robo_Notebook.ipynb)
  * [Watson Document Co-Relation](https://github.com/IBM/watson-document-co-relation)
    

In [1]:
# INSTALL DEPENDENCIES
import tensorflow as tf
if(tf.__version__ == '1.9.0'):
    print(tf.__version__)
else:
    !pip install --upgrade tensorflow
    print(tf.__version__)

!pip install -U ibm-cos-sdk
!pip install tflearn
!pip install --upgrade nltk
import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

!pip install -U socketIO_client_nexus
  
import pandas as pd
import numpy as np
import tflearn
import random

import os.path
from os import path

from io import  StringIO
import requests
import json
from datetime import datetime
import time

# things we need for NLP
import nltk
from nltk.cluster.util import cosine_distance
from nltk import word_tokenize,sent_tokenize,ne_chunk
from nltk.corpus import stopwords
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

import sys
import types
from botocore.client import Config
import ibm_boto3

1.9.0
Requirement already up-to-date: ibm-cos-sdk in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages
Requirement not upgraded as not directly required: ibm-cos-sdk-core==2.*,>=2.0.0 in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from ibm-cos-sdk)
Requirement not upgraded as not directly required: ibm-cos-sdk-s3transfer==2.*,>=2.0.0 in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from ibm-cos-sdk)
Requirement not upgraded as not directly required: docutils>=0.10 in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from ibm-cos-sdk-core==2.*,>=2.0.0->ibm-cos-sdk)
Requirement not upgraded as not directly required: jmespath<1.0.0,>=0.7.1 in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from ibm-cos-sdk-core==2.*,>=2.0.0->ibm-cos-sdk)
Requirement not upgraded as not directly required: python-dateutil<3.0.0,>=2.1 in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from ibm-cos-sdk-core==2.*,>=2.0.0->ibm-cos-sdk)
Requirement not

In [2]:
# The following code contains the credentials for a file in your IBM Cloud Object Storage.
# You might want to remove those credentials before you share your notebook.
def update_configuration(conf):
    """Store ``conf`` as the notebook-wide ``config`` and echo it without secrets.

    Parameters
    ----------
    conf : dict
        Must contain the ``"cos_credentials"`` and ``"cos_data"`` mappings.

    Raises
    ------
    KeyError
        If either of the two entries is missing.

    The credentials are printed with secret-looking values masked, because cell
    output is saved with the notebook and would otherwise leak the API key.
    """
    global config
    config = conf

    # Any credential whose name hints at a secret is masked before printing.
    secret_markers = ("KEY", "SECRET", "TOKEN", "PASSWORD")
    redacted = {
        name: "<redacted>" if any(marker in str(name).upper() for marker in secret_markers) else value
        for name, value in config["cos_credentials"].items()
    }
    print(redacted)
    print(config["cos_data"])


In [3]:
def multi_part_upload(bucket_name, item_name, file_path):
    """Upload the local file ``file_path`` to ``bucket_name`` under the key ``item_name``.

    Connection settings come from the global ``config["cos_credentials"]``.
    Any exception is caught and reported with a printed message; nothing is
    raised to the caller and nothing is returned.
    """
    megabyte = 1024 * 1024
    try:
        print("Starting file transfer for {0} to bucket: {1}\n".format(item_name, bucket_name))
        credentials = config["cos_credentials"]
        cos = ibm_boto3.resource(
            service_name='s3',
            ibm_api_key_id=credentials['IBM_API_KEY_ID'],
            ibm_auth_endpoint=credentials['IBM_AUTH_ENDPOINT'],
            config=Config(signature_version='oauth'),
            endpoint_url=credentials['ENDPOINT'],
        )

        # Files above the 15 MB threshold are sent as a multi-part upload
        # in 5 MB chunks.
        transfer_config = ibm_boto3.s3.transfer.TransferConfig(
            multipart_threshold=15 * megabyte,
            multipart_chunksize=5 * megabyte,
        )

        target = cos.Object(bucket_name, item_name)
        with open(file_path, "rb") as file_data:
            target.upload_fileobj(Fileobj=file_data, Config=transfer_config)

        print("Transfer for {0} Complete!\n".format(item_name))
    except Exception as e:
        print("Unable to complete multi-part upload: {0}".format(e))


In [4]:
def get_object_cos(bucket_name, item_name, path_to_download):
    """Download the object ``item_name`` from ``bucket_name`` to ``path_to_download``.

    Connection settings come from the global ``config["cos_credentials"]``.
    Any exception is caught and reported with a printed message; nothing is
    raised to the caller and nothing is returned.
    """
    try:
        print("Fetching file {0} from bucket: {1}\n".format(item_name, bucket_name))
        credentials = config["cos_credentials"]
        cos = ibm_boto3.resource(
            service_name='s3',
            ibm_api_key_id=credentials['IBM_API_KEY_ID'],
            ibm_auth_endpoint=credentials['IBM_AUTH_ENDPOINT'],
            config=Config(signature_version='oauth'),
            endpoint_url=credentials['ENDPOINT'],
        )

        remote_object = cos.Object(bucket_name, item_name)
        remote_object.download_file(path_to_download)

        print("Download for {0} Complete!\n".format(item_name))
    except Exception as e:
        print("Unable to download file: {0}".format(e))


In [5]:
# LOAD DATA
def load_data():
    """Read the training CSV from Cloud Object Storage into the global ``df``.

    The bucket and object key come from ``config["cos_data"]`` and the
    connection settings from ``config["cos_credentials"]``. The COS client is
    also published as the global ``cos``. Returns nothing.
    """
    global df
    global cos

    def _iter_stub(self): return 0

    # The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
    cos = ibm_boto3.client(service_name='s3',
        ibm_api_key_id=config["cos_credentials"]['IBM_API_KEY_ID'],
        ibm_auth_endpoint=config["cos_credentials"]['IBM_AUTH_ENDPOINT'],
        config=Config(signature_version='oauth'),
        endpoint_url=config["cos_credentials"]['ENDPOINT'])

    body = cos.get_object(Bucket=config["cos_data"]['BUCKET'], Key=config["cos_data"]['FILE'])['Body']
    # add missing __iter__ method, so pandas accepts body as file-like object
    if not hasattr(body, "__iter__"):
        body.__iter__ = types.MethodType(_iter_stub, body)

    # A bare ``df.head()`` used to follow here; inside a function its result is
    # discarded, so it was removed. Call ``df.head()`` in a cell to preview.
    df = pd.read_csv(body)


In [6]:
def prepare_documents():
    """Build the globals ``documents``, ``words`` and ``classes`` from ``df``.

    * ``documents`` - list of ``(tokens, intent)`` pairs, one per row of ``df``.
    * ``words``     - sorted unique stems of every lower-cased token, with the
      tokens in ``ignore_words`` left out.
    * ``classes``   - sorted unique intents.

    Reads the ``"utterances"`` and ``"intent"`` columns of the global ``df``.
    """
    global classes
    global documents
    global words
    ignore_words = ['?']

    documents = []
    all_tokens = []
    # Iterate by position rather than ``df[col][i]``: that form is a label
    # lookup and fails whenever the index is not exactly 0..n-1.
    for utterance, intent in zip(df["utterances"], df["intent"]):
        # tokenize each word in the sentence
        tokens = nltk.word_tokenize(utterance)
        all_tokens.extend(tokens)
        documents.append((tokens, intent))

    # stem and lower each word and remove duplicates
    words = sorted({stemmer.stem(tok.lower()) for tok in all_tokens if tok not in ignore_words})

    # unique intents, sorted
    classes = sorted({intent for _, intent in documents})

    print (len(documents), "documents")
    print (len(classes), "classes", classes)


In [7]:
# create our training data
def prepare_for_training():
    """Turn ``documents`` into shuffled bag-of-words training data.

    Publishes two globals of equal length:

    * ``train_x`` - one 0/1 list per document, aligned with ``words``
      (1 when the stemmed, lower-cased word occurs in the document).
    * ``train_y`` - one one-hot list per document, aligned with ``classes``.

    Reads the globals ``documents``, ``words``, ``classes`` and ``stemmer``.
    """
    global train_x
    global train_y

    # template for the one-hot output rows
    output_empty = [0] * len(classes)

    training = []
    for pattern_words, intent in documents:
        # stem each word of the pattern once; a set makes the lookups cheap
        stems = {stemmer.stem(word.lower()) for word in pattern_words}
        bag = [1 if w in stems else 0 for w in words]

        # output is a '0' for each tag and '1' for current tag
        output_row = list(output_empty)
        output_row[classes.index(intent)] = 1

        training.append((bag, output_row))

    # Shuffle features and labels together, then split them. The pairs are not
    # passed through np.array(): bag and output_row have different lengths, and
    # recent NumPy versions refuse to build an array from ragged sequences.
    random.shuffle(training)
    train_x = [bag for bag, _ in training]
    train_y = [row for _, row in training]


In [8]:
# CREATE ML MODEL
def create_model():
    """Build, train and save the intent classifier as the global ``model``.

    The network takes ``len(train_x[0])`` inputs, has two fully connected
    layers of 8 units and a softmax layer of ``len(train_y[0])`` units. It is
    trained on ``train_x``/``train_y`` and saved as ``model.tflearn``.
    """
    global model

    # start from a clean graph
    tf.reset_default_graph()

    # assemble the network layer by layer
    network = tflearn.input_data(shape=[None, len(train_x[0])])
    for _ in range(2):
        network = tflearn.fully_connected(network, 8)
    network = tflearn.fully_connected(network, len(train_y[0]), activation='softmax')
    network = tflearn.regression(network)

    # wrap it in a DNN that logs to tensorboard, then train and persist it
    model = tflearn.DNN(network, tensorboard_dir='tflearn_logs')
    model.fit(train_x, train_y, n_epoch=150, batch_size=8, show_metric=True)
    model.save('model.tflearn')
    print("<<<<<<<< ML MODEL CREATED AND SAVED >>>>>>>>>>>\n\n")

In [9]:
# save all of our data structures
def sava_data_pickle():
    """Pickle ``words``, ``classes``, ``train_x`` and ``train_y`` to ``car_training_data``.

    The file is written in the current working directory as a dict with those
    four keys. It is opened in a ``with`` block so the handle is closed (and the
    data flushed) before any later step reads or uploads the file.
    """
    import pickle
    payload = {'words': words, 'classes': classes, 'train_x': train_x, 'train_y': train_y}
    with open("car_training_data", "wb") as handle:
        pickle.dump(payload, handle)
    print("<<<<<<<< MODEL DATA SAVED IN PICKLE >>>>>>>>>>>\n\n")
    

In [10]:
def save_model_COS():
    """Upload the saved model files and the pickled training data to COS.

    Every file is read from the current working directory and sent to the
    bucket named in ``config["cos_data"]['BUCKET']``.
    """
    # (object key in the bucket, local file name), in upload order;
    # uploading "tflearn_logs" is disabled
    uploads = (
        ("model/model.tflearn.data-00000-of-00001", "model.tflearn.data-00000-of-00001"),
        ("model/model.tflearn.meta", "model.tflearn.meta"),
        ("model/model.tflearn.index", "model.tflearn.index"),
        ("model/checkpoint", "checkpoint"),
        ("training_data/car_training_data", "car_training_data"),
    )
    for object_key, local_name in uploads:
        multi_part_upload(config["cos_data"]['BUCKET'], object_key, local_name)

In [11]:
# The code was removed by DSX for sharing.

# Code to Classify text using the ML Model created

In [12]:
def fetch_ml_model_cos():
    """Download whichever model/training files are missing locally from COS.

    Each file is checked in the current working directory and fetched from the
    bucket named in ``config["cos_data"]['BUCKET']`` only when it does not exist.
    """
    # (local file name, object key in the bucket), in the order they are checked
    artifacts = (
        ("model.tflearn.data-00000-of-00001", "model/model.tflearn.data-00000-of-00001"),
        ("model.tflearn.meta", "model/model.tflearn.meta"),
        ("model.tflearn.index", "model/model.tflearn.index"),
        ("checkpoint", "model/checkpoint"),
        ("car_training_data", "training_data/car_training_data"),
    )
    for local_name, object_key in artifacts:
        if not path.exists(local_name):
            get_object_cos(config["cos_data"]['BUCKET'], object_key, local_name)
    

In [13]:
def load_model():
    """Make sure the global ``model`` exists, loading it from disk/COS if needed.

    When ``model`` is already defined nothing is rebuilt. Otherwise the same
    network as in training is reconstructed (sized from ``train_x``/``train_y``),
    missing files are fetched with ``fetch_ml_model_cos()`` and the weights are
    loaded from ``model.tflearn``.

    The global is assigned only after ``load()`` succeeds. Assigning it earlier
    would leave an untrained network behind on a failed load, and later calls
    would report it as already existing.
    """
    global model
    if "model" in globals():
        print("<<< ML Model Already Exists >>>>>")
        return

    print("<<< ML Model Needs to be loaded >>>>>")
    net = tflearn.input_data(shape=[None, len(train_x[0])])
    net = tflearn.fully_connected(net, 8)
    net = tflearn.fully_connected(net, 8)
    net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')
    net = tflearn.regression(net)

    # Define model and setup tensorboard
    candidate = tflearn.DNN(net, tensorboard_dir='tflearn_logs')
    # load our saved model
    fetch_ml_model_cos()
    candidate.load('model.tflearn')
    model = candidate


In [14]:
def clean_up_sentence(sentence):
    """Tokenize ``sentence`` and return the stem of each lower-cased token, in order."""
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

# Bag-of-words encoding: one 0/1 entry per vocabulary word.
def bow(sentence, words, show_details=False):
    """Return a numpy array with a 1 at every position of ``words`` found in ``sentence``.

    ``sentence`` is tokenized and stemmed with ``clean_up_sentence`` first.
    With ``show_details`` set, every match is printed as it is found.
    """
    tokens = clean_up_sentence(sentence)
    flags = [0] * len(words)
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            flags[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)

    return np.array(flags)

In [15]:
# per-user conversation context
context = {}

# predictions at or below this probability are dropped
ERROR_THRESHOLD = 0.25
def classify(sentence):
    """Return ``[(intent, probability), ...]`` for ``sentence``, most likely first.

    Ensures the model is available via ``load_model()``, scores the
    bag-of-words encoding of ``sentence`` and keeps only the classes whose
    probability is strictly greater than ``ERROR_THRESHOLD``.
    """
    load_model()
    probabilities = model.predict([bow(sentence, words)])[0]
    # keep confident predictions only, strongest first
    ranked = sorted(
        ([index, prob] for index, prob in enumerate(probabilities) if prob > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [(classes[index], prob) for index, prob in ranked]

In [16]:
# The code was removed by DSX for sharing.

In [17]:
from socketIO_client_nexus import SocketIO, BaseNamespace, LoggingNamespace

def on_connect():
    """Handler registered for the 'connect' event; prints a marker."""
    print('on_connect')

def on_disconnect():
    """Handler registered for the 'disconnect' event; prints a marker."""
    print('on_disconnect')

def on_reconnect():
    """Handler registered for the 'reconnect' event; prints a marker."""
    print('on_reconnect')

def on_response(*message):
    """Handler registered for the '/ml' event; prints the first argument received.

    The arguments are round-tripped through JSON, so the argument tuple becomes
    a list. An event delivered without arguments prints ``None`` instead of
    raising ``IndexError`` inside the callback.
    """
    msg = json.loads(json.dumps(message))
    print(type(msg))
    payload = msg[0] if msg else None
    print('\n\non_response: >> ', payload)

def connectSocketIO(host='https://my-watson-assistant-api.mybluemix.net', verify=False):
    """Connect to a Socket.IO server, register the notebook's handlers and wait.

    Parameters
    ----------
    host : str
        Server to connect to, e.g. ``'https://localhost'`` for local testing.
        Defaults to the hosted Watson Assistant API endpoint.
    verify : bool
        Passed straight through to ``SocketIO``.
        NOTE(review): the default ``False`` is kept for backward compatibility
        but disables TLS certificate verification; pass ``verify=True`` unless
        the server uses a self-signed certificate.

    ``wait()`` is called without a timeout.
    """
    with SocketIO(host, verify=verify) as socketIO:
        socketIO.on('connect', on_connect)
        socketIO.on('disconnect', on_disconnect)
        socketIO.on('reconnect', on_reconnect)
        socketIO.on('/ml', on_response)
        socketIO.wait()


In [18]:
# NOTE(review): reset_all() is defined in a cell that was removed before sharing.
# Judging by the recorded output below, it trains and saves the model, pickles the
# training data and uploads the files to COS - confirm against the original cell.
reset_all()

Training Step: 47549  | total loss: 0.01427 | time: 2.007s
| Adam | epoch: 150 | loss: 0.01427 - acc: 0.9980 -- iter: 2528/2532
Training Step: 47550  | total loss: 0.01331 | time: 2.010s
| Adam | epoch: 150 | loss: 0.01331 - acc: 0.9982 -- iter: 2532/2532
--
INFO:tensorflow:/home/dsxuser/work/model.tflearn is not in all_model_checkpoint_paths. Manually adding it.
<<<<<<<< ML MODEL CREATED AND SAVED >>>>>>>>>>>


<<<<<<<< MODEL DATA SAVED IN PICKLE >>>>>>>>>>>


Starting file transfer for model/model.tflearn.data-00000-of-00001 to bucket: myml-donotdelete-pr-zhsoop3fasxh7h

Transfer for model/model.tflearn.data-00000-of-00001 Complete!

Starting file transfer for model/model.tflearn.meta to bucket: myml-donotdelete-pr-zhsoop3fasxh7h

Transfer for model/model.tflearn.meta Complete!

Starting file transfer for model/model.tflearn.index to bucket: myml-donotdelete-pr-zhsoop3fasxh7h

Transfer for model/model.tflearn.index Complete!

Starting file transfer f

In [19]:
# NOTE(review): reset_for_classification() is defined in a cell that was removed
# before sharing. The recorded output shows it echoing the configuration and the
# document/class counts - confirm what else it prepares.
reset_for_classification()

{'IBM_API_KEY_ID': '<redacted>', 'IBM_AUTH_ENDPOINT': 'https://iam.ng.bluemix.net/oidc/token', 'ENDPOINT': 'https://s3-api.us-geo.objectstorage.service.networklayer.com'}
{'FILE': 'raw_car_dashboard_ml.csv', 'BUCKET': 'myml-donotdelete-pr-zhsoop3fasxh7h'}
2532 documents
26 classes ['about_VA', 'capabilites', 'capabilities', 'compound_questions', 'decision_replies', 'goodbyes', 'greetings', 'improving_system', 'information_request', 'interface_interactions', 'interface_issues', 'locate_amenity', 'navigation', 'negative_reaction', 'not_specified', 'out_of_scope', 'phone', 'positive_reaction', 'selections', 'system_reliance', 'traffic_update', 'turn_down', 'turn_off', 'turn_on', 'turn_up', 'weather']


In [21]:
# Classify a sample utterance; the result is a list of (intent, probability)
# tuples sorted by descending probability.
classify('how can I go home')

<<< ML Model Already Exists >>>>>


[('navigation', 0.92760938)]