## Generate Training Data for the Bot

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Glossary table; its 'Term Name' column supplies the entity values used further down.
filtered_terms = pd.read_csv('./data/filtered_terms.csv')

In [3]:
filtered_terms.head()

Unnamed: 0.1,Unnamed: 0,Glossary Type,Term Name,Description,Usage Context,Associated Category Names,Service Line
0,0,Acronyms,HUGO,Have U Gone Online (Intranet),HUGO,,
1,3,Acronyms,NI,Nominal Insurer- The Organization is the Nomin...,NI,,
2,4,Acronyms,CVV,Card Verification Value,CVV,,
3,7,Acronyms,EOI,Expression of Interest,EOI,,
4,8,Acronyms,TEED,Tape Effective End Date (Submission End Date),TEED,,


In [4]:
# Term names are the values substituted into the '@' query templates below.
# Missing names are dropped first: str(NaN) would otherwise inject the literal
# term 'nan' into the training data.
entity_list = filtered_terms['Term Name'].dropna().astype(str).tolist()

In [18]:
# Intent templates for the bot. In the "meaning" queries the '@' character is
# the placeholder that the training-data generator replaces with a term name.
# NOTE(review): "hello" is listed twice and a few templates carry stray
# leading/trailing/double spaces; they are kept exactly as authored.
patterns_for_bot = {
    "intents": [
        {
            "tag": "greeting",
            "queries": [
                "hello",
                "hey there",
                "howdy",
                "hello",
                "hi",
                "hey",
                "hey ho",
            ],
            "responses": [
                "Hello! How may I help you?",
                "Hi! Good to see you. How can I assist you?",
            ],
        },
        {
            # No canned responses here: the answer is looked up per term.
            "tag": "meaning",
            "queries": [
                "what is the meaning of @?",
                "what does @ mean",
                " what is @?",
                "what do you mean by @",
                "meaning of  @",
                "define @",
                "I would want to know about @",
                "what's @ by the way",
                "know about @",
                "let me know the meaning of @",
                "@ meaning",
                "find about @.",
                "look for @.",
                "look for the meaning of @",
                "search for the meaning of @ ",
            ],
        },
        {
            "tag": "bye",
            "queries": [
                "bye.",
                "Thank you",
                "Thank you for the help.",
                "It was good talking to you",
                "Amazing",
                "Great",
                "superb",
                "Thanks for the assistance",
            ],
            "responses": [
                "Thank you! Have a good time",
                "Thank you! Wish to see you back again",
            ],
        },
    ]
}

In [78]:
import re
from collections import OrderedDict
import json

def generate_rasa_train_data(train_data_for_bot, entity_list, file_to_save='./rasa_training_data.json'):
    """Expand intent templates into rasa_nlu-style training data and save it as JSON.

    Parameters
    ----------
    train_data_for_bot : dict
        Mapping with an ``'intents'`` list. Each intent is a dict with a
        ``'tag'`` and a list of ``'queries'``. Queries of the ``'greeting'``
        and ``'bye'`` intents are used verbatim. Queries of the ``'meaning'``
        intent are templates in which ``'@'`` marks where a term is inserted.
        Intents with any other tag are ignored.
    entity_list : iterable of str
        Terms substituted into every ``'meaning'`` template. Empty terms are
        skipped because they would yield a zero-length entity annotation.
    file_to_save : str
        Path of the JSON file to write.

    Returns
    -------
    OrderedDict
        ``{"rasa_nlu_data": {"common_examples": [...], "regex_features": [],
        "lookup_tables": [], "entity_synonyms": []}}`` -- the same structure
        that is written to ``file_to_save``.
    """
    placeholder = '@'
    plain_intents = ('greeting', 'bye')
    template_intent = 'meaning'
    entity_label = 'BUSINESS_TERM'

    def make_example(text, intent, entities):
        # One entry of "common_examples"; key order matches the file format used so far.
        example = OrderedDict()
        example["text"] = text
        example["intent"] = intent
        example["entities"] = entities
        return example

    def fill_template(pieces, term):
        # `pieces` is the template split on the placeholder. Joining with the
        # term performs a *literal* substitution (no regex escapes involved),
        # and the character offsets follow directly from the piece lengths, so
        # the annotation always points at the inserted term -- never at an
        # earlier look-alike substring of the template.
        spans = []
        cursor = 0
        for piece in pieces[:-1]:
            start = cursor + len(piece)
            end = start + len(term)
            span = OrderedDict()
            span["start"] = start
            span["end"] = end
            span["value"] = term
            span["entity"] = entity_label
            spans.append(span)
            cursor = end
        return term.join(pieces), spans

    common_examples = []
    for pattern in train_data_for_bot['intents']:
        tag = pattern['tag']
        if tag in plain_intents:
            for query in pattern['queries']:
                common_examples.append(make_example(query, tag, []))
        elif tag == template_intent:
            for query in pattern['queries']:
                pieces = query.split(placeholder)
                if len(pieces) == 1:
                    # Template without a placeholder: nothing to substitute, so
                    # keep it once as a plain example instead of failing.
                    common_examples.append(make_example(query, tag, []))
                    continue
                for entity in entity_list:
                    term = str(entity)
                    if not term:
                        continue
                    text, spans = fill_template(pieces, term)
                    common_examples.append(make_example(text, tag, spans))

    nlu_data = OrderedDict()
    nlu_data["common_examples"] = common_examples
    nlu_data["regex_features"] = []
    nlu_data["lookup_tables"] = []
    nlu_data["entity_synonyms"] = []

    base_dict_struct = OrderedDict()
    base_dict_struct["rasa_nlu_data"] = nlu_data

    with open(file_to_save, 'w') as f:
        json.dump(base_dict_struct, f)

    return base_dict_struct

In [79]:
# Build the training examples from the templates and term list; the result is
# also written to ./rasa_training_data.json, which the training section loads.
rasa_train_data = generate_rasa_train_data(patterns_for_bot, entity_list, file_to_save='./rasa_training_data.json')

In [92]:
rasa_train_data["rasa_nlu_data"]["common_examples"][15:18]

[OrderedDict([('text', 'what is the meaning of RPL?'),
              ('intent', 'meaning'),
              ('entities',
               [OrderedDict([('start', 23),
                             ('end', 26),
                             ('value', 'RPL'),
                             ('entity', 'BUSINESS_TERM')])])]),
 OrderedDict([('text', 'what is the meaning of RACGP?'),
              ('intent', 'meaning'),
              ('entities',
               [OrderedDict([('start', 23),
                             ('end', 28),
                             ('value', 'RACGP'),
                             ('entity', 'BUSINESS_TERM')])])]),
 OrderedDict([('text', 'what is the meaning of MSL?'),
              ('intent', 'meaning'),
              ('entities',
               [OrderedDict([('start', 23),
                             ('end', 26),
                             ('value', 'MSL'),
                             ('entity', 'BUSINESS_TERM')])])])]

## Training the RASA Language Model

In [81]:
from rasa_nlu.training_data import load_data
from rasa_nlu.model import Trainer
from rasa_nlu import config
from rasa_nlu.model import Interpreter

In [86]:
# Load the generated examples and create a trainer from the settings in ./config_model.yml.
training_data = load_data('./rasa_training_data.json')
trainer = Trainer(config.load("./config_model.yml"))

In [87]:
# Train on the loaded examples; the call returns an Interpreter (shown in the cell output).
trainer.train(training_data)

Fitting 2 folds for each of 6 candidates, totalling 12 fits


[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    7.0s finished


<rasa_nlu.model.Interpreter at 0x7f8cb9305470>

In [88]:
# Save the trained model under the current directory. The model is loaded back
# from './default/terms_model' in the next cell.
# NOTE(review): presumably 'default' is a project sub-directory added by rasa_nlu -- confirm.
model_directory = trainer.persist('./', fixed_model_name='./terms_model')

In [89]:
# Reload the persisted model from disk so the parse below exercises the saved artefact.
interpreter = Interpreter.load('./default/terms_model')

  "".format(entity_synonyms_file))


In [90]:
query = 'what is the meaning of RPL?'

In [183]:
interpreter.parse(query)

{'entities': [{'confidence': 0.9996398572339271,
   'end': 26,
   'entity': 'BUSINESS_TERM',
   'extractor': 'ner_crf',
   'start': 23,
   'value': 'rpl'}],
 'intent': {'confidence': 0.9999996600327454, 'name': 'meaning'},
 'intent_ranking': [{'confidence': 0.9999996600327454, 'name': 'meaning'},
  {'confidence': 2.1728602470635963e-07, 'name': 'bye'},
  {'confidence': 1.2268122992605558e-07, 'name': 'greeting'}],
 'text': 'what is the meaning of RPL?'}

## Creating the Policy for the Bot

In [190]:
from rasa_nlu.model import Interpreter
import numpy as np

def bot_policy(query, filtered_terms = './data/filtered_terms.csv', nlu_model = './default/terms_model'):
    """Answer a user query using the trained NLU model and the glossary CSV.

    Parameters
    ----------
    query : str
        Raw user message.
    filtered_terms : str
        Path of the glossary CSV. The columns 'Term Name', 'Glossary Type'
        and 'Description' are read.
    nlu_model : str
        Path of the persisted NLU model passed to ``Interpreter.load``.

    Returns
    -------
    str
        * intent 'greeting' -> a randomly chosen greeting;
        * intent 'meaning'  -> the glossary description of the first extracted
          entity, or an apology when no entity was extracted or the term is
          not in the glossary;
        * any other intent  -> a randomly chosen farewell.
    """
    greeting_responses = ["Hello! How may I help you?", "Hi! Good to see you. How can I assist you?"]
    farewell_responses = ["Thank you! Have a good time","Thank you! Wish to see you back again"]

    # NOTE(review): the model is loaded from disk on every call; callers that
    # answer many queries may want to cache the interpreter.
    interpreter = Interpreter.load(nlu_model)
    nlu_response = interpreter.parse(query)
    intent_name = nlu_response['intent']['name']

    if intent_name == 'greeting':
        return greeting_responses[np.random.randint(0, len(greeting_responses))]

    if intent_name == 'meaning':
        entities = nlu_response.get('entities') or []
        if not entities:
            # The intent was recognised but no term was extracted.
            return "Sorry, I could not work out which term you are asking about."

        entity_value = entities[0]['value']
        needle = str(entity_value).strip().lower()
        if not needle:
            return "Sorry, I could not work out which term you are asking about."

        # Rows without a term name cannot match anything and would otherwise
        # put NaN into the boolean mask.
        terms = pd.read_csv(filtered_terms).dropna(subset=['Term Name'])
        names = terms['Term Name'].astype(str).str.strip().str.lower()

        # Prefer an exact (case-insensitive) match so that e.g. "ni" does not
        # resolve to the first term that merely contains "ni". Fall back to a
        # literal substring match (regex=False keeps "c++" etc. from being
        # interpreted as a pattern).
        entity_record = terms[names == needle]
        if entity_record.empty:
            entity_record = terms[names.str.contains(needle, regex=False)]
        if entity_record.empty:
            return "Sorry, I could not find '{}' in the glossary.".format(entity_value)

        return "The meaning of {} according to the Glossary Type {} is {}".format(entity_value,
                                                                                  entity_record['Glossary Type'].values[0],
                                                                                  entity_record['Description'].values[0])

    return farewell_responses[np.random.randint(0, len(farewell_responses))]

In [191]:
query = 'what is the meaning of RPL?'

In [192]:
bot_policy(query)

  "".format(entity_synonyms_file))


'The meaning of rpl according to the Glossary Type Acronyms is Retro Paid Loss'