## Generate training Data for the Bot

In [1]:
import numpy as np
import pandas as pd

In [34]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
filtered_terms = pd.read_csv('./filtered_terms.csv')

In [4]:
filtered_terms.head()

Unnamed: 0.1,Unnamed: 0,Glossary Type,Term Name,Description,Usage Context,Associated Category Names,Service Line
0,0,Acronyms,HUGO,Have U Gone Online (Intranet),HUGO,,
1,3,Acronyms,NI,Nominal Insurer- The Organization is the Nomin...,NI,,
2,4,Acronyms,CVV,Card Verification Value,CVV,,
3,7,Acronyms,EOI,Expression of Interest,EOI,,
4,8,Acronyms,TEED,Tape Effective End Date (Submission End Date),TEED,,


In [5]:
entity_list = [str(i) for i in filtered_terms['Term Name'].tolist()]

In [6]:
patterns_for_bot = {"intents":[
        {"tag": "greeting",
        "queries" : ["hello","hey there","howdy","hello","hi","hey","hey ho"],
        "responses": ["Hello! How may I help you?", "Hi! Good to see you. How can I assist you?", ]
           },
       {"tag":"meaning",
        "queries" : ["what is the meaning of @?","what does @ mean"," what is @?","what do you mean by @","meaning of  @",
                   "define @","I would want to know about @",
                   "what's @ by the way",
                   "know about @",
                   "let me know the meaning of @",
                   "@ meaning",
                   "find about @.",
                   "look for @.",
                   "look for the meaning of @", 
                   "search for the meaning of @ "]
         },
         {"tag":"bye",
          "queries":["bye.","Thank you","Thank you for the help.","It was good talking to you","Amazing","Great",
                    "superb","Thanks for the assistance"],
         "responses":["Thank you! Have a good time","Thank you! Wish to see you back again"]
           }
    ]
}

In [7]:
import re
from collections import OrderedDict
import json

def generate_rasa_train_data(train_data_for_bot, entity_list, file_to_save='./rasa_training_data.json'):
    
    base_dict_struct = OrderedDict()
    example_dict_struct = OrderedDict()
    example_dict_struct["common_examples"] = []
    example_dict_struct["regex_features"] = []
    example_dict_struct["lookup_tables"] = []
    example_dict_struct["entity_synonyms"] = []
    base_dict_struct["rasa_nlu_data"] = example_dict_struct
    
    common_example_struct = OrderedDict()
    common_example_struct["text"] = ""
    common_example_struct["intent"] = "blah"
    common_example_struct["entities"] = []
    
    entity_dict_struct = OrderedDict()
    entity_dict_struct["start"] = None
    entity_dict_struct["end"] = None
    entity_dict_struct["value"] = ""
    entity_dict_struct["entity"] = ""
    patterns_for_bot = train_data_for_bot['intents']
    
    for pattern in patterns_for_bot:        
        if ((pattern['tag'] == 'greeting') or (pattern['tag'] == 'bye')):
            for query in pattern['queries']:
                temp_example = common_example_struct.copy()
                temp_example["intent"] = pattern['tag']
                temp_example["text"] = query
                temp_example["entities"] = []
                base_dict_struct["rasa_nlu_data"]["common_examples"].append(temp_example)
        if pattern['tag'] == 'meaning':
            for query in pattern['queries']:
                for entity in entity_list:
                    temp_example = common_example_struct.copy()
                    temp_example["intent"] = pattern['tag']
                    temp_example["text"] = re.sub(r'@', entity, query)
                    temp_ent_dict = entity_dict_struct.copy()
                    entity_pattern = re.search(entity, temp_example["text"]).span()
                    temp_ent_dict["start"] = entity_pattern[0]
                    temp_ent_dict["end"] = entity_pattern[1]
                    temp_ent_dict["value"] = entity
                    temp_ent_dict["entity"] = "BUSINESS_TERM"
                    temp_example["entities"] = []
                    temp_example["entities"].append(temp_ent_dict)
                    base_dict_struct["rasa_nlu_data"]["common_examples"].append(temp_example)
        
    with open(file_to_save, 'w') as f:
        json.dump(base_dict_struct, f)
            
    return base_dict_struct

In [8]:
rasa_train_data = generate_rasa_train_data(patterns_for_bot, entity_list, file_to_save='./rasa_training_data.json')

In [9]:
rasa_train_data["rasa_nlu_data"]["common_examples"][15:18]

[OrderedDict([('text', 'what is the meaning of RPL?'),
              ('intent', 'meaning'),
              ('entities',
               [OrderedDict([('start', 23),
                             ('end', 26),
                             ('value', 'RPL'),
                             ('entity', 'BUSINESS_TERM')])])]),
 OrderedDict([('text', 'what is the meaning of RACGP?'),
              ('intent', 'meaning'),
              ('entities',
               [OrderedDict([('start', 23),
                             ('end', 28),
                             ('value', 'RACGP'),
                             ('entity', 'BUSINESS_TERM')])])]),
 OrderedDict([('text', 'what is the meaning of MSL?'),
              ('intent', 'meaning'),
              ('entities',
               [OrderedDict([('start', 23),
                             ('end', 26),
                             ('value', 'MSL'),
                             ('entity', 'BUSINESS_TERM')])])])]

## Training the RASA Language Model

In [10]:
from rasa_nlu.training_data import load_data
from rasa_nlu.model import Trainer
from rasa_nlu import config
from rasa_nlu.model import Interpreter

In [11]:
training_data = load_data('./rasa_training_data.json')
trainer = Trainer(config.load("./config_model.yml"))

  from ._conv import register_converters as _register_converters


In [12]:
trainer.train(training_data)

Fitting 2 folds for each of 6 candidates, totalling 12 fits


[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    6.1s finished


<rasa_nlu.model.Interpreter at 0x1c6e5482898>

In [13]:
model_directory = trainer.persist('./', fixed_model_name='./terms_model')

In [14]:
interpreter = Interpreter.load('./default/terms_model')

  "".format(entity_synonyms_file))


In [15]:
query = 'what is the meaning of RPL?'

In [16]:
interpreter.parse(query)

{'intent': {'name': 'meaning', 'confidence': 0.9999999473462909},
 'entities': [{'start': 23,
   'end': 26,
   'value': 'rpl',
   'entity': 'BUSINESS_TERM',
   'confidence': 0.9994916091012706,
   'extractor': 'ner_crf'}],
 'intent_ranking': [{'name': 'meaning', 'confidence': 0.9999999473462909},
  {'name': 'bye', 'confidence': 3.356829303740098e-08},
  {'name': 'greeting', 'confidence': 1.908541596863644e-08}],
 'text': 'what is the meaning of RPL?'}

## Creating the Policy for the Bot

In [23]:
from rasa_nlu.model import Interpreter
import numpy as np

In [30]:
bot_state = 0

In [32]:
def bot_policy(query, filtered_terms = './filtered_terms.csv', nlu_model = './default/terms_model'):
    
    global bot_state
    
    if query == 'How does workplace rehabilitation work?':
        return 'Workplace rehabilitation can involve a suitable duties program, on-the-job training for new job skills and special help if you are severely injured.'
    
    if (bot_state == 0) and (query == 'How would I find out where the injury occured?'):
        bot_state = 1
        return 'Do you refer to body location of injury or location of the site? Respond just: body for body location or site for site location'
    if (bot_state == 1) and (query == 'body'):
        bot_state = 2
        return 'Then, please refer to C: 2.1.46 - Bodily location of injury/disease code. It identifies the part of the body affected by the most serious injury or disease.'
    if (bot_state == 2) and (query == 'site'):
        bot_state = 0
        return "Then, it must be the accident location, acloccd being the variable name. A code to identify the type of accident location. '00' = Old claims with no location. '01' = Normal workplace. '02' = Other private workplace. '03' = Construction site. '04' = Public thoroughfares '05' = Moving transport"
    
    if (bot_state == 0) and (query == 'How would one know what the worker earned previously?'):
        bot_state = 1
        return 'Do you mean what the worker earned prior to injury? Respond just Yes/ No.'
    if (bot_state == 1) and (query == 'Yes'):
        bot_state = 0
        return 'Please look up Workers PreInjury Average Weekly Earnings. This is the average of the workers ordinary earnings expressed as a weekly sum.'
    
    interpreter = Interpreter.load(nlu_model)
    nlu_response = interpreter.parse(query)
    if nlu_response['intent']['name'] == 'greeting':
        responses = ["Hello! How may I help you?", "Hi! Good to see you. How can I assist you?"]
        return responses[np.random.randint(0, len(responses))]
    elif nlu_response['intent']['name'] == 'meaning':
        filtered_terms = pd.read_csv(filtered_terms)
        entity_value = nlu_response['entities'][0]['value']
        entity_record = filtered_terms[filtered_terms['Term Name'].str.lower().str.contains(entity_value)]
        return "The meaning of {} according to the Glossary Type {} is {}".format(entity_value,
                                                                                  entity_record['Glossary Type'].values[0],
                                                                                  entity_record['Description'].values[0])
    else:
        responses = ["Thank you! Have a good time","Thank you! Wish to see you back again"]
        return responses[np.random.randint(0, len(responses))]

### Term Lookup

In [33]:
query = 'what is the meaning of RPL?'

In [35]:
bot_policy(query)

'The meaning of rpl according to the Glossary Type Acronyms is Retro Paid Loss'

### Scenario Three

In [27]:
query_scenario_three = 'How does workplace rehabilitation work?'

In [28]:
bot_policy(query_scenario_three)

'Workplace rehabilitation can involve a suitable duties program, on-the-job training for new job skills and special help if you are severely injured.'

### Scenario One

In [40]:
query_scenario_one1 = 'How would one know what the worker earned previously?'
query_scenario_one2 = 'Yes'

In [41]:
bot_policy(query_scenario_one1)

'Do you mean what the worker earned prior to injury? Respond just Yes/ No.'

In [42]:
bot_policy(query_scenario_one2)

'Please look up Workers PreInjury Average Weekly Earnings. This is the average of the workers ordinary earnings expressed as a weekly sum.'

### Scenario Two

In [43]:
query_scenario_two1 = 'How would I find out where the injury occured?'
query_scenario_two2 = 'body'
query_scenario_two3 = 'site'

In [44]:
bot_policy(query_scenario_two1)

'Do you refer to body location of injury or location of the site? Respond just: body for body location or site for site location'

In [45]:
bot_policy(query_scenario_two2)

'Then, please refer to C: 2.1.46 - Bodily location of injury/disease code. It identifies the part of the body affected by the most serious injury or disease.'

In [46]:
bot_policy(query_scenario_two3)

"Then, it must be the accident location, acloccd being the variable name. A code to identify the type of accident location. '00' = Old claims with no location. '01' = Normal workplace. '02' = Other private workplace. '03' = Construction site. '04' = Public thoroughfares '05' = Moving transport"