In [3]:
import json
import os
import random
import re

import numpy as np
import pandas as pd

<h2>Functions</h2>

<h3>General Functions</h3>

In [4]:
# Tell whether a value is NaN
# param x - Value to test
# return boolean - Truthy when x is np.nan itself or does not compare equal to itself
def is_nan(x):
    if x is np.nan:
        return True
    # NaN is the only float that is unequal to itself
    return x != x

# Tell whether every value of a collection is NaN
# param x - Iterable of values to test
# return boolean - True when no element is a non-NaN value (also True for an empty iterable)
def list_is_nan(x):
    return all(is_nan(value) for value in x)

# Joins the parts of each name into one space-separated string
# param names (list) - List of names in the format [['Mia', 'Brown'],[..]]
# return joined (list) - List of names in the format ['Mia Brown', ..]
def name_to_str(names):
    joined = []
    for parts in names:
        # Any comma inside a name part is also turned into a space
        joined.append(" ".join(parts).replace(",", " "))
    return joined

# Splits full-name strings into [first, last] pairs
# Only the first and the last whitespace-separated tokens are kept, so middle
# names/initials are dropped; a single-token name yields the same token twice.
# The input list is left untouched (a new list is returned).
# param names (list) - List of names in the format ['Mia Brown', ..]
# return names_list (list) - List of names in the format [['Mia', 'Brown'],[..]]
# raises IndexError - If a name is empty or contains only whitespace
def str_name_to_list(names):
    names_list = []
    for name in names:
        parts = name.split()
        names_list.append([parts[0], parts[-1]])
    return names_list

# Flattens a list of (one- or two-dimensional) lists into a single flat list
# The outer level is always unpacked; inner elements are unpacked only when
# they are themselves lists, everything else is kept as a single element.
# param two_d_list (list) List of (One or Two D) Lists
# return flattened (list) List of elements
def extract_2d_list(two_d_list):
    flattened = []
    for group in two_d_list:
        for item in group:
            if isinstance(item, list):
                flattened += item
            else:
                flattened.append(item)
    return flattened

<h3>Get Unique Values</h3>

In [5]:
# Get the Names of Employees in the HR Dataset
# Values are expected in the form 'Last, First [Middle]'; only the first token
# of the given name(s) is kept.
# The final row of the dataframe is skipped, as in the original implementation
# (presumably because the source CSV ends with an empty row - TODO confirm).
# Entries that cannot be parsed (missing value, no comma, nothing after the
# comma) are replaced by the placeholder ['Jeremy', 'Prater'].
# param df (dataframe) - Dataframe with names
# param column (str) - Column can be 'Employee Name' or 'Manager Name'
# Returns names (list) - Array of names (['First', 'Last'])
def get_names(df, column='Employee Name'):
    names = []
    # Iterate the values directly instead of positional lookups on a
    # label-indexed row, which newer pandas versions no longer support
    for raw in df[column].iloc[:-1]:
        try:
            pieces = raw.split(',')
            first = pieces[1].strip().split()[0]
            last = pieces[0].strip()
            names.append([first, last])
        except (AttributeError, IndexError):
            # AttributeError: value is not a string (e.g. NaN)
            # IndexError: no comma, or nothing after the comma
            names.append(['Jeremy', 'Prater'])
    return names

# Filter to list of unique names
# A name is kept only when neither its first name nor its last name has been
# seen before, either in `avoid` or in a name kept earlier in `names`.
# param names (list) - List of names in the format [['Mia', 'Brown'],[..]]
# param avoid (list) - Optional list of names to avoid duplicates of
# return unique (list) - List of names in the format [['Mia', 'Brown'],[..]]
def filter_names_helper(names, avoid=None):
    # Sets give constant-time membership checks; `avoid` itself is never modified
    seen_first = {n[0] for n in avoid} if avoid else set()
    seen_last = {n[1] for n in avoid} if avoid else set()
    unique = []
    for name in names:
        if name[0] in seen_first or name[1] in seen_last:
            continue
        seen_first.add(name[0])
        seen_last.add(name[1])
        unique.append(name)
    return unique

# Filter to list of unique names that don't overlap with manager or survey names
# The fixed survey users are appended at the end of the result.
# param df (dataframe) - Dataframe with 'Employee Name' and 'Manager Name' columns
# return unique (list) - List of names in the format [['Mia', 'Brown'],[..]]
def get_filtered_names(df):
    managers = str_name_to_list(get_uniq_str(df, "Manager Name"))
    managers = filter_names_helper(managers)
    survey_users = [['Mia','Brown'], ['Ivan','Rogers'], ['Julia','Soto'], ['Nan','Singh']]
    # Read the names from the dataframe that was passed in, not from a notebook global
    user_names = get_names(df)
    user_names = filter_names_helper(user_names, avoid=survey_users)
    # NOTE(review): get_uniq_str lower-cases the manager names while employee
    # names keep their original case, so this case-sensitive filter appears to
    # never remove anything - confirm whether managers should really be excluded
    user_names = filter_names_helper(user_names, avoid=managers)
    user_names.extend(survey_users)
    return user_names

# Build the synonym list for every name option
# For 'First Last' the synonyms are [full lower-case, First, first, Last, last].
# Everything after the first token is treated as the last name, so multi-word
# surnames stay intact; a single-token name only gets the first three entries
# and a blank name gets an empty list (keeping options and synonyms aligned).
# param ent_dict (dict) - Entity dictionary; ent_dict['name'][0] holds names like 'Mia Brown'
# return syn_names (list) - One synonym list per name, in the same order
def name_syn_update(ent_dict):
    syn_names = []
    for name in ent_dict['name'][0]:
        parts = name.split()
        if not parts:
            syn_names.append([])
            continue
        first = parts[0]
        name_syn = [name.lower(), first, first.lower()]
        if len(parts) > 1:
            last = " ".join(parts[1:])
            name_syn.extend([last, last.lower()])
        syn_names.append(name_syn)
    return syn_names

In [6]:
# Get the Unique Names of Employees in the HR Dataset
# NOTE(review): the original body called `filter_names`, which is not defined
# in this notebook; `filter_names_helper` + `name_to_str` is used instead to
# produce the documented return format - confirm this matches the intent.
# param df (dataframe) - Dataframe with names
# return unique (list) - List of names in the format ['Mia Brown', ..]
def get_uniq_names(df):
    return name_to_str(filter_names_helper(get_names(df)))

# Get Unique String Values of a Dataframe Column
# Missing values are dropped explicitly (instead of assuming the missing value
# is the last unique entry), and values are de-duplicated after being
# lower-cased and stripped, in order of first appearance.
# param df (dataframe) - Source Dataframe
# param col_name (str) - Name of Column to get unique values
# return uniq_arr (list) - List of unique, normalised strings
def get_uniq_str(df, col_name):
    normalized = (value.lower().strip() for value in df[col_name].dropna())
    # dict.fromkeys keeps the first occurrence of each value in order
    return list(dict.fromkeys(normalized))

# Get Unique Numeric Values of a Dataframe Column as Integer Strings
# Missing values are dropped explicitly instead of assuming the missing value
# is the last unique entry.
# param df (dataframe) - Source Dataframe
# param col_name (str) - Name of Column to get unique values
# return uniq_arr (list) - List of unique values, each truncated to int and converted to str
def get_uniq_num(df, col_name):
    return [str(int(value)) for value in df[col_name].dropna().unique()]

# Generate a Dictionary with Unique Values for Select Columns
# All values are read from the dataframe passed in (not from a notebook global).
# param df (dataframe) - Source Dataframe
# return uniq (dict) - Dictionary that maps entities to unique values
def gen_uniq_dict(df):
    uniq = {}
    # Predefined
    uniq['name'] = name_to_str(get_filtered_names(df))
    uniq['sex'] = ['male', 'female']
    # 'on a leave of absence' and 'going to start work in the future' are two
    # separate statuses (they used to be glued together by a stray '+')
    uniq['employment_status'] = ['active', 'voluntarily terminated', 'terminated for a cause',
                                 'on a leave of absence', 'going to start work in the future']
    # NOTE(review): 'expecations' / 'exeed' look like typos but are kept verbatim
    # in case other data files match on these exact strings - confirm before fixing
    uniq['performance_score'] = ['fully meet performance expecations', 'are too early to review',
                                 'meet 90-day expectations', 'are exceptional', 'need improvement',
                                 'exeed expecations']
    # Custom Preprocessing: keep the original casing, drop missing values,
    # and return a plain list rather than a numpy array
    uniq['state'] = df['State'].dropna().unique().tolist()
    # Standard Preprocessing
    uniq['age'] = get_uniq_num(df, 'Age')
    uniq['maritaldesc'] = get_uniq_str(df, "MaritalDesc")
    uniq['citizendesc'] = get_uniq_str(df, "CitizenDesc")
    uniq['racedesc'] = get_uniq_str(df, "RaceDesc")
    uniq['department'] = get_uniq_str(df, "Department")
    uniq['position'] = get_uniq_str(df, "Position")
    # 'manager' is not derived from the dataframe here; uniq_dict_update fills it
    uniq['employee_source'] = get_uniq_str(df, "Employee Source")
    return uniq

# Add entity-derived entries to the unique values dictionary (modified in place)
# 'money' takes the synonym list of the first money option; the other entities
# take every option, synonym and gazetteer value flattened into one list.
# param uniq (dict) - Dictionary that maps entities to unique values
# param ent_dict (dict) - Dictionary that Contains Entity Information
# return uniq (dict) - The same dictionary, with the extra entries added
def uniq_dict_update(uniq, ent_dict):
    uniq['money'] = ent_dict['money'][1][0]
    flattened_entities = ('time_interval', 'time_recur', 'function', 'extreme',
                          'employment_action', 'date_compare', 'manager')
    for entity in flattened_entities:
        uniq[entity] = extract_2d_list(ent_dict[entity])
    return uniq

In [7]:
#extract_2d_list(ent_dict['manager'])

<h3>Load, Parse and Clean Entity Data</h3>

In [8]:
# Load the entity dataframe from a CSV file
# Only the first four columns are kept, and the rows are cut off at the first
# row whose values are all NaN (that row and everything after it is dropped).
# param path (str) - Path to csv
# return ent_df (dataframe) - Entity Info Dataframe
def load_ent(path):
    frame = pd.read_csv(path).iloc[:, :4]
    cutoff = len(frame)
    for pos in range(len(frame)):
        if list_is_nan(frame.iloc[pos, :].values):
            cutoff = pos
            break
    return frame.iloc[:cutoff, :]

# Parse CSV that contains [Entity, Options, Synonyms, Gazetteer]
# A non-NaN 'Entity' cell starts a new entity; following rows with a NaN
# 'Entity' cell belong to the most recent entity.
# param ent_df (dataframe) - Contains the columns listed above
# return ent_dict (dict) - Uncleaned dictionary where the keys are the entities
#                          and the values are [options (arr), synonyms (arr), gazetteer (arr)]
# raises KeyError - If the first row has no 'Entity' value
def ent_parse(ent_df):
    ent_dict = {}
    curr_ent = ''
    for _, row in ent_df.iterrows():
        # update curr_ent if new entity
        if not is_nan(row['Entity']):
            curr_ent = row['Entity']
            ent_dict[curr_ent] = [[], [], []]
        options = row['Options']
        # an Options cell with more than two commas is treated as an inline list
        if not is_nan(options) and options.count(',') > 2:
            # NOTE(review): the options stay one raw string here (not split);
            # clean_ent_dict later overwrites name/state/age options - confirm
            # that no other entity uses an inline list
            ent_dict[curr_ent][0] = options
            if not is_nan(row['Synonyms']):
                ent_dict[curr_ent][1] = row['Synonyms'].split(',')
            # look the gazetteer up by label; positional lookup on a
            # label-indexed row is not supported by newer pandas versions
            if not is_nan(row['Gazetteer']):
                ent_dict[curr_ent][2] = row['Gazetteer']
        # single entry in option column
        else:
            ent_dict[curr_ent][0].append(options)
            ent_dict[curr_ent][1].append(row['Synonyms'])
            ent_dict[curr_ent][2].append(row['Gazetteer'])
    return ent_dict

# Cleans the options of the ent dictionary (in place)
# NaN options are dropped; single quotes and commas are removed from the rest.
# param ent_dict (dictionary) - Entity Dictionary
# return ent_dict (dictionary) - Cleaned Entity Dictionary
def clean_options(ent_dict):
    for entity in ent_dict.keys():
        ent_dict[entity][0] = [
            option.replace("'", "").replace(",", "")
            for option in ent_dict[entity][0]
            if not is_nan(option)
        ]
    return ent_dict

# Cleans the synonyms of the ent dictionary (in place)
# Every non-NaN synonym entry is split on commas into a list of stripped
# strings; NaN entries are kept as they are so positions stay aligned.
# param ent_dict (dictionary) - Entity Dictionary
# return ent_dict (dictionary) - Cleaned Entity Dictionary
def clean_synonyms(ent_dict):
    for entity in ent_dict.keys():
        ent_dict[entity][1] = [
            entry if is_nan(entry) else [piece.strip() for piece in entry.split(',')]
            for entry in ent_dict[entity][1]
        ]
    return ent_dict

# Cleans the gazetteers of the ent dictionary (in place)
# The gazetteer is either one comma-separated string or a list whose first
# element is such a string (only the first element is used); the result is a
# list of stripped terms, empty when there is nothing usable.
# param ent_dict (dictionary) - Entity Dictionary
# return ent_dict (dictionary) - Cleaned Entity Dictionary
def clean_gazetteer(ent_dict):
    for entity in ent_dict.keys():
        raw = ent_dict[entity][2]
        source = None
        if len(raw) != 0:
            if type(raw) is str:
                source = raw
            elif not is_nan(raw[0]):
                source = raw[0]
        if source is None:
            ent_dict[entity][2] = []
        else:
            ent_dict[entity][2] = [term.strip() for term in source.split(',')]
    return ent_dict

# Cleans an Entity Dictionary
# Runs the options, synonyms and gazetteer cleaners, then replaces the options
# of 'name', 'state' and 'age' with the values from the unique dictionary.
# ent_dict (dict) - Uncleaned dictionary of parsed entity information
# uniq (dict) - Dictionary that maps entities to unique values
# return cleaned_ent_dict (dict) - Cleaned dictionary
def clean_ent_dict(ent_dict, uniq):
    for cleaner in (clean_options, clean_synonyms, clean_gazetteer):
        ent_dict = cleaner(ent_dict)
    for entity in ('name', 'state', 'age'):
        ent_dict[entity][0] = uniq[entity]
    return ent_dict

# Parse and Clean Entity Data in one step
# param ent_df (dataframe) - Contains the columns [Entity, Options, Synonyms, Gazetteer]
# param uniq (dict) - Dictionary that maps entities to unique values
# return ent_dict (dict) - Cleaned entity dictionary
def parse_clean_ent(ent_df, uniq):
    return clean_ent_dict(ent_parse(ent_df), uniq)

<h3>File Generation</h3>

In [9]:
# Convert a list into a text file
# Writes <base_dir>/<folder>/gazetteer.txt with one value per line, creating
# the folder when needed and overwriting an existing file.
# param folder (str) - Name of the folder to write the file
# param lines (list) - List of values to write to file
# param base_dir (str) - Root directory that holds the entity folders
def list_to_txt_file(folder, lines, base_dir='../hr_assistant/entities/'):
    directory = os.path.join(base_dir, folder)
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(directory, exist_ok=True)
    with open(os.path.join(directory, 'gazetteer.txt'), 'w') as filehandle:
        filehandle.writelines("%s\n" % line for line in lines)
        
# Convert a dict into a json file
# Writes <base_dir>/<folder>/mapping.json (4-space indented), creating the
# folder when needed and overwriting an existing file.
# param folder (str) - Name of the folder to write the file
# param json_dict (dict) - Json dict to write to file
# param base_dir (str) - Root directory that holds the entity folders
def dict_to_json_file(folder, json_dict, base_dir='../hr_assistant/entities/'):
    directory = os.path.join(base_dir, folder)
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(directory, exist_ok=True)
    with open(os.path.join(directory, "mapping.json"), "w") as f:
        f.write(json.dumps(json_dict, indent=4))
        
# Generate a Mapping JSON Dict to create Mapping.json file
# When the number of synonym entries differs from the number of options, the
# synonyms are discarded and every option gets an empty whitelist.
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
# param entity (str) - The entity to generate the mapping for
# return json_dict (dict) - Json Dict in the proper format for mapping.json
def gen_map_json(ent_dict, entity):
    options = ent_dict[entity][0]
    synonyms = ent_dict[entity][1]
    if len(options) == len(synonyms):
        return gen_map_json_helper(options, synonyms)
    return gen_map_json_helper(options, [[] for _ in range(len(options))])

# Helper function to Generate a Mapping JSON Dictionary
# Produces {'entities': [{'whitelist': [...], 'cname': option}, ...]}; a NaN
# synonym entry becomes an empty whitelist.
# param options (list) - Array of options
# param synonyms (2d list) - Array of synonym arrays corresponding to options
# return json_dict (dict) - Json Dict in the proper format for mapping.json
def gen_map_json_helper(options, synonyms):
    entries = []
    for idx in range(len(options)):
        whitelist = [] if is_nan(synonyms[idx]) else synonyms[idx]
        entries.append({'whitelist': whitelist, 'cname': options[idx]})
    return {'entities': entries}

# Create Mapping.json files for Every Entity in an Entity Dictionary
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
def gen_map_json_files(ent_dict):
    for entity in ent_dict.keys():
        mapping = gen_map_json(ent_dict, entity)
        if is_nan(mapping):
            continue
        dict_to_json_file(entity, mapping)

# Collect All Synonyms for a Single Entity in Entity Dict
# NaN synonym entries are skipped; all other entries are concatenated.
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
# param entity (str) - The entity to get Synonyms for
# return synonyms (list) - List of Synonyms of Specified entity
def get_synonyms(ent_dict, entity):
    collected = []
    for group in ent_dict[entity][1]:
        if is_nan(group):
            continue
        collected.extend(group)
    return collected

# Generate a Gazetteer List for an Entity
# The list is the entity's options, followed by all of its synonyms, followed
# by its gazetteer terms.
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
# param entity (str) - The entity to generate the gazetteer list for
# return gaz (list) - List of gazetteer words relevant to the entity
def gen_gaz_list(ent_dict, entity):
    entry = ent_dict[entity]
    return [*entry[0], *get_synonyms(ent_dict, entity), *entry[2]]

# Create Gazetteer Files for Every Entity in an Entity Dictionary
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
def gen_gazetteers(ent_dict):
    for entity in ent_dict.keys():
        list_to_txt_file(entity, gen_gaz_list(ent_dict, entity))

<h3>Debug Labelling</h3>

In [10]:
# Find all labels in a sentence
# A label is the text from a '{' up to and including the next '}'. A '}' with
# no pending '{' is ignored; when several '{' precede a '}', the last one wins.
# param text (str) String that may or may not contain labels in the form of {text|ent_type}
# return labels (list) List of labels found in the text (braces included)
# return positions (list) List of [start, stop] character indices (inclusive) of each label
def get_labels(text):
    labels, positions = [], []
    start = None
    for i, char in enumerate(text):
        if char == '{':
            start = i
        elif char == '}' and start is not None:
            labels.append(text[start:i + 1])
            positions.append([start, i])
            # forget the opening brace so a stray '}' cannot reuse it
            start = None
    return labels, positions

# Given a list of labels, Separate Keys and Values
# The key is the part before the first '|' (with '{' removed) and the value is
# the part right after it (with '}' removed); labels without '|' are skipped.
# param labels (list) List of labels found in the text
# return kv (2d list) List of the Keys list and Values list [[key_array], [value_array]]
def get_kv(labels):
    keys, values = [], []
    for label in labels:
        if '|' not in label:
            continue
        parts = label.split('|')
        keys.append(parts[0].replace("{", ""))
        values.append(parts[1].replace("}", ""))
    return [keys, values]

# Report entity options and synonyms that occur in a sentence without being labelled
# The sentence is lower-cased and split on whitespace; an option is reported
# when it equals one of those tokens and is neither a label key nor a label
# value, a synonym when its lower-case form equals a token and it is not a label key.
# param sentence (str) String that may or may not contain labels in the form of {text|ent_type}
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
# param kv_labels (2d list) List of the Keys list and Values list [[key_array], [value_array]]
# param col (str) Column in the dataframe
# param idx (int) Index of the Current Sentence in the Dataframe
def label_chk_helper(sentence, ent_dict, kv_labels, col, idx):
    tokens = [word.lower() for word in sentence.split()]

    # Print one finding in the shared report layout
    def report(headline, term_line, entity):
        print(headline)
        print(term_line)
        print("ENTITY: " + entity)
        print("SENTENCE: " + sentence)
        print("Column: " + col)
        print("Index: " + str(idx + 2))
        print("==========================================")

    for ent in ent_dict:
        # Check for 'Entity Options' in text
        for opt in ent_dict[ent][0]:
            if opt in tokens and opt not in kv_labels[0] and opt not in kv_labels[1]:
                report("POTENTIAL OPTION-MISSING LABEL", "OPTION: " + opt, ent)
        # Check for 'Synonym Options' in text
        for syn in get_synonyms(ent_dict, ent):
            if syn.lower() in tokens and syn not in kv_labels[0]:
                report("POTENTIAL SYNONYM-MISSING LABEL", "SYNONYM: " + syn, ent)
                
# Finds mismatches between labels in the Dataframe
# The first value seen for a key is remembered in l_dict; a later occurrence
# of the same key with a different value is printed as a potential mismatch.
# param labels (2d list) List of the Keys list and Values list [[key_array], [value_array]]
# param sentence (str) String that may or may not contain labels in the form of {text|ent_type}
# param l_dict (dict) Dictionary that keeps track of label mappings, updated in place
# param col (str) Column in the dataframe
# param idx (int) Index of the Current Sentence in the Dataframe
def chk_mismatch(labels, sentence, l_dict, col, idx):
    for ct, k in enumerate(labels[0]):
        v = labels[1][ct]
        if k not in l_dict:
            l_dict[k] = v
            continue
        if l_dict[k] != v:
            print("POTENTIAL MISMATCH for: " + k)
            print("CURRENT LABEL: " + v)
            print("IN DICT: " + l_dict[k])
            print("SENTENCE: " + sentence)
            print("Column: " + col)
            print("Index: " + str(idx + 2))
            print("==========================================")

# Check if the Synonyms to Entities Were not Labelled in Training
# Each column is scanned by index label, starting at label 2 and stopping at
# the first NaN cell. Iterating over the labels themselves (instead of
# counting up to len(df)) also covers the last rows of a frame that was
# sliced with iloc[2:] and therefore starts at label 2.
# Assumes an integer index - TODO confirm for other inputs.
# param df (dataframe) - Dataframe where each column is an intent and each row has sentence examples
# param ent_dict (dict) - Dictionary that contains entities as keys and synonyms
def label_chk(df, ent_dict):
    first_row = 2  # rows with a smaller index label are not sentence examples
    for col in df:
        print(col.upper() + "=============================================================")
        l_dict = {}
        for idx, sentence in df[col].items():
            if idx < first_row:
                continue
            if is_nan(sentence):
                break
            labels, _ = get_labels(sentence)
            kv_labels = get_kv(labels)
            label_chk_helper(sentence, ent_dict, kv_labels, col, idx)
            chk_mismatch(kv_labels, sentence, l_dict, col, idx)

<h2>Workflow</h2>

<h4>Load Data</h4>

In [27]:
# Load the core HR dataset from CSV and display it
hr_data = pd.read_csv('./custom_scripts/old_core_dataset.csv')
hr_data

Unnamed: 0,Employee Name,Employee Number,State,Zip,DOB,Age,Sex,MaritalDesc,CitizenDesc,Hispanic/Latino,...,Date of Hire,Date of Termination,Reason For Term,Employment Status,Department,Position,Pay Rate,Manager Name,Employee Source,Performance Score
0,"Brown, Mia",1.103024e+09,MA,1450.0,11/24/1985,32.0,Female,Married,US Citizen,No,...,10/27/2008,,N/A - still employed,Active,Admin Offices,Accountant I,28.50,Brandon R. LeBlanc,Diversity Job Fair,Fully Meets
1,"LaRotonda, William",1.106027e+09,MA,1460.0,4/26/1984,33.0,Male,Divorced,US Citizen,No,...,1/6/2014,,N/A - still employed,Active,Admin Offices,Accountant I,23.00,Brandon R. LeBlanc,Website Banner Ads,Fully Meets
2,"Steans, Tyrone",1.302053e+09,MA,2703.0,9/1/1986,31.0,Male,Single,US Citizen,No,...,9/29/2014,,N/A - still employed,Active,Admin Offices,Accountant I,29.00,Brandon R. LeBlanc,Internet Search,Fully Meets
3,"Howard, Estelle",1.211051e+09,MA,2170.0,9/16/1985,32.0,Female,Married,US Citizen,No,...,2/16/2015,4/15/2015,N/A - still employed,Active,Admin Offices,Administrative Assistant,21.50,Brandon R. LeBlanc,Pay Per Click - Google,N/A- too early to review
4,"Singh, Nan",1.307060e+09,MA,2330.0,5/19/1988,29.0,Female,Single,US Citizen,No,...,5/1/2015,,N/A - still employed,Active,Admin Offices,Administrative Assistant,16.56,Brandon R. LeBlanc,Website Banner Ads,N/A- too early to review
5,"Smith, Leigh Ann",7.110077e+08,MA,1844.0,6/14/1987,30.0,Female,Married,US Citizen,No,...,9/26/2011,9/25/2013,career change,Voluntarily Terminated,Admin Offices,Administrative Assistant,20.50,Brandon R. LeBlanc,Diversity Job Fair,Fully Meets
6,"LeBlanc, Brandon R",1.102024e+09,MA,1460.0,6/10/1984,33.0,Male,Married,US Citizen,No,...,1/5/2016,,N/A - still employed,Active,Admin Offices,Shared Services Manager,55.00,Janet King,Monster.com,Fully Meets
7,"Quinn, Sean",1.206043e+09,MA,2045.0,11/6/1984,33.0,Male,Married,Eligible NonCitizen,No,...,2/21/2011,8/15/2015,career change,Voluntarily Terminated,Admin Offices,Shared Services Manager,55.00,Janet King,Diversity Job Fair,Fully Meets
8,"Boutwell, Bonalyn",1.307060e+09,MA,2468.0,4/4/1987,30.0,Female,Married,US Citizen,No,...,2/16/2015,,N/A - still employed,Active,Admin Offices,Sr. Accountant,34.95,Brandon R. LeBlanc,Diversity Job Fair,90-day meets
9,"Foster-Baker, Amy",1.201031e+09,MA,2050.0,4/16/1979,38.0,Female,Married,US Citizen,no,...,1/5/2009,,N/A - still employed,Active,Admin Offices,Sr. Accountant,34.95,Board of Directors,Other,Fully Meets


In [10]:
#hr_data.columns

<h4>Create Unique Values Dictionary</h4>

In [28]:
# Build the dictionary that maps each entity to its unique values
uniq = gen_uniq_dict(hr_data)

In [12]:
#uniq

<h4>Load Entities</h4>

In [29]:
# Load the entity sheet, then parse and clean it into {entity: [options, synonyms, gazetteer]}
ent_df = load_ent('./custom_scripts/HR Manager Schema - Entities.csv')
ent_dict = parse_clean_ent(ent_df, uniq)
# Replace the name synonyms with the ones generated from the name options
ent_dict['name'][1] = name_syn_update(ent_dict)
# Add the entity-derived value lists (money, time_interval, ...) to uniq
uniq = uniq_dict_update(uniq, ent_dict)
ent_dict

{'name': [['William LaRotonda',
   'Tyrone Steans',
   'Estelle Howard',
   'Leigh Smith',
   'Brandon LeBlanc',
   'Sean Quinn',
   'Bonalyn Boutwell',
   'Amy Foster-Baker',
   'Janet King',
   'Jennifer Zamora',
   'Renee Becker',
   'Taisha Goble',
   'Daniff Hernandez',
   'Jayne Horton',
   'Noelle Johnson',
   'Thomas Murray',
   'Randall Pearson',
   'Thelma Petrowsky',
   'Lori Roby',
   'Jason Salter',
   'Kramer Simard',
   'Simon Roup',
   'Ricardo Ruiz',
   'Peter Monroe',
   'Eric Dougall',
   'Rick Clayton',
   'Lisa Galia',
   'Leonara Lindsay',
   'Alejandro Bacong',
   'Anthony Cisco',
   'Linda Dolan',
   'Maria Gonzalez',
   'Carlos Merlos',
   'Tanya Morway',
   'Anita Shepard',
   'Neville Tredinnick',
   'Jumil Turpin',
   'Karthikeyan Ait Sidi',
   'Claudia Carr',
   'Donald Favis',
   'Bianca Roehrich',
   'Ann Daniele',
   'Jyoti Lajiri',
   'Jeremiah Semizoglou',
   'Joe South',
   'Sarah Warfield',
   'Elisa Bramante',
   'Michael Albert',
   'Charles Bozzi'

In [30]:
# Spot-check the generated synonym list of one name (index 240 is presumably an arbitrary sample)
ent_dict['name'][1][240]

['andrew szabo', 'Andrew', 'andrew', 'Szabo', 'szabo']

<h4>Labelling Check</h4>

In [31]:
# Load the intent sheet (one column per intent, one example sentence per row)
intent_txt = pd.read_csv('./custom_scripts/HR Manager Schema - intent_master.csv')
# Drop the first two rows by position; the remaining rows keep their original index labels (2, 3, ...)
intent_txt = intent_txt.iloc[2:, :]
intent_txt

Unnamed: 0,get_info,get_aggregate,get_employees,get_salary,get_salary_aggregate,get_salary_employees,get_date,get_date_range_aggregate,get date_range_employees,get_hierarchy
2,What is {nan|name}'s race?,What is the {total|function} {number of|functi...,Give me employees who are {single|maritaldesc},What is {Mia|name}'s {pay|money}?,What is the {median|function} {pay|money} of {...,Which employee(s) have {lowest|extreme} {incom...,What is the {date of hiring|employment_action}...,What {percentage|function} of employees were {...,Give me a list of people {hired|employment_act...,Who is {Mia|name}'s {manager|manager}?
3,Is {Michael|name} {married|maritaldesc}?,What {percent|function} of employees {exceeded...,All employees from {MA|state},Tell me who earned the {least|extreme} that wa...,what {percent|function} of employees {make|mon...,who is the {highest|extreme} {earning|money} {...,When did {Amy|name} {join|employment_action} t...,What {percent|function} of employees were {hir...,Tell me about employees who {started|employmen...,Which employees have {Julia|name} as their {ma...
4,What is {Nan|name}'s official position?,What is the {percentage|function} of new grads...,Which employees have been recently {terminated...,What is the {pay rate|money} of {Julia|name}?,What is the {average|function} {pay rate|money}?,For employees {hired|employment_action} {betwe...,How long has {Ivan|name} been with the company?,What {percentage|function} of employees were {...,Which employees were not yet {born|dob} when {...,What is the name of {Julia|name}'s {manager|ma...
5,did {Nan|name} hear about us through {Glassdoo...,What is the {average|function} {age|age}?,Which employees are not {US citizens|citizende...,What is {Nan|name}'s {pay rate|money}?,What is the {average|function} {pay|money} of ...,what are the {salaries|money} for employees th...,When was {Nan|name} {fired|employment_action}?,What {percent|function} of all our employees w...,list the employees who {joined|employment_acti...,Who is {Michael|name}'s {manager|manager}?
6,give me {Nan|name}'s race please,What {percent|function} of employees are manag...,Which employees were {let go|employment_action...,How much does {Michael|name} {make|money}?,Calculate the {average|function} {pay rate|mon...,what are our {top|extreme} {earners|money} {ma...,How long was {Mia|name} working for?,What {percentage|function} of employees were {...,Which employess were {hired|employment_action}...,Who is {John Reeder|name}'s {manager|manager}?
7,What is {Mia|name}'s employment status?,What's the {average|function} {age of|age} emp...,{managers|position},Is {Mia|name} being {paid|money} {$40k|sys_amo...,{average|function} {pay rate|money} for {women...,{below|comparator} {average|function} {earning...,When was {Michael|name}'s {date of hire|employ...,{How many|function} employees were {hired|empl...,Which employees were {hired|employment_action}...,Who is {Mia Brown|name}'s {manager|manager}?
8,Is {Ivan|name} from out of state?,What {percentage|function} of the employees ar...,Which employees have been {terminated|employme...,Does {Mia|name} get {$|money}{70k|sys_number} ...,What's the {average|function} {pay rate|money}?,Tell me who all are {making|money} {more than|...,What is {Nan|name}'s {date of birth|dob}?,{How many|function} people were based out of {...,Who worked for Cisco for {less than|comparator...,Who is the {manager|manager} for {Bob|name}
9,Does {Michael|name} {still work at|employment_...,What {percentage|function} of employees are {e...,Which employees have been with the company lon...,When we let {Nan|name} go {fired|employment_ac...,What is the {average|function} {pay rate|money...,Which {software engineers|position} are {paid|...,What year was {Mia|name} {hired|employment_act...,What {percentage|function} of employees have b...,Which employees have been {hired|employment_ac...,What is {Mia|name}'s {manager|manager}'s name?
10,What is {Mia|name}'s {performance score|perfor...,What {percentage|function} of employees are {f...,Which employees have gotten only {positive fee...,{pay rate|money} of {Mia|name},{How many|function} employees are paid {above|...,"Of all the {Production Managers|position}, whi...",When did we {fire|employment_action} {Jeff|name}?,{How many|function} employees were {born|dob} ...,Which employee was {hired|employment_action} w...,Who {reports|manager} into {Nan Singh|name}
11,What position is {Julia|name} in?,{Percentage|function} of Employees in departme...,Which employees have a {spouse|maritaldesc}?,how much {money|money} does {Mia|name} make?,what {number of|function} people {earn|money} ...,above {average|function} {earning|money} emplo...,How long has {Mia|name} worked here?,{How many|function} people have {worked here|e...,Which employees have {been with us|employment_...,can i have the names of employees who report t...


In [16]:
#label_chk(intent_txt, ent_dict)

In [17]:
#get_labels("What is the {manager|manager} name of {Julia|name}?")

<h4>Generate Gazetteers</h4>

In [18]:
#gen_gazetteers(ent_dict)

<h4>Generate Mapping.json</h4>

In [None]:
#gen_map_json_files(ent_dict)

<h4>Data Augmentation</h4>

In [25]:
# Replace the text of labelled entities with random values of the same entity type
# Labels are processed from the last to the first so that the character
# positions of the labels still to be processed stay valid.
# Labels whose type is unknown, excluded, or has no usable (string) candidate
# are left unchanged.
# param sentence (str) Sentence with labels in the form of {text|ent_type}
# param positions (list) List of [start, stop] inclusive character indices of each label
# param uniq (dict) Dictionary that maps entity types to candidate values
# param excluded (collection) Entity types that must never be swapped
# return sentence (str) Sentence with the swapped labels
def entity_swap(sentence, positions, uniq, excluded=()):
    chars = list(sentence)
    for start, stop in reversed(positions):
        label = ''.join(chars[start:stop + 1])
        # get_kv returns [texts, entity types]; only the types are needed here
        _, ent_types = get_kv([label])
        for ent_type in ent_types:
            if ent_type not in uniq or ent_type in excluded:
                continue
            # Candidate lists may contain NaN placeholders; keep strings only
            candidates = [value for value in uniq[ent_type] if isinstance(value, str)]
            if not candidates:
                continue
            new_ent = "{" + random.choice(candidates) + "|" + ent_type + '}'
            chars[start:stop + 1] = list(new_ent)
    return "".join(chars)

In [24]:
# Remove Labelling
# Every {text|ent_type} label is replaced by its text, so characters attached
# to a label (e.g. the "'s" in "{Mia|name}'s") are preserved. Remaining braces
# are removed and runs of whitespace are collapsed to single spaces.
# param sentence (str) Sentence with labels in the form of {text|ent_type}
# param positions (list) Unused, kept for interface compatibility
# param uniq (dict) Unused, kept for interface compatibility
# return sentence (str) Sentence without labels
def entity_label_remove(sentence, positions=None, uniq=None):
    unlabelled = re.sub(r'\{([^{}|]*)\|[^{}]*\}', r'\1', sentence)
    unlabelled = unlabelled.replace("{", "").replace("}", "")
    return " ".join(unlabelled.split())

In [None]:
#entity_swap("What is the {manager|manager} name of {Julia|name}?", [[12, 28], [38, 49]], uniq)
# Scratch check: splitting on whitespace and re-joining with single spaces
" ".join("i like to jump".split())

'i like to jump'

In [None]:
# Display the intent sentences that are used for data augmentation below
intent_txt

Unnamed: 0,get_info,get_aggregate,get_employees,get_salary,get_salary_aggregate,get_salary_employees,get_date,get_date_range_aggregate,get date_range_employees,get_hierarchy
2,What is {nan|name}'s race?,What is the {total|function} {number of|functi...,Give me employees who are {single|maritaldesc},What is {Mia|name}'s {pay|money}?,What is the {median|function} {pay|money} of {...,Which employee(s) have {lowest|extreme} {incom...,What is the {date of hiring|employment_action}...,What {percentage|function} of employees were {...,Give me a list of people {hired|employment_act...,Who is {Mia|name}'s {manager|manager}?
3,Is {Michael|name} {married|maritaldesc}?,What {percent|function} of employees {exceeded...,All employees from {MA|state},Tell me who earned the {least|extreme} that wa...,what {percent|function} of employees {make|mon...,who is the {highest|extreme} {earning|money} {...,When did {Amy|name} {join|employment_action} t...,What {percent|function} of employees were {hir...,Tell me about employees who {started|employmen...,Which employees have {Julia|name} as their {ma...
4,What is {Nan|name}'s official position?,What is the {percentage|function} of new grads...,Which employees have been recently {terminated...,What is the {pay rate|money} of {Julia|name}?,What is the {average|function} {pay rate|money}?,For employees {hired|employment_action} {betwe...,How long has {Ivan|name} been with the company?,What {percentage|function} of employees were {...,Which employees were not yet {born|dob} when {...,What is the name of {Julia|name}'s {manager|ma...
5,did {Nan|name} hear about us through {Glassdoo...,What is the {average|function} {age|age}?,Which employees are not {US citizens|citizende...,What is {Nan|name}'s {pay rate|money}?,What is the {average|function} {pay|money} of ...,what are the {salaries|money} for employees th...,When was {Nan|name} {fired|employment_action}?,What {percent|function} of all our employees w...,list the employees who {joined|employment_acti...,Who is {Michael|name}'s {manager|manager}?
6,give me {Nan|name}'s race please,What {percent|function} of employees are manag...,Which employees were {let go|employment_action...,How much does {Michael|name} {make|money}?,Calculate the {average|function} {pay rate|mon...,what are our {top|extreme} {earners|money} {ma...,How long was {Mia|name} working for?,What {percentage|function} of employees were {...,Which employess were {hired|employment_action}...,Who is {John Reeder|name}'s {manager|manager}?
7,What is {Mia|name}'s employment status?,What's the {average|function} {age of|age} emp...,{managers|position},Is {Mia|name} being {paid|money} {$40k|sys_amo...,{average|function} {pay rate|money} for {women...,{below|comparator} {average|function} {earning...,When was {Michael|name}'s {date of hire|employ...,{How many|function} employees were {hired|empl...,Which employees were {hired|employment_action}...,Who is {Mia Brown|name}'s {manager|manager}?
8,Is {Ivan|name} from out of state?,What {percentage|function} of the employees ar...,Which employees have been {terminated|employme...,Does {Mia|name} get {$|money}{70k|sys_number} ...,What's the {average|function} {pay rate|money}?,Tell me who all are {making|money} {more than|...,What is {Nan|name}'s {date of birth|dob}?,{How many|function} people were based out of {...,Who worked for Cisco for {less than|comparator...,Who is the {manager|manager} for {Bob|name}
9,Does {Michael|name} {still work at|employment_...,What {percentage|function} of employees are {e...,Which employees have been with the company lon...,When we let {Nan|name} go {fired|employment_ac...,What is the {average|function} {pay rate|money...,Which {software engineers|position} are {paid|...,What year was {Mia|name} {hired|employment_act...,What {percentage|function} of employees have b...,Which employees have been {hired|employment_ac...,What is {Mia|name}'s {manager|manager}'s name?
10,What is {Mia|name}'s {performance score|perfor...,What {percentage|function} of employees are {f...,Which employees have gotten only {positive fee...,{pay rate|money} of {Mia|name},{How many|function} employees are paid {above|...,"Of all the {Production Managers|position}, whi...",When did we {fire|employment_action} {Jeff|name}?,{How many|function} employees were {born|dob} ...,Which employee was {hired|employment_action} w...,Who {reports|manager} into {Nan Singh|name}
11,What position is {Julia|name} in?,{Percentage|function} of Employees in departme...,Which employees have a {spouse|maritaldesc}?,how much {money|money} does {Mia|name} make?,what {number of|function} people {earn|money} ...,above {average|function} {earning|money} emplo...,How long has {Mia|name} worked here?,{How many|function} people have {worked here|e...,Which employees have {been with us|employment_...,can i have the names of employees who report t...


In [12]:
# Generate entity-swapped variants of the annotated queries in every column and write them to disk
# param df (dataframe) - Annotated queries, one column per output file (column names must be strings)
# param uniq - Replacement values handed unchanged to entity_swap (structure is defined by entity_swap)
# param start_row (int) - Position of the first query row in each column; earlier rows are skipped
#                         (presumably header rows such as domain/intent - TODO confirm)
# param n_variants (int) - Number of entity_swap variants generated per query
# param out_dir (str) - Directory that receives one '<column>.txt' file per column
# return None - Output is written to files; the number of generated lines is printed per column
def data_augment(df, uniq, start_row=2, n_variants=3, out_dir='data_augment'):
    # Use the dataframe passed by the caller; the previous version silently
    # replaced it with the notebook global `intent_txt`.
    # Create the output directory up front so open() cannot fail on a fresh checkout.
    os.makedirs(out_dir, exist_ok=True)
    for col in df:
        print(col.upper() + "=============================================================")
        augment = []
        # Walk the column by position and stop at the first NaN cell: columns
        # shorter than the frame are padded with NaN after their last query.
        for sentence in df[col].iloc[start_row:]:
            if is_nan(sentence): break
            # Only the entity positions are needed here; the labels are unused.
            _labels, pos = get_labels(sentence)
            for _ in range(n_variants): augment.append(entity_swap(sentence, pos, uniq))
        print("Augmented Lines Generated: " + str(len(augment)))
        # One augmented query per line, one file per column.
        with open(os.path.join(out_dir, col + ".txt"), 'w') as filehandle:
            filehandle.writelines("%s\n" % line for line in augment)

#data_augment(intent_txt, uniq)

In [13]:
# Turn on MindMeld's logging before anything else so the training progress is shown
from mindmeld import configure_logs
configure_logs()

# Load the HR assistant app and train its full NLP pipeline
# (the log printed by this cell shows domain, intent, entity and role models being fit)
from mindmeld.components.nlp import NaturalLanguageProcessor
nlp = NaturalLanguageProcessor(app_path='./hr_assistant')
nlp.build()

  from numpy.core.umath_tests import inner1d


Fitting domain classifier
Loading raw queries from file ./hr_assistant/domains/date/get_date/train.txt
Loading raw queries from file ./hr_assistant/domains/date/get_date_range_aggregate/train.txt
Loading raw queries from file ./hr_assistant/domains/date/get_date_range_employees/train.txt
Loading raw queries from file ./hr_assistant/domains/general/get_aggregate/train.txt
Loading raw queries from file ./hr_assistant/domains/general/get_employees/train.txt
Loading raw queries from file ./hr_assistant/domains/general/get_info/train.txt
Loading raw queries from file ./hr_assistant/domains/greeting/exit/train.txt
Loading raw queries from file ./hr_assistant/domains/greeting/greet/train.txt
Loading raw queries from file ./hr_assistant/domains/hierarchy/get_hierarchy/train.txt
Loading raw queries from file ./hr_assistant/domains/salary/get_salary/train.txt
Loading raw queries from file ./hr_assistant/domains/salary/get_salary_aggregate/train.txt
Loading raw queries from file ./hr_assistant/do

100%|██████████| 245/245 [00:00<00:00, 1039.51it/s]

Loaded 245 documents
Fitting role classifier: domain='hierarchy', intent='get_hierarchy', entity_type='manager'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_manager'
Elasticsearch index 'synonym_manager' for application 'hr_assistant' already exists!
Loading index 'synonym_manager'



100%|██████████| 1/1 [00:00<00:00, 61.33it/s]

Loaded 1 document
Fitting role classifier: domain='hierarchy', intent='get_hierarchy', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'



100%|██████████| 4/4 [00:00<00:00, 148.53it/s]

Loaded 4 documents
Fitting intent classifier: domain='greeting'





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 99.52%, params: {'C': 1, 'class_weight': {0: 1.5304182509505702, 1: 0.883067896060352}, 'fit_intercept': False}
Fitting entity recognizer: domain='greeting', intent='exit'
No entity model configuration set. Using default.
There are no labels in this label set, so we don't fit the model.
Fitting entity recognizer: domain='greeting', intent='greet'
No entity model configuration set. Using default.
There are no labels in this label set, so we don't fit the model.
Fitting entity recognizer: domain='unsupported', intent='unsupported'
No entity model configuration set. Using default.
There are no labels in this label set, so we don't fit the model.
Fitting intent classifier: domain='general'
Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 98.87%, params: {'C': 1, 'class_weight': {0: 1.105, 1: 0.906276150627615, 2: 1.0173267326732673}, 'fit_intercept': True}
Fitting entity

100%|██████████| 245/245 [00:00<00:00, 1125.07it/s]

Loaded 245 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!
Loading index 'synonym_position'



100%|██████████| 24/24 [00:00<00:00, 552.96it/s]

Loaded 24 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='employment_status'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_status'
Elasticsearch index 'synonym_employment_status' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_status'



100%|██████████| 5/5 [00:00<00:00, 123.63it/s]

Loaded 5 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='citizendesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_citizendesc'





Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'


100%|██████████| 3/3 [00:00<00:00, 119.87it/s]

Loaded 3 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='racedesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_racedesc'
Elasticsearch index 'synonym_racedesc' for application 'hr_assistant' already exists!
Loading index 'synonym_racedesc'



100%|██████████| 6/6 [00:00<00:00, 189.29it/s]

Loaded 6 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='state'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_state'
Elasticsearch index 'synonym_state' for application 'hr_assistant' already exists!
Loading index 'synonym_state'



100%|██████████| 28/28 [00:00<00:00, 659.57it/s]

Loaded 28 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='department'





No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'


100%|██████████| 6/6 [00:00<00:00, 147.54it/s]

Loaded 6 documents





Fitting role classifier: domain='general', intent='get_info', entity_type='employee_source'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employee_source'
Elasticsearch index 'synonym_employee_source' for application 'hr_assistant' already exists!
Loading index 'synonym_employee_source'


100%|██████████| 20/20 [00:00<00:00, 393.17it/s]

Loaded 20 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='maritaldesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_maritaldesc'





Elasticsearch index 'synonym_maritaldesc' for application 'hr_assistant' already exists!
Loading index 'synonym_maritaldesc'


100%|██████████| 5/5 [00:00<00:00, 144.42it/s]

Loaded 5 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 53.63it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='reason_for_termination'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_reason_for_termination'
Elasticsearch index 'synonym_reason_for_termination' for application 'hr_assistant' already exists!
Loading index 'synonym_reason_for_termination'



100%|██████████| 17/17 [00:00<00:00, 290.49it/s]

Loaded 17 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'





Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'


100%|██████████| 2/2 [00:00<00:00, 96.63it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='performance_score'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_performance_score'
Elasticsearch index 'synonym_performance_score' for application 'hr_assistant' already exists!
Loading index 'synonym_performance_score'



100%|██████████| 7/7 [00:00<00:00, 172.25it/s]

Loaded 7 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='age'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_age'
Elasticsearch index 'synonym_age' for application 'hr_assistant' already exists!
Loading index 'synonym_age'



100%|██████████| 39/39 [00:00<00:00, 642.07it/s]

Loaded 39 documents
Fitting entity recognizer: domain='general', intent='get_employees'
No entity model configuration set. Using default.





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 94.23%, params: {'C': 10000, 'penalty': 'l2'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resol

100%|██████████| 28/28 [00:00<00:00, 630.81it/s]

Loaded 28 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_employees', entity_type='employee_source'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employee_source'
Elasticsearch index 'synonym_employee_source' for application 'hr_assistant' already exists!
Loading index 'synonym_employee_source'



100%|██████████| 20/20 [00:00<00:00, 222.87it/s]

Loaded 20 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='sys_duration'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_employees', entity_type='reason_for_termination'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_reason_for_termination'
Elasticsearch index 'synonym_reason_for_termination' for application 'hr_assistant' already exists!
Loading index 'synonym_reason_for_termination'



100%|██████████| 17/17 [00:00<00:00, 282.74it/s]

Loaded 17 documents





Fitting role classifier: domain='general', intent='get_employees', entity_type='performance_score'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_performance_score'
Elasticsearch index 'synonym_performance_score' for application 'hr_assistant' already exists!
Loading index 'synonym_performance_score'


100%|██████████| 7/7 [00:00<00:00, 170.45it/s]

Loaded 7 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='racedesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_racedesc'
Elasticsearch index 'synonym_racedesc' for application 'hr_assistant' already exists!
Loading index 'synonym_racedesc'



100%|██████████| 6/6 [00:00<00:00, 111.08it/s]

Loaded 6 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'



100%|██████████| 4/4 [00:00<00:00, 113.71it/s]

Loaded 4 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='employment_status'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_employment_status'
Elasticsearch index 'synonym_employment_status' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_status'


100%|██████████| 5/5 [00:00<00:00, 106.96it/s]

Loaded 5 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='maritaldesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_maritaldesc'
Elasticsearch index 'synonym_maritaldesc' for application 'hr_assistant' already exists!
Loading index 'synonym_maritaldesc'



100%|██████████| 5/5 [00:00<00:00, 143.13it/s]

Loaded 5 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'





Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'


100%|██████████| 2/2 [00:00<00:00, 76.83it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_employees', entity_type='function'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_function'





Elasticsearch index 'synonym_function' for application 'hr_assistant' already exists!
Loading index 'synonym_function'


100%|██████████| 4/4 [00:00<00:00, 89.37it/s]

Loaded 4 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='manager'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_manager'
Elasticsearch index 'synonym_manager' for application 'hr_assistant' already exists!
Loading index 'synonym_manager'



100%|██████████| 1/1 [00:00<00:00, 44.26it/s]

Loaded 1 document
Fitting role classifier: domain='general', intent='get_employees', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!
Loading index 'synonym_position'



100%|██████████| 24/24 [00:00<00:00, 416.28it/s]

Loaded 24 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='department'





No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'


100%|██████████| 6/6 [00:00<00:00, 111.60it/s]

Loaded 6 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='race'
No role model configuration set. Using default.





Entity data file not found at './hr_assistant/entities/race/gazetteer.txt'. Proceeding with empty entity data.
Entity mapping file not found at './hr_assistant/entities/race/mapping.json'. Proceeding with empty entity data.
Entity map file not found at ./hr_assistant/entities/race/mapping.json
Importing synonym data to synonym index 'synonym_race'
Elasticsearch index 'synonym_race' for application 'hr_assistant' already exists!
Loading index 'synonym_race'


0it [00:00, ?it/s]

Loaded 0 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='age'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_age'
Elasticsearch index 'synonym_age' for application 'hr_assistant' already exists!
Loading index 'synonym_age'



100%|██████████| 39/39 [00:00<00:00, 711.29it/s]

Loaded 39 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='dob'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_dob'
Elasticsearch index 'synonym_dob' for application 'hr_assistant' already exists!
Loading index 'synonym_dob'



100%|██████████| 1/1 [00:00<00:00, 62.74it/s]

Loaded 1 document
Fitting role classifier: domain='general', intent='get_employees', entity_type='citizendesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_citizendesc'
Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'



100%|██████████| 3/3 [00:00<00:00, 81.05it/s]

Loaded 3 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='extreme'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_extreme'





Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'


100%|██████████| 2/2 [00:00<00:00, 54.53it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='sys_ordinal'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_employees', entity_type='date_compare'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_date_compare'
Elasticsearch index 'synonym_date_compare' for application 'hr_assistant' already exists!
Loading index 'synonym_date_compare'



100%|██████████| 2/2 [00:00<00:00, 49.71it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'





Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'


100%|██████████| 2/2 [00:00<00:00, 85.59it/s]

Loaded 2 documents
Fitting entity recognizer: domain='general', intent='get_aggregate'
No entity model configuration set. Using default.





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 92.90%, params: {'C': 100, 'penalty': 'l1'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolut

100%|██████████| 28/28 [00:00<00:00, 617.71it/s]

Loaded 28 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='employee_source'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employee_source'
Elasticsearch index 'synonym_employee_source' for application 'hr_assistant' already exists!
Loading index 'synonym_employee_source'



100%|██████████| 20/20 [00:00<00:00, 446.26it/s]

Loaded 20 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='reason_for_termination'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_reason_for_termination'
Elasticsearch index 'synonym_reason_for_termination' for application 'hr_assistant' already exists!
Loading index 'synonym_reason_for_termination'



100%|██████████| 17/17 [00:00<00:00, 331.16it/s]

Loaded 17 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='racedesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_racedesc'





Elasticsearch index 'synonym_racedesc' for application 'hr_assistant' already exists!
Loading index 'synonym_racedesc'


100%|██████████| 6/6 [00:00<00:00, 176.41it/s]

Loaded 6 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='performance_score'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_performance_score'
Elasticsearch index 'synonym_performance_score' for application 'hr_assistant' already exists!
Loading index 'synonym_performance_score'


100%|██████████| 7/7 [00:00<00:00, 154.07it/s]

Loaded 7 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'





Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'


100%|██████████| 4/4 [00:00<00:00, 116.82it/s]

Loaded 4 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='employment_status'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_status'
Elasticsearch index 'synonym_employment_status' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_status'



100%|██████████| 5/5 [00:00<00:00, 126.44it/s]

Loaded 5 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='maritaldesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_maritaldesc'
Elasticsearch index 'synonym_maritaldesc' for application 'hr_assistant' already exists!
Loading index 'synonym_maritaldesc'



100%|██████████| 5/5 [00:00<00:00, 122.65it/s]

Loaded 5 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 49.73it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='function'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_function'
Elasticsearch index 'synonym_function' for application 'hr_assistant' already exists!
Loading index 'synonym_function'



100%|██████████| 4/4 [00:00<00:00, 81.36it/s]

Loaded 4 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='name'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_name'
Elasticsearch index 'synonym_name' for application 'hr_assistant' already exists!
Loading index 'synonym_name'


100%|██████████| 245/245 [00:00<00:00, 1041.97it/s]

Loaded 245 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!
Loading index 'synonym_position'



100%|██████████| 24/24 [00:00<00:00, 437.76it/s]

Loaded 24 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='department'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'



100%|██████████| 6/6 [00:00<00:00, 134.47it/s]

Loaded 6 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='dob'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_dob'
Elasticsearch index 'synonym_dob' for application 'hr_assistant' already exists!
Loading index 'synonym_dob'



100%|██████████| 1/1 [00:00<00:00, 48.38it/s]

Loaded 1 document
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='age'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_age'





Elasticsearch index 'synonym_age' for application 'hr_assistant' already exists!
Loading index 'synonym_age'


100%|██████████| 39/39 [00:00<00:00, 729.64it/s]

Loaded 39 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='citizendesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_citizendesc'





Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'


100%|██████████| 3/3 [00:00<00:00, 98.77it/s]

Loaded 3 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='extreme'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_extreme'
Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'



100%|██████████| 2/2 [00:00<00:00, 81.61it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='manager'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_manager'
Elasticsearch index 'synonym_manager' for application 'hr_assistant' already exists!
Loading index 'synonym_manager'



100%|██████████| 1/1 [00:00<00:00, 48.29it/s]

Loaded 1 document
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='sex'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_sex'
Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'


100%|██████████| 2/2 [00:00<00:00, 66.28it/s]

Loaded 2 documents
Fitting intent classifier: domain='date'





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 98.36%, params: {'C': 10, 'class_weight': {0: 0.8130522088353414, 1: 1.9378205128205128, 2: 0.8793650793650793}, 'fit_intercept': True}
Fitting entity recognizer: domain='date', intent='get_date'
No entity model configuration set. Using default.
Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 96.07%, params: {'C': 10000, 'penalty': 'l2'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date', entity_type='name'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_name'
Elasticsearch index 'synonym_name' for application '

100%|██████████| 245/245 [00:00<00:00, 1053.46it/s]

Loaded 245 documents
Fitting role classifier: domain='date', intent='get_date', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date', entity_type='sys_duration'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 34.73it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date', entity_type='dob'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_dob'
Elasticsearch index 'synonym_dob' for application 'hr_assistant' already exists!
Loading index 'synonym_dob'



100%|██████████| 1/1 [00:00<00:00, 50.59it/s]

Loaded 1 document
Fitting entity recognizer: domain='date', intent='get_date_range_aggregate'
No entity model configuration set. Using default.





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 91.81%, params: {'C': 10000, 'penalty': 'l1'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_c

100%|██████████| 4/4 [00:00<00:00, 117.11it/s]

Loaded 4 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='state'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_state'
Elasticsearch index 'synonym_state' for application 'hr_assistant' already exists!
Loading index 'synonym_state'



100%|██████████| 28/28 [00:00<00:00, 885.49it/s]

Loaded 28 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='function'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_function'
Elasticsearch index 'synonym_function' for application 'hr_assistant' already exists!
Loading index 'synonym_function'



100%|██████████| 4/4 [00:00<00:00, 126.93it/s]

Loaded 4 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='sys_duration'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 70.03it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='time_interval'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_time_interval'
Elasticsearch index 'synonym_time_interval' for application 'hr_assistant' already exists!
Loading index 'synonym_time_interval'



100%|██████████| 10/10 [00:00<00:00, 240.31it/s]

Loaded 10 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='date_compare'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_date_compare'
Elasticsearch index 'synonym_date_compare' for application 'hr_assistant' already exists!
Loading index 'synonym_date_compare'



100%|██████████| 2/2 [00:00<00:00, 63.55it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'
Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'



100%|██████████| 2/2 [00:00<00:00, 56.06it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='dob'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_dob'
Elasticsearch index 'synonym_dob' for application 'hr_assistant' already exists!
Loading index 'synonym_dob'



100%|██████████| 1/1 [00:00<00:00, 61.02it/s]

Loaded 1 document





Fitting entity recognizer: domain='date', intent='get_date_range_employees'
No entity model configuration set. Using default.
Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 93.68%, params: {'C': 100, 'penalty': 'l1'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_res

100%|██████████| 4/4 [00:00<00:00, 122.06it/s]

Loaded 4 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!
Loading index 'synonym_position'



100%|██████████| 24/24 [00:00<00:00, 401.71it/s]

Loaded 24 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='citizendesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_citizendesc'
Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'



100%|██████████| 3/3 [00:00<00:00, 97.52it/s]

Loaded 3 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='sys_interval'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='state'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_state'
Elasticsearch index 'synonym_state' for application 'hr_assistant' already exists!
Loading index 'synonym_state'



100%|██████████| 28/28 [00:00<00:00, 768.32it/s]

Loaded 28 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='extreme'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_extreme'
Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'



100%|██████████| 2/2 [00:00<00:00, 106.51it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='department'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'



100%|██████████| 6/6 [00:00<00:00, 175.42it/s]

Loaded 6 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='sys_duration'
No role model configuration set. Using default.





Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='maritaldesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_maritaldesc'
Elasticsearch index 'synonym_maritaldesc' for application 'hr_assistant' already exists!
Loading index 'synonym_maritaldesc'


100%|██████████| 5/5 [00:00<00:00, 129.22it/s]

Loaded 5 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 72.96it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='time_interval'





No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_time_interval'
Elasticsearch index 'synonym_time_interval' for application 'hr_assistant' already exists!
Loading index 'synonym_time_interval'


100%|██████████| 10/10 [00:00<00:00, 239.38it/s]

Loaded 10 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='date_compare'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_date_compare'
Elasticsearch index 'synonym_date_compare' for application 'hr_assistant' already exists!
Loading index 'synonym_date_compare'



100%|██████████| 2/2 [00:00<00:00, 65.65it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'





Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'


100%|██████████| 2/2 [00:00<00:00, 80.54it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='racedesc'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_racedesc'
Elasticsearch index 'synonym_racedesc' for application 'hr_assistant' already exists!
Loading index 'synonym_racedesc'


100%|██████████| 6/6 [00:00<00:00, 131.62it/s]

Loaded 6 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='sys_number'





No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='dob'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_dob'
Elasticsearch index 'synonym_dob' for application 'hr_assistant' already exists!
Loading index 'synonym_dob'


100%|██████████| 1/1 [00:00<00:00, 50.59it/s]

Loaded 1 document
Fitting intent classifier: domain='salary'





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 96.64%, params: {'C': 100, 'class_weight': {0: 0.8294469357249626, 1: 1.1142528735632182, 2: 1.1555555555555554}, 'fit_intercept': True}
Fitting entity recognizer: domain='salary', intent='get_salary_aggregate'
No entity model configuration set. Using default.
Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 92.81%, params: {'C': 10000, 'penalty': 'l2'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configur

100%|██████████| 245/245 [00:00<00:00, 968.93it/s]

Loaded 245 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='date_compare'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_date_compare'
Elasticsearch index 'synonym_date_compare' for application 'hr_assistant' already exists!
Loading index 'synonym_date_compare'



100%|██████████| 2/2 [00:00<00:00, 68.02it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'



100%|██████████| 4/4 [00:00<00:00, 123.92it/s]

Loaded 4 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!
Loading index 'synonym_position'



100%|██████████| 24/24 [00:00<00:00, 557.83it/s]

Loaded 24 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='citizendesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_citizendesc'
Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'



100%|██████████| 3/3 [00:00<00:00, 106.58it/s]

Loaded 3 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='state'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_state'
Elasticsearch index 'synonym_state' for application 'hr_assistant' already exists!
Loading index 'synonym_state'



100%|██████████| 28/28 [00:00<00:00, 666.16it/s]

Loaded 28 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='dob'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_dob'
Elasticsearch index 'synonym_dob' for application 'hr_assistant' already exists!
Loading index 'synonym_dob'



100%|██████████| 1/1 [00:00<00:00, 40.05it/s]

Loaded 1 document
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='money'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_money'
Elasticsearch index 'synonym_money' for application 'hr_assistant' already exists!
Loading index 'synonym_money'



100%|██████████| 1/1 [00:00<00:00, 48.78it/s]

Loaded 1 document
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='extreme'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_extreme'





Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'


100%|██████████| 2/2 [00:00<00:00, 63.10it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='sys_time'
No role model configuration set. Using default.





Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='department'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'


100%|██████████| 6/6 [00:00<00:00, 128.69it/s]

Loaded 6 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='employee_source'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employee_source'
Elasticsearch index 'synonym_employee_source' for application 'hr_assistant' already exists!
Loading index 'synonym_employee_source'



100%|██████████| 20/20 [00:00<00:00, 484.68it/s]

Loaded 20 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='sys_duration'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'





Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'


100%|██████████| 2/2 [00:00<00:00, 69.83it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='manager'





No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_manager'
Elasticsearch index 'synonym_manager' for application 'hr_assistant' already exists!
Loading index 'synonym_manager'


100%|██████████| 1/1 [00:00<00:00, 55.72it/s]

Loaded 1 document
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'
Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'



100%|██████████| 2/2 [00:00<00:00, 62.54it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='time_recur'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_time_recur'
Elasticsearch index 'synonym_time_recur' for application 'hr_assistant' already exists!
Loading index 'synonym_time_recur'



100%|██████████| 5/5 [00:00<00:00, 116.56it/s]

Loaded 5 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='function'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_function'
Elasticsearch index 'synonym_function' for application 'hr_assistant' already exists!
Loading index 'synonym_function'


100%|██████████| 4/4 [00:00<00:00, 72.87it/s]

Loaded 4 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='sys_amount-of-money'
No role model configuration set. Using default.
Fitting entity recognizer: domain='salary', intent='get_salary_employees'
No entity model configuration set. Using default.





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 93.06%, params: {'C': 100, 'penalty': 'l1'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolut

100%|██████████| 1/1 [00:00<00:00, 42.79it/s]

Loaded 1 document
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='name'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_name'
Elasticsearch index 'synonym_name' for application 'hr_assistant' already exists!
Loading index 'synonym_name'



100%|██████████| 245/245 [00:00<00:00, 1123.29it/s]

Loaded 245 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='time_recur'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_time_recur'
Elasticsearch index 'synonym_time_recur' for application 'hr_assistant' already exists!
Loading index 'synonym_time_recur'



100%|██████████| 5/5 [00:00<00:00, 89.09it/s]

Loaded 5 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'



100%|██████████| 4/4 [00:00<00:00, 148.84it/s]

Loaded 4 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!
Loading index 'synonym_position'



100%|██████████| 24/24 [00:00<00:00, 433.39it/s]

Loaded 24 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='citizendesc'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_citizendesc'
Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'


100%|██████████| 3/3 [00:00<00:00, 84.84it/s]

Loaded 3 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='employment_status'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_status'
Elasticsearch index 'synonym_employment_status' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_status'



100%|██████████| 5/5 [00:00<00:00, 108.78it/s]

Loaded 5 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='money'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_money'
Elasticsearch index 'synonym_money' for application 'hr_assistant' already exists!
Loading index 'synonym_money'


100%|██████████| 1/1 [00:00<00:00, 46.27it/s]

Loaded 1 document
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='extreme'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_extreme'
Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'



100%|██████████| 2/2 [00:00<00:00, 52.62it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='sys_time'
No role model configuration set. Using default.





Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='department'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'


100%|██████████| 6/6 [00:00<00:00, 170.42it/s]

Loaded 6 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='sys_duration'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='employee_source'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employee_source'
Elasticsearch index 'synonym_employee_source' for application 'hr_assistant' already exists!
Loading index 'synonym_employee_source'



100%|██████████| 20/20 [00:00<00:00, 492.16it/s]

Loaded 20 documents





Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'


100%|██████████| 2/2 [00:00<00:00, 65.80it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='date_compare'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_date_compare'
Elasticsearch index 'synonym_date_compare' for application 'hr_assistant' already exists!
Loading index 'synonym_date_compare'



100%|██████████| 2/2 [00:00<00:00, 53.93it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'
Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'



100%|██████████| 2/2 [00:00<00:00, 68.42it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='racedesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_racedesc'
Elasticsearch index 'synonym_racedesc' for application 'hr_assistant' already exists!
Loading index 'synonym_racedesc'



100%|██████████| 6/6 [00:00<00:00, 122.33it/s]

Loaded 6 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='function'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_function'





Elasticsearch index 'synonym_function' for application 'hr_assistant' already exists!
Loading index 'synonym_function'


100%|██████████| 4/4 [00:00<00:00, 129.49it/s]

Loaded 4 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='sys_amount-of-money'
No role model configuration set. Using default.
Fitting entity recognizer: domain='salary', intent='get_salary'
No entity model configuration set. Using default.





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 98.50%, params: {'C': 1000000, 'penalty': 'l2'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary', entity_type='name'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_name'
Elasticsearch index 'synonym_name' for application 'hr_assistant' alread

100%|██████████| 245/245 [00:00<00:00, 1136.38it/s]

Loaded 245 documents
Fitting role classifier: domain='salary', intent='get_salary', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'



100%|██████████| 4/4 [00:00<00:00, 162.93it/s]

Loaded 4 documents
Fitting role classifier: domain='salary', intent='get_salary', entity_type='money'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_money'
Elasticsearch index 'synonym_money' for application 'hr_assistant' already exists!
Loading index 'synonym_money'



100%|██████████| 1/1 [00:00<00:00, 62.81it/s]

Loaded 1 document
Fitting role classifier: domain='salary', intent='get_salary', entity_type='extreme'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_extreme'
Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'



100%|██████████| 2/2 [00:00<00:00, 74.08it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary', entity_type='sys_duration'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 79.78it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary', entity_type='time_recur'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_time_recur'
Elasticsearch index 'synonym_time_recur' for application 'hr_assistant' already exists!
Loading index 'synonym_time_recur'



100%|██████████| 5/5 [00:00<00:00, 111.24it/s]

Loaded 5 documents
Fitting role classifier: domain='salary', intent='get_salary', entity_type='sys_amount-of-money'
No role model configuration set. Using default.





In [42]:
# Create a dialogue-level Conversation for the hr_assistant app, reusing the
# `nlp` object defined in an earlier cell. Later cells reuse both the
# `Conversation` class and the `conv` instance bound here.
from mindmeld.components.dialogue import Conversation
conv = Conversation(nlp=nlp, app_path='../hr_assistant')

The application package hr_assistant is already imported.


In [43]:
# Probe query through the dialogue manager. "nan" resolves to the employee
# Nan Singh in the response; presumably chosen because the name is spelled
# like NaN -- TODO confirm.
conv.say("What is nan's official position")

['What would you like to know about Nan Singh?', 'Listening...']

In [46]:
# Run the same probe (lower-cased) through the NLP pipeline only, to inspect
# the predicted domain/intent and the resolved `name` entity candidates.
nlp.process("what is nan's official position")

{'text': "what is nan's official position",
 'domain': 'general',
 'intent': 'get_info',
 'entities': [{'text': 'nan',
   'type': 'name',
   'role': None,
   'value': [{'cname': 'Nan Singh', 'score': 62.84209, 'top_synonym': 'nan'},
    {'cname': 'Daniff Hernandez',
     'score': 10.082616,
     'top_synonym': 'hernandez'},
    {'cname': 'Nilson Fernandes',
     'score': 10.082616,
     'top_synonym': 'fernandes'},
    {'cname': 'Samuel MacLennan',
     'score': 9.67027,
     'top_synonym': 'maclennan'}],
   'span': {'start': 8, 'end': 10}}]}

In [48]:
# `json` is already imported in the notebook's first cell; this repeat is harmless.
import json
# `df` is a second name for `intent_txt` (defined in an earlier cell), not a copy.
df = intent_txt
# Column labels of the intent table; columns[0] is 'get_info' in the run below,
# so the labels are presumably intent names -- TODO confirm for the other columns.
columns = df.columns

In [None]:
# Display all column labels of `df`.
columns

In [19]:
# Display the first column label; it is used below as the log file name.
columns[0]

'get_info'

In [22]:
# Smoke test: check that a log file named after the first column can be
# written under qa_log/. The `with` block closes (and therefore flushes) the
# file; the original left the handle open, so "Test" could stay unwritten.
with open("qa_log/" + columns[0] + ".txt", "w") as f:
    f.write("Test")


In [51]:
# Replay the sample queries of one column of `df` through the dialogue manager
# and write a question/answer transcript to qa_log/<column name>.txt.
# Queries whose conversation raises are collected in `issues`.
orig_conv = Conversation(nlp=nlp, app_path='../hr_assistant')
# NOTE(review): this only binds a second name to the same Conversation object
# (no copy is made), so any state the conversation keeps carries over from one
# query to the next. Confirm whether a fresh conversation per query was intended.
conv = orig_conv

issues = []
col = columns[0]
# Queries are read starting at row 2; presumably the first two rows are not
# queries -- TODO confirm against how `intent_txt` is built.
idx = 2
# `with` guarantees the log is flushed and closed even if a helper outside the
# inner try (get_labels / entity_label_remove) raises.
with open("qa_log/" + col + ".txt", "w") as f:
    f.write(col.upper() + "=============================================================" + '\n')
    while idx < len(df):
        sentence = df[col][idx]
        # A NaN cell ends this column's list of queries.
        if is_nan(sentence):
            break
        f.write("QUERY >>>>>>>>>>>>>>>>>>>>>" + '\n')
        f.write("Q: " + sentence + '\n')
        # Only the positions are used here; the labels are discarded.
        labels, pos = get_labels(sentence)
        q = entity_label_remove(sentence, pos, uniq)
        try:
            # Keep only the first element of the response list.
            ans = conv.say(q)[0]
            f.write("A: " + ans + '\n')
            f.write("Process: " + '\n')
            # NOTE(review): this processes the *answer* text, not the query `q`
            # -- confirm that is what the log is meant to show.
            f.write(json.dumps(nlp.process(ans), indent=2))
        except Exception as e:
            # Record the failure in the transcript and keep going with the next query.
            f.write("CONVERSATION BREAK XXXXXXXXXXXXXXXXXXXXXXXXXX" + '\n')
            f.write(repr(e))
            issues.append(q)
        f.write('\n')
        idx += 1
print(issues)

The application package hr_assistant is already imported.
[]


In [None]:
# Number of queries whose conversation raised during the logging run above.
print(len(issues))