In [1]:
import numpy as np
import pandas as pd
import random
import os
import json

<h2>Functions</h2>

<h3>General Functions</h3>

In [2]:
# Check if a Value is Nan
# param x - Value to check
# return boolean - If it is a Nan
def is_nan(x):
    return (x is np.nan or x != x)

# Check All Values in List are Nan
# param x - List to check
# return boolean - If it is all Nan
def list_is_nan(x):
    for i in x:
        if not is_nan(i): return False
    return True

# Converts names to single string
# param names (list) - List of names in the format [['Mia', 'Brown'],[..]]
# return unique (list) - List of names in the format ['Mia Brown', ..]
def name_to_str(names):
    return [",".join(name).replace(",", " ") for name in names]

# Converts names to single string
# param names (list) - List of names in the format ['Mia Brown', ..]
# return names_list (list) - List of names in the format [['Mia', 'Brown'],[..]]
def str_name_to_list(names):
    for i in range(len(names)):
        full_name = names[i].split()
        names[i] = [full_name[0], full_name[len(full_name) - 1]]
    return names

# Extracts All of the Elements in a List of (One or Two D) Lists
# param two_d_list (list) List of (One or Two D) Lists
# return new_elems (list) List of elements
def extract_2d_list(two_d_list):
    elems, new_elems = [], []
    for elem in two_d_list: elems.extend(elem)
    for elem in elems:
        if isinstance(elem, list): new_elems.extend(elem)
        else: new_elems.append(elem)
    return new_elems

<h3>Get Unique Values</h3>

In [3]:
# Get the Names of Employees in the HR Dataset
# param df (dataframe) - Dataframe with names
# param column (str) - Column can be 'Employee Name' or 'Manager Name'
# Returns names (list) - Array of names (['First', 'Last'])
def get_names(df, column='Employee Name'):
    names = [df[[column]].iloc[i][0] for i in range(df.shape[0] - 1)]
    for i in range(len(names)):
        try:
            full_name = names[i].split(',')
            first = full_name[1].strip().split()[0]
            last = full_name[0]
            names[i] = [first, last]
        except:
            names[i] = ['Jeremy', 'Prater']
    return names

# Filter to list of unique names
# param names (list) - List of names in the format [['Mia', 'Brown'],[..]]
# param avoid (list) - List of names to avoid duplicates of
# return unique (list) - List of names in the format [['Mia', 'Brown'],[..]]
def filter_names_helper(names, avoid = []):
    unique = []
    first, last = [], []
    if(len(avoid) != 0):
        first = [n[0] for n in avoid]
        last = [n[1] for n in avoid]
    for name in names:
        if((name[0] not in first) and (name[1] not in last)):
            first.append(name[0])
            last.append(name[1])
            unique.append(name)
    return unique

# Filter to list of unique names that don't overlap with manager names
# param df (dataframe) - Dataframe with names 
# return unique (list) - List of names in the format [['Mia', 'Brown'],[..]]
def get_filtered_names(df):
    managers = str_name_to_list(get_uniq_str(df, "Manager Name"))
    managers = filter_names_helper(managers)
    survey_users = [['Mia','Brown'], ['Ivan','Rogers'], ['Julia','Soto'], ['Nan','Singh']]
    user_names = get_names(hr_data)
    user_names = filter_names_helper(user_names, avoid=survey_users)
    user_names = filter_names_helper(user_names, avoid=managers)
    user_names.extend(survey_users)
    return user_names

# Update Name Synonyms
def name_syn_update(ent_dict):
    syn_names = []
    for name in ent_dict['name'][0]:
        name_splt = name.split()
        name_syn = [name.lower(), name_splt[0], name_splt[0].lower(),
                    name_splt[1], name_splt[1].lower()]
        syn_names.append(name_syn)
    return syn_names

In [4]:
# Get the Unique Names of Employees in the HR Dataset
# param df (dataframe) - Dataframe with names
# return unique (list) - List of names in the format ['Mia Brown', ..]
def get_uniq_names(df):
    return filter_names(get_names(df))

# Get Unique String Values of a Dataframe Column
# param df (dataframe) - Source Dataframe
# param col_name (str) - Name of Column to get unique values'
# return uniq_arr (arr) - Array of Unique Values
def get_uniq_str(df, col_name):
    return [i.lower().strip() for i in df[col_name].unique()[:-1]]

# Get Unique String Values of a Dataframe Column
# param df (dataframe) - Source Dataframe
# param col_name (str) - Name of Column to get unique values'
# return uniq_arr (arr) - Array of Unique Values
def get_uniq_num(df, col_name):
    return [str(int(i)) for i in df[col_name].unique()[:-1]]

# Generate a Dictionary with Unique Values for Select Columns
# param df (dataframe) - Source Dataframe
# return uniq (dict) - Dictionary that maps entities to unique values
def gen_uniq_dict(df):
    # Get Unique Values of Relevant Columns
    uniq = {}
    # Predefined
    uniq['name'] = name_to_str(get_filtered_names(df))
    uniq['sex'] = ['male', 'female']
    uniq['employment_status'] = ['active', 'voluntarily terminated', 'terminated for a cause',
                          'on a leave of absence' + 'going to start work in the future']
    uniq['performance_score'] = ['fully meet performance expecations', 'are too early to review', 
                         'meet 90-day expectations', 'are exceptional', 'need improvement', 'exeed expecations']
    # Custom Preprocessing
    uniq['state'] = hr_data['State'].unique()[:-1]
    # Standard Preprocessing 
    uniq['age'] = get_uniq_num(hr_data, 'Age')
    uniq['maritaldesc'] = get_uniq_str(hr_data, "MaritalDesc")
    uniq['citizendesc'] = get_uniq_str(hr_data, "CitizenDesc")
    uniq['racedesc'] = get_uniq_str(hr_data, "RaceDesc")
    uniq['department'] = get_uniq_str(hr_data, "Department")
    uniq['position'] = get_uniq_str(hr_data, "Position")
    #uniq['manager'] = name_to_str(str_name_to_list(get_uniq_str(hr_data, "Manager Name")))
    uniq['employee_source'] = get_uniq_str(hr_data, "Employee Source")
    return uniq

# Generate a Dictionary with Unique Values for Select Columns
# param uniq (dict) - Dictionary that maps entities to unique values
# param ent_dict (dict) - Dictionary that Contains Entity Information
# return uniq (dict) - Dictionary that maps entities to unique values
def uniq_dict_update(uniq, ent_dict):
    uniq['money'] = ent_dict['money'][1][0]
    uniq['time_interval'] = extract_2d_list(ent_dict['time_interval'])
    uniq['time_recur'] = extract_2d_list(ent_dict['time_recur'])
    uniq['function'] = extract_2d_list(ent_dict['function'])
    uniq['extreme'] = extract_2d_list(ent_dict['extreme'])
    uniq['employment_action'] = extract_2d_list(ent_dict['employment_action'])
    uniq['date_compare'] = extract_2d_list(ent_dict['date_compare'])
    uniq['manager'] = extract_2d_list(ent_dict['manager'])
    return uniq

In [5]:
#extract_2d_list(ent_dict['manager'])

<h3>Load, Parse and Clean Entity Data</h3>

In [6]:
# Load entity data frame from CSV
# param path (str) - Path to csv
# return ent_df (dataframe) - Entity Info Dataframe
def load_ent(path):
    df = pd.read_csv(path).iloc[:, :4]
    ct = 0
    while (ct < len(df) and not list_is_nan(df.iloc[ct, :].values)): ct += 1
    return df.iloc[:ct, :]

# Parse CSV that contains [Entity, Options, Synonyms, Gazetteer]
# param ent_df (dataframe) - Contains the columns listed above
# return ent_dict (dict) - Uncleaned dictionary where the keys are the entities
#                          and the values are [options (arr), synonyms (arr), gazetteer (arr)]
def ent_parse(ent_df):
    ent_dict = {}
    ct = 0
    curr_ent = ''
    while(ct < len(ent_df)):
        row = ent_df.iloc[ct]
        # update curr_ent if new entity
        if(not is_nan(row['Entity'])): 
            curr_ent = row['Entity']
            ent_dict[curr_ent] = [[], [], []]
        # check if options is a list
        if(not is_nan(row['Options']) and row['Options'].count(',') > 2):
            ent_dict[curr_ent][0] = row['Options']
            if(not is_nan(row['Synonyms'])): 
                ent_dict[curr_ent][1] = row['Synonyms'].split(',')
            if(not is_nan(row[3])): ent_dict[curr_ent][2] = row['Gazetteer']
        # single entry in option column
        else:
            ent_dict[curr_ent][0].append(row['Options'])
            ent_dict[curr_ent][1].append(row['Synonyms'])
            ent_dict[curr_ent][2].append(row['Gazetteer'])
        ct += 1
    return ent_dict

# Cleans the gazetteers of the ent dictionary
# param ent_dict (dictionary) - Entity Dictionary
# return ent_dict (dictionary) - Cleaned Entity Dictionary
def clean_options(ent_dict):
    for i in ent_dict.keys():
        old_opt = ent_dict[i][0]
        new_opt = []
        for word in old_opt:
            if(not is_nan(word)):
                new_opt.append(word.replace("'", "").replace(",", ""))
        ent_dict[i][0] = new_opt
    return ent_dict

# Cleans the gazetteers of the ent dictionary
# param ent_dict (dictionary) - Entity Dictionary
# return ent_dict (dictionary) - Cleaned Entity Dictionary
def clean_synonyms(ent_dict):
    for i in ent_dict.keys():
        old_syn = ent_dict[i][1]
        new_syn = []
        if(len(old_syn) != 0):
            for j in old_syn:
                if(not is_nan(j)): new_syn.append([string.strip() for string in j.split(',')])
                else: new_syn.append(j)
        ent_dict[i][1] = new_syn
    return ent_dict

# Cleans the gazetteers of the ent dictionary
# param ent_dict (dictionary) - Entity Dictionary
# return ent_dict (dictionary) - Cleaned Entity Dictionary
def clean_gazetteer(ent_dict):
    for i in ent_dict.keys():
        old_gaz = ent_dict[i][2]
        clean_gaz = []
        if (len(old_gaz) != 0):        
            if (type(old_gaz) is str): clean_gaz = old_gaz.split(',')
            elif(not is_nan(old_gaz[0])): clean_gaz = (old_gaz[0].split(','))
        ent_dict[i][2] = [i.strip() for i in clean_gaz]
    return ent_dict

# Cleans an Entity Dictionary
# ent_dict (dict) - Uncleaned dictionary of parsed entity information
# uniq (dict) - Dictionary that maps entities to unique values
# return cleaned_ent_dict (dict) - Cleaned dictionary
def clean_ent_dict(ent_dict, uniq):
    ent_dict = clean_options(ent_dict)
    ent_dict = clean_synonyms(ent_dict)
    ent_dict = clean_gazetteer(ent_dict)
    ent_dict['name'][0] = uniq['name']
    ent_dict['state'][0] = uniq['state']
    ent_dict['age'][0] = uniq['age']
    return ent_dict

# Parse and Clean Entity Data
# Parse CSV that contains [Entity, Options, Synonyms, Gazetteer]
# param ent_df (dataframe) - Contains the columns listed above
def parse_clean_ent(ent_df, uniq):
    ent = ent_parse(ent_df)
    return clean_ent_dict(ent, uniq)

<h3>File Generation</h3>

In [7]:
# Convert a list into a text file
# param folder (str) - Name of the folder to write the file
# param lines (list) - List of values to write to file
def list_to_txt_file(folder, lines):
    directory = 'hr_assistant/entities/' + folder + '/'
    if not os.path.exists(directory): os.makedirs(directory)
    with open(directory + 'gazetteer.txt', 'w+') as filehandle:  
        filehandle.writelines("%s\n" % line for line in lines)
        
# Convert a dict into a json file
# param folder (str) - Name of the folder to write the file
# param dict (dict) - Json dict to write to file
def dict_to_json_file(folder, json_dict):
    directory = 'hr_assistant/entities/' + folder + '/'
    if not os.path.exists(directory): os.makedirs(directory)
    with open(directory + "mapping.json", "w+") as f:
        json_str = json.dumps(json_dict, indent=4)
        f.write(json_str)
        
# Generate a Mapping JSON Dict to create Mapping.json file
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
# param entity (str) - The entity to generate the gazetteer list for
# return json_dict (dict) - Json Dict in the proper format for mapping.json
def gen_map_json(ent_dict, entity):
    opt = ent_dict[entity][0]
    syn = ent_dict[entity][1]
    # Check if the Options and the Synonyms Align
    if(len(opt) != len(syn)): syn = [[] for i in range(len(opt))]
    return gen_map_json_helper(opt, syn)

# Helper function to Generate a Mapping JSON Dictionary
# param options (list) - Array of options
# param synonyms (2d list) - Array of synonym arrays corresponding to options
# return json_dict (dict) - Json Dict in the proper format for mapping.json
def gen_map_json_helper(options, synonyms):
    json_dict = {}
    json_dict['entities'] = []
    for i in range(len(options)):
        new_dict = {}
        if(is_nan(synonyms[i])): new_dict['whitelist'] = []
        else: new_dict['whitelist'] = synonyms[i]
        new_dict['cname'] = options[i]
        json_dict['entities'].append(new_dict)
    return json_dict

# Create Mapping.json files for Every Entity in an Entity Dictionary
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
def gen_map_json_files(ent_dict):
    for entity in ent_dict.keys():
        json_dict = gen_map_json(ent_dict, entity)
        if(not is_nan(json_dict)): dict_to_json_file(entity, json_dict)

# Collect All Synonyms for a Single Entity in Entity Dict
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
# param entity (str) - The entity to get Synyonyms for
# return synonyms (list) - List of Synonyms of Specified entity
def get_synonyms(ent_dict, entity):
    synonyms = []
    for syn in ent_dict[entity][1]:
        if(not is_nan(syn)): synonyms.extend(syn)
    return synonyms

# Generate a Gazetter List for an Entity
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
# param entity (str) - The entity to generate the gazetteer list for
# return gaz (list) - List of gazetteer word relevant to the entity
def gen_gaz_list(ent_dict, entity):
    gaz = []
    gaz.extend(ent_dict[entity][0])
    gaz.extend(get_synonyms(ent_dict, entity))
    gaz.extend(ent_dict[entity][2])
    return gaz

# Create Gazetteer Files for Every Entity in an Entity Dictionary
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
def gen_gazetteers(ent_dict):
    for entity in ent_dict.keys():
        gaz_list = gen_gaz_list(ent_dict, entity)
        list_to_txt_file(entity, gaz_list)

<h3>Debug Labelling</h3>

In [8]:
# Find all labels in a sentence
# pram text (str) String that may or may not contain labels in the form of {ent_type|ent}
# return labels (list) List of labels found in the text
def get_labels(text):
    chars = list(text)
    start, stop = 0, 0
    labels = []
    positions = []
    for i in range(len(chars)):
        if chars[i] == '{': 
            start = i
        if chars[i] == '}':
            stop = i
            txt = chars[start:(stop + 1)]
            labels.append("".join(chars[start:(stop + 1)]))
            positions.append([start, stop])
    return labels, positions

# Given a list of labels, Seperate Key and Values
# param labels (list) List of labels found in the text
# return kv (2d list) List of the Keys list and Values list [[key_array], [value_array]]
def get_kv(labels):
    k = []
    v = []
    for label in labels:
        if '|' in label:
            k.append(label.split('|')[0].replace("{", ""))
            v.append(label.split('|')[1].replace("}", ""))
    return [k,v]

# Check if a Synonym Exists in a Sentence that it is labelled
# param sentence (str) String that may or may not contain labels in the form of {ent_type|ent}
# param ent_dict (dict) - Dictionary that maps entities to options, synonyms, and gazetteer
# param kv_labels (2d list) List of the Keys list and Values list [[key_array], [value_array]]
# param col (str) Column in the dataframe
# param idx (int) Index of the Current Sentence in the Dataframe
def label_chk_helper(sentence, ent_dict, kv_labels, col, idx):
    sent_split = [word.lower() for word in sentence.split()]
    for ent in ent_dict:
        # Check for 'Entity Options' in text
        options = ent_dict[ent][0]
        for opt in options:
            if opt in sent_split and opt not in kv_labels[0] and opt not in kv_labels[1]:
                print("POTENTIAL OPTION-MISSING LABEL")
                print("OPTION: " + opt)
                print("ENTITY: " + ent)
                print("SENTENCE: " + sentence)
                print("Column: " + col)
                print("Index: " + str(idx + 2))
                print("==========================================")
        # Check for 'Synonym Options' in text
        syns = get_synonyms(ent_dict, ent)
        for syn in syns:
            if syn.lower() in sent_split and syn not in kv_labels[0]:
                print("POTENTIAL SYNONYM-MISSING LABEL")
                print("SYNONYM: " + syn)
                print("ENTITY: " + ent)
                print("SENTENCE: " + sentence)
                print("Column: " + col)
                print("Index: " + str(idx + 2))
                print("==========================================")
                
# Finds mismatches between labels in the Dataframe
# param kv_labels (2d list) List of the Keys list and Values list [[key_array], [value_array]]
# param sentence (str) String that may or may not contain labels in the form of {ent_type|ent}
# param l_dict (dict) Dictionary that keeps track of entity label mappings passed in from the main function
# param col (str) Column in the dataframe
# param idx (int) Index of the Current Sentence in the Dataframe
def chk_mismatch(labels, sentence, l_dict, col, idx):
    if(len(labels[0]) != 0):
        for ct in range(len(labels[0])):
            k = labels[0][ct]
            v = labels[1][ct]
            
            if k in l_dict: 
                if l_dict[k] != v:
                    print("POTENTIAL MISMATCH for: " + k)
                    print("CURRENT LABEL: " + v)
                    print("IN DICT: " + l_dict[k])
                    print("SENTENCE: " + sentence)
                    print("Column: " + col)
                    print("Index: " + str(idx + 2))
                    print("==========================================")
            else: l_dict[k] = v  

# Check if the Synonyms to Entities Were not Labelled in Training
# param df (dataframe) - Dataframe where each column is an intent and each row has sentence examples
# param ent_dict (dict) - Dictionary that contains entities as keys and synonyms
def label_chk(df, ent_dict):
    for col in df:
        print(col.upper() + "=============================================================")
        idx = 2
        nan = is_nan(df[col][idx])
        l_dict = {}
        while(idx < len(df)):
            if(is_nan(df[col][idx])): break
            sentence = df[col][idx]
            labels, pos = get_labels(sentence)
            labels = get_kv(labels)
            label_chk_helper(sentence, ent_dict, labels, col, idx)
            chk_mismatch(labels, sentence, l_dict, col, idx)
            idx += 1

<h2>Workflow</h2>

<h4>Load Data</h4>

In [26]:
hr_data = pd.read_csv('./custom_scripts/old_core_dataset.csv')
hr_data

Unnamed: 0,Employee Name,Employee Number,State,Zip,DOB,Age,Sex,MaritalDesc,CitizenDesc,Hispanic/Latino,...,Date of Hire,Date of Termination,Reason For Term,Employment Status,Department,Position,Pay Rate,Manager Name,Employee Source,Performance Score
0,"Brown, Mia",1.103024e+09,MA,1450.0,11/24/1985,32.0,Female,Married,US Citizen,No,...,10/27/2008,,N/A - still employed,Active,Admin Offices,Accountant I,28.50,Brandon R. LeBlanc,Diversity Job Fair,Fully Meets
1,"LaRotonda, William",1.106027e+09,MA,1460.0,4/26/1984,33.0,Male,Divorced,US Citizen,No,...,1/6/2014,,N/A - still employed,Active,Admin Offices,Accountant I,23.00,Brandon R. LeBlanc,Website Banner Ads,Fully Meets
2,"Steans, Tyrone",1.302053e+09,MA,2703.0,9/1/1986,31.0,Male,Single,US Citizen,No,...,9/29/2014,,N/A - still employed,Active,Admin Offices,Accountant I,29.00,Brandon R. LeBlanc,Internet Search,Fully Meets
3,"Howard, Estelle",1.211051e+09,MA,2170.0,9/16/1985,32.0,Female,Married,US Citizen,No,...,2/16/2015,4/15/2015,N/A - still employed,Active,Admin Offices,Administrative Assistant,21.50,Brandon R. LeBlanc,Pay Per Click - Google,N/A- too early to review
4,"Singh, Nan",1.307060e+09,MA,2330.0,5/19/1988,29.0,Female,Single,US Citizen,No,...,5/1/2015,,N/A - still employed,Active,Admin Offices,Administrative Assistant,16.56,Brandon R. LeBlanc,Website Banner Ads,N/A- too early to review
5,"Smith, Leigh Ann",7.110077e+08,MA,1844.0,6/14/1987,30.0,Female,Married,US Citizen,No,...,9/26/2011,9/25/2013,career change,Voluntarily Terminated,Admin Offices,Administrative Assistant,20.50,Brandon R. LeBlanc,Diversity Job Fair,Fully Meets
6,"LeBlanc, Brandon R",1.102024e+09,MA,1460.0,6/10/1984,33.0,Male,Married,US Citizen,No,...,1/5/2016,,N/A - still employed,Active,Admin Offices,Shared Services Manager,55.00,Janet King,Monster.com,Fully Meets
7,"Quinn, Sean",1.206043e+09,MA,2045.0,11/6/1984,33.0,Male,Married,Eligible NonCitizen,No,...,2/21/2011,8/15/2015,career change,Voluntarily Terminated,Admin Offices,Shared Services Manager,55.00,Janet King,Diversity Job Fair,Fully Meets
8,"Boutwell, Bonalyn",1.307060e+09,MA,2468.0,4/4/1987,30.0,Female,Married,US Citizen,No,...,2/16/2015,,N/A - still employed,Active,Admin Offices,Sr. Accountant,34.95,Brandon R. LeBlanc,Diversity Job Fair,90-day meets
9,"Foster-Baker, Amy",1.201031e+09,MA,2050.0,4/16/1979,38.0,Female,Married,US Citizen,no,...,1/5/2009,,N/A - still employed,Active,Admin Offices,Sr. Accountant,34.95,Board of Directors,Other,Fully Meets


In [27]:
#hr_data.columns

<h4>Create Unique Values Dictionary</h4>

In [28]:
uniq = gen_uniq_dict(hr_data)

In [29]:
#uniq

<h4>Load Entities</h4>

In [30]:
ent_df = load_ent('./custom_scripts/HR Manager Schema - Entities.csv')
ent_dict = parse_clean_ent(ent_df, uniq)
ent_dict['name'][1] = name_syn_update(ent_dict)
uniq = uniq_dict_update(uniq, ent_dict)
ent_dict

{'name': [['William LaRotonda',
   'Tyrone Steans',
   'Estelle Howard',
   'Leigh Smith',
   'Brandon LeBlanc',
   'Sean Quinn',
   'Bonalyn Boutwell',
   'Amy Foster-Baker',
   'Janet King',
   'Jennifer Zamora',
   'Renee Becker',
   'Taisha Goble',
   'Daniff Hernandez',
   'Jayne Horton',
   'Noelle Johnson',
   'Thomas Murray',
   'Randall Pearson',
   'Thelma Petrowsky',
   'Lori Roby',
   'Jason Salter',
   'Kramer Simard',
   'Simon Roup',
   'Ricardo Ruiz',
   'Peter Monroe',
   'Eric Dougall',
   'Rick Clayton',
   'Lisa Galia',
   'Leonara Lindsay',
   'Alejandro Bacong',
   'Anthony Cisco',
   'Linda Dolan',
   'Maria Gonzalez',
   'Carlos Merlos',
   'Tanya Morway',
   'Anita Shepard',
   'Neville Tredinnick',
   'Jumil Turpin',
   'Karthikeyan Ait Sidi',
   'Claudia Carr',
   'Donald Favis',
   'Bianca Roehrich',
   'Ann Daniele',
   'Jyoti Lajiri',
   'Jeremiah Semizoglou',
   'Joe South',
   'Sarah Warfield',
   'Elisa Bramante',
   'Michael Albert',
   'Charles Bozzi'

In [13]:
baby_names = pd.read_csv('./custom_scripts/baby-names.csv')

In [14]:
baby_names = list(set(baby_names['name']))
baby_names

['Ruie',
 'Orlena',
 'Wayland',
 'Ishmael',
 'Algot',
 'Norberto',
 'Tina',
 'Minta',
 'Kailyn',
 'Norval',
 'Deb',
 'Fleeta',
 'Christy',
 'Worth',
 'Jacqueline',
 'Delia',
 'Elder',
 'Gayla',
 'Celestine',
 'Karen',
 'Heriberto',
 'Semaj',
 'Dennie',
 'Madden',
 'Tyra',
 'Joana',
 'Jonathon',
 'Verl',
 'Cordie',
 'Denine',
 'Shani',
 'Lyle',
 'Shannon',
 'Vic',
 'Joesph',
 'Zeb',
 'Flo',
 'Tai',
 'Frona',
 'Anderson',
 'Rilla',
 'Bud',
 'Justus',
 'Dionte',
 'Wilton',
 'Dossie',
 'Jolie',
 'Isiah',
 'Brigid',
 'Letitia',
 'Serina',
 'Ralph',
 'Delores',
 'Sabra',
 'Jace',
 'Elonzo',
 'Chuck',
 'Jalynn',
 'Terese',
 'Shawnna',
 'Darrian',
 'Londyn',
 'Ilo',
 'Emma',
 'Fredy',
 'Marrion',
 'Rolla',
 'Giselle',
 'Loran',
 'Aldona',
 'Parlee',
 'Georgiann',
 'Durrell',
 'Nila',
 'Daniella',
 'Hector',
 'Richard',
 'Estell',
 'Otis',
 'Leonie',
 'Friend',
 'Dannielle',
 'Nola',
 'Janna',
 'Dolph',
 'Brittanie',
 'Jocelyn',
 'Deven',
 'Yael',
 'Altie',
 'Madelene',
 'Vanesa',
 'Thurston',


In [21]:
uniq['name']

['William LaRotonda',
 'Tyrone Steans',
 'Estelle Howard',
 'Leigh Smith',
 'Brandon LeBlanc',
 'Sean Quinn',
 'Bonalyn Boutwell',
 'Amy Foster-Baker',
 'Janet King',
 'Jennifer Zamora',
 'Renee Becker',
 'Taisha Goble',
 'Daniff Hernandez',
 'Jayne Horton',
 'Noelle Johnson',
 'Thomas Murray',
 'Randall Pearson',
 'Thelma Petrowsky',
 'Lori Roby',
 'Jason Salter',
 'Kramer Simard',
 'Simon Roup',
 'Ricardo Ruiz',
 'Peter Monroe',
 'Eric Dougall',
 'Rick Clayton',
 'Lisa Galia',
 'Leonara Lindsay',
 'Alejandro Bacong',
 'Anthony Cisco',
 'Linda Dolan',
 'Maria Gonzalez',
 'Carlos Merlos',
 'Tanya Morway',
 'Anita Shepard',
 'Neville Tredinnick',
 'Jumil Turpin',
 'Karthikeyan Ait Sidi',
 'Claudia Carr',
 'Donald Favis',
 'Bianca Roehrich',
 'Ann Daniele',
 'Jyoti Lajiri',
 'Jeremiah Semizoglou',
 'Joe South',
 'Sarah Warfield',
 'Elisa Bramante',
 'Michael Albert',
 'Charles Bozzi',
 'Webster Butler',
 'Elijiah Gray',
 'Jonathan Hogland',
 'Walter Immediato',
 'Ketsia Liebig',
 'Branno

In [24]:
uniq_first = []
names = list(uniq['name'])
for name in names: uniq_first.append(name.split()[0].lower())
uniq_first

IndexError: list index out of range

In [15]:
ent_dict['name'][0].append('')
ent_dict

{'name': [['William LaRotonda',
   'Tyrone Steans',
   'Estelle Howard',
   'Leigh Smith',
   'Brandon LeBlanc',
   'Sean Quinn',
   'Bonalyn Boutwell',
   'Amy Foster-Baker',
   'Janet King',
   'Jennifer Zamora',
   'Renee Becker',
   'Taisha Goble',
   'Daniff Hernandez',
   'Jayne Horton',
   'Noelle Johnson',
   'Thomas Murray',
   'Randall Pearson',
   'Thelma Petrowsky',
   'Lori Roby',
   'Jason Salter',
   'Kramer Simard',
   'Simon Roup',
   'Ricardo Ruiz',
   'Peter Monroe',
   'Eric Dougall',
   'Rick Clayton',
   'Lisa Galia',
   'Leonara Lindsay',
   'Alejandro Bacong',
   'Anthony Cisco',
   'Linda Dolan',
   'Maria Gonzalez',
   'Carlos Merlos',
   'Tanya Morway',
   'Anita Shepard',
   'Neville Tredinnick',
   'Jumil Turpin',
   'Karthikeyan Ait Sidi',
   'Claudia Carr',
   'Donald Favis',
   'Bianca Roehrich',
   'Ann Daniele',
   'Jyoti Lajiri',
   'Jeremiah Semizoglou',
   'Joe South',
   'Sarah Warfield',
   'Elisa Bramante',
   'Michael Albert',
   'Charles Bozzi'

In [16]:
ent_dict['name'][1].append(baby_names)
ent_dict

{'name': [['William LaRotonda',
   'Tyrone Steans',
   'Estelle Howard',
   'Leigh Smith',
   'Brandon LeBlanc',
   'Sean Quinn',
   'Bonalyn Boutwell',
   'Amy Foster-Baker',
   'Janet King',
   'Jennifer Zamora',
   'Renee Becker',
   'Taisha Goble',
   'Daniff Hernandez',
   'Jayne Horton',
   'Noelle Johnson',
   'Thomas Murray',
   'Randall Pearson',
   'Thelma Petrowsky',
   'Lori Roby',
   'Jason Salter',
   'Kramer Simard',
   'Simon Roup',
   'Ricardo Ruiz',
   'Peter Monroe',
   'Eric Dougall',
   'Rick Clayton',
   'Lisa Galia',
   'Leonara Lindsay',
   'Alejandro Bacong',
   'Anthony Cisco',
   'Linda Dolan',
   'Maria Gonzalez',
   'Carlos Merlos',
   'Tanya Morway',
   'Anita Shepard',
   'Neville Tredinnick',
   'Jumil Turpin',
   'Karthikeyan Ait Sidi',
   'Claudia Carr',
   'Donald Favis',
   'Bianca Roehrich',
   'Ann Daniele',
   'Jyoti Lajiri',
   'Jeremiah Semizoglou',
   'Joe South',
   'Sarah Warfield',
   'Elisa Bramante',
   'Michael Albert',
   'Charles Bozzi'

In [222]:
len(baby_names)

6782

In [219]:
ent_dict['name'][0]

['William LaRotonda',
 'Tyrone Steans',
 'Estelle Howard',
 'Leigh Smith',
 'Brandon LeBlanc',
 'Sean Quinn',
 'Bonalyn Boutwell',
 'Amy Foster-Baker',
 'Janet King',
 'Jennifer Zamora',
 'Renee Becker',
 'Taisha Goble',
 'Daniff Hernandez',
 'Jayne Horton',
 'Noelle Johnson',
 'Thomas Murray',
 'Randall Pearson',
 'Thelma Petrowsky',
 'Lori Roby',
 'Jason Salter',
 'Kramer Simard',
 'Simon Roup',
 'Ricardo Ruiz',
 'Peter Monroe',
 'Eric Dougall',
 'Rick Clayton',
 'Lisa Galia',
 'Leonara Lindsay',
 'Alejandro Bacong',
 'Anthony Cisco',
 'Linda Dolan',
 'Maria Gonzalez',
 'Carlos Merlos',
 'Tanya Morway',
 'Anita Shepard',
 'Neville Tredinnick',
 'Jumil Turpin',
 'Karthikeyan Ait Sidi',
 'Claudia Carr',
 'Donald Favis',
 'Bianca Roehrich',
 'Ann Daniele',
 'Jyoti Lajiri',
 'Jeremiah Semizoglou',
 'Joe South',
 'Sarah Warfield',
 'Elisa Bramante',
 'Michael Albert',
 'Charles Bozzi',
 'Webster Butler',
 'Elijiah Gray',
 'Jonathan Hogland',
 'Walter Immediato',
 'Ketsia Liebig',
 'Branno

In [15]:
ent_dict['name'][1][240]

['andrew szabo', 'Andrew', 'andrew', 'Szabo', 'szabo']

<h4>Labelling Check</h4>

In [16]:
intent_txt = pd.read_csv('./custom_scripts/HR Manager Schema - intent_master.csv')
intent_txt = intent_txt.iloc[2:, :]
intent_txt

Unnamed: 0,get_info,get_aggregate,get_employees,get_salary,get_salary_aggregate,get_salary_employees,get_date,get_date_range_aggregate,get_date_range_employees,get_hierarchy_up,get_hierarchy_down
2,what is {Phylicia Gosciminski|name}s org role,{percent|function} employees {below|comparator...,{female|sex} employees,Amount that {Julia|name} gets {paid|money},among all of the employees that found their jo...,Which {Sr. DBA|position} {earns|money} the {mo...,What year was {Lily DiNocco|name} {let go|empl...,{percent|function} of employees {born|dob} in ...,I want {male|sex} {born|dob} in the {1930s|tim...,is {Charles Bozzi|name} the {mentor|manager} f...,can i have the names of employees who {report ...
3,What position is {ivan|name} in?,{count|function} of workers are {less than|com...,employees {hispanic|racedesc}?,{joanne handschiegl|name} {each month|time_rec...,{sum|function} {pay|money} for {female|sex}?,Which employee(s) have {lowest|extreme} {incom...,Has {Sarah Warfield|name} been working here fo...,{1974|sys_time} {born|dob} employees {percent|...,Which employees did we {get rid of|employment_...,is {Peter Monroe|name} {managing|manager} {Amy...,which employees is {Ivan singh|name} the {mana...
4,Why did {Megan|name} get {fired|employment_act...,{How many|function} employees are {C-levels|po...,get me the {youngest|extreme} {five|sys_number...,{Mia|name} {earns|money} what amount {each day...,give me the {mean|function} {salary|money} for...,give me the {earners|money} for all of the emp...,What was the exact date when {desiree|name} wa...,What {percentage|function} of employees were {...,{forties|time_interval} {born|dob} employees w...,who is {helen billis|name}s {managing|manager}...,Who are those employees that are {under|compar...
5,Which department is {adrienne homberger|name} in?,Gimme the {percent|function} of {50|age} year ...,give me a list of {separated employees|marital...,What does {mia|name}'s {paycheck|money} look l...,what is the {highest|extreme} {amount|money} t...,give me the {earnings|money} for all of the em...,How long has Mr.{Knapp|name} worked here?,{1945|sys_time} {born|dob} employees {percent|...,Which are the employees such that in {2005|sys...,who is the {manager|manager} assigned to {luis...,I want to know if {Amy Dunn|name} is a {manger...
6,is {abdellah veera|name} a {cio|position} or not?,{How many|function} people are {performing bad...,employees that live in {california|state},{webster|name} is {earning|money} what amount ...,What is the {total|function} {earnings|money} ...,what are {network engineers|position} {making|...,Has {Nicole|name} been working here for {4 yea...,What {percent|function} of employees were {hir...,i want the employees that have been {hired |em...,I want to know if {Sam Athwal|name} {works for...,{Jenna Dietrich|name} is the {supervisor|manag...
7,Is {Mohammed Latif|name} a citizen of the us?,{cumulative|function} {count|function} of empl...,Which employees have been {terminated|employme...,"Does {54,000|sys_number} exceed what {jessica|...",what are {women|sex} {making|money} on {averag...,all the {earnings|money} of {female|sex} in th...,What was the date when {ivan rogers|name} was ...,What {percentage|function} of employees have b...,Give me the employees that have a {join date|e...,who is {Thelma Petrowsky|name}'s {managing|man...,Which are all of the employees who are having ...
8,Where does {ivan rogers|name} live?,{average|function} {age of|age} the employees ...,Who {has worked here|employment_action} based ...,{ivan|name} {Salary|money} {Yearly|time_recur},"of all the {sales manager|position}s, what is ...",who {makes|money} the {minimum|extreme} {incom...,When {Sophia Theamstern|name} was {hired|emplo...,can you please tell me what {fraction|function...,Fetch me a list of workers that have their {bi...,does {Dianna Blount|name} {report|manager} to ...,Tell me which employees have {Patrick Moran|na...
9,I want {sarah warfield|name}'s state,What's the {summed|function} {num of|function}...,give me a list of employees that are based in ...,What does {Rose Ivey|name} get for {income|mon...,get me the {average|function} amount that the ...,Get me the {lowest|extreme} {six|sys_number} {...,Fetch me {Francesco Barone|name}'s {Bday|dob},What {pct|function} of our staff have a {bday|...,get me {senior database admins|position} {born...,get me a list of all of the employees who {sup...,Who are those employees that right now have {C...
10,how did {dawn|name} hear about our corporation,What is the {total|function} {number of|functi...,Which employees have been with the company lon...,"According to the {payroll|money}, how much doe...",{number of|function} people {earning|money} {f...,which employees are {making|money} {less than|...,{Leigh Smith|name} {date of birth|dob},What {percentage|function} of employees were {...,I want all of the employees in the {sales depa...,list employees who {leads|manager} {Adrienne H...,Gimmmie a list of employees that are currently...
11,Does {Michael|name} {still work at|employment_...,{average|function} {age of|age} workers who ar...,employees are {under|comparator} {45|sys_numbe...,What is {Brooke oliver|name}'s {each year|time...,give me the {typical|function} take home {sala...,{non-citizen|citizendesc} {paycheck|money}s,{Ashley Rose|name} {birthday|dob},I want the {total|function} {number of|functio...,Can you tell me whether there are any {June|sy...,{amy dunn|name} {manager|manager} name,i want the name of everyone that is {working f...


In [17]:
#label_chk(intent_txt, ent_dict)

In [18]:
#get_labels("What is the {manager|manager} name of {Julia|name}?")

<h4>Generate Gazetteers</h4>

In [17]:
gen_gazetteers(ent_dict)

<h4>Generate Mapping.json</h4>

In [18]:
gen_map_json_files(ent_dict)

<h4>Data Augmentation</h4>

In [21]:
def entity_swap(sentence, positions, uniq):
    chars = list(sentence)
    for i in reversed(range(len(positions))):
        pos = positions[i]
        ent = ''.join(chars[pos[0]:(pos[1] + 1)])
        excluded = [] #['age']
        #print("ENTITY FOUND")
        #print(ent)
        kv = get_kv([ent])
        #print(kv)       
        for i in range(len(kv[0])):
            if kv[1][i] in uniq and kv[1][i] not in excluded:
                new_ent = "{" + random.choice(uniq[kv[1][i]]) + "|" + kv[1][i] + '}'
                chars[pos[0]:(pos[1]+1)] = list(new_ent)
                #print(new_ent)
    #print("FINAL")
    return "".join(chars)

In [22]:
# Remove Labelling
def entity_label_remove(sentence, positions, uniq):
    str_list = sentence.replace("{", "").replace("}", "").split()
    for idx, elem in enumerate(str_list):
        if "|" in elem: str_list[idx] = elem.split('|')[0]
    return " ".join(str_list)

In [None]:
#entity_swap("What is the {manager|manager} name of {Julia|name}?", [[12, 28], [38, 49]], uniq)
" ".join("i like to jump".split())

'i like to jump'

In [202]:
# Create Variation in name
# param name (str) - in form: "Mia Brown"
# return name (str) - varied name
def name_rotate(name):
    split = name.split()
    choice = random.choice([0,1,2,3])
    if choice == 0: return split[0].lower()
    elif choice == 1: return split[0] + ' ' + split[1]
    elif choice == 2: return split[0].lower() + ' ' + split[1].lower()
    else: return split[0]  

In [55]:
name_rotate('Kunal Sharma')

'kunal sharma'

In [119]:
sentences = []
for name in uniq['name']:
    for i in ['position', 'job', 'role', 'job title', 'occupation']:
        sentences.append("What is {" + name_rotate(name) + "|name}'s {" + i + "|position}?")
        sentences.append("What's {" + name_rotate(name) + "|name}s {" + i + "|position}")
        sentences.append("whats {" + name_rotate(name) + "|name} {" + i + "|position}?")
        sentences.append("{" + name_rotate(name) + "|name}'s {" + i + "|position}")
        sentences.append("{" + name_rotate(name) + "|name} {" + i + "|position}")
        sentences.append("{" + i + "|position} of {" + name_rotate(name) + "|name}")
    for i in ['race']:
        sentences.append("What is {" + name_rotate(name) + "|name}'s {" + i + "|racedesc}?")
        sentences.append("what's {" + name_rotate(name) + "|name}'s {" + i + "|racedesc}")
        sentences.append("whats {" + name_rotate(name) + "|name} {" + i + "|racedesc}?")
        sentences.append("{" + name_rotate(name) + "|name}'s {" + i + "|racedesc}")
        sentences.append("{" + i + "|racedesc} of {" + name_rotate(name) + "|name}")
    for i in ['department', 'dept']:
        sentences.append("what is {" + name_rotate(name) + "|name}'s {" + i + "|department}?")
        sentences.append("what's {" + name_rotate(name) + "|name}s {" + i + "|department}")
        sentences.append("whats {" + name_rotate(name) + "|name}'s {" + i + "|department}?")
        sentences.append("{" + name_rotate(name) + "|name}'s {" + i + "|department}")
        sentences.append("{" + name_rotate(name) + "|name} {" + i + "|department}")
        sentences.append("{" + i + "|department} of {" + name_rotate(name) + "|name}")
    for i in ['sex', 'gender']:
        sentences.append("{"+ name_rotate(name) +"|name}" + " {" + i +"|sex}")
        sentences.append("what's {"+ name_rotate(name) +"|name}'s'" + " {" + i +"|sex}")
        sentences.append("what {" + i + "|sex} is " + "{"+ name_rotate(name) +"|name}?")
        sentences.append("{" + i + "|sex} of " + "{"+ name_rotate(name) +"|name}")
        sentences.append("what's the {" + i + "|sex} of " + "{"+ name_rotate(name) +"|name}")
        sentences.append("{" + i + "|sex} that " + "{"+ name_rotate(name) +"|name} is")
    # sex, gender
    sentences.append("{"+ name_rotate(name) +"|name}"+" {state|state}")
    sentences.append("what {state|state} is "+"{"+ name_rotate(name) +"|name} from?")
    sentences.append("{state|state} of "+"{"+ name_rotate(name) +"|name}")
    sentences.append("{"+ name_rotate(name) +"|name} "+"{state|state}")
    sentences.append("{"+ name_rotate(name) +"|name}'s"+" {state|state}")
    sentences.append("{state|state} that "+"{"+ name_rotate(name) +"|name} is in")
    # employment status
    sentences.append("{"+ name_rotate(name) +"|name}"+" {employment status|employment_status}")
    sentences.append("what is the {employment status|employment_status} of "+"{"+ name_rotate(name) +"|name}")
    sentences.append("{employment status|employment_status} of "+"{"+ name_rotate(name) +"|name}")
    sentences.append("{"+ name_rotate(name) +"|name} " + "{employment status|employment_status}")
    sentences.append("{"+ name_rotate(name) +"|name}'s" + " {status of employment|employment_status}")
    sentences.append("{status of employment|employment_status} that " + "{"+ name_rotate(name) +"|name} is in")

In [90]:
# employment status

In [85]:
sentences = []
for name in uniq['name']:
    sentences.append("{"+ name_rotate(name) +"|name}" + " {state|state}")
    sentences.append("what {state|state} is " + "{"+ name_rotate(name) +"|name} from?")
    sentences.append("{state|state} of " + "{"+ name_rotate(name) +"|name}")
    sentences.append("{state|state} that " + "{"+ name_rotate(name) +"|name} is in")

In [137]:
sentences = []
for emp in ['people', 'workers', 'employees']:
    for i in ['non citizen', 'not a citizen', 'non us citizen', 'citizen', 'us citizen', 'not citizens', 'from abroad',
                 'immigrant', 'immigrants', 'eligble non citizen', 'non-citizen', 'non-citizens', 'eligible non-citizens']:
        sentences.append("whats the {count of|function} "+emp+ " that are {"+i+"|citizendesc}?")
        sentences.append("what's the {number of|function} "+emp+ " which are {"+i+"|citizendesc}")
        sentences.append("{total|function} {count of|function} {"+i+"|citizendesc}")
        sentences.append("{"+i+"|citizendesc} {total|function}")
        sentences.append("{how many|function} {"+i+"|citizendesc}")
        sentences.append("whats the {number of|function} {"+i+"|citizendesc}")
        sentences.append("{"+i+"|citizendesc} " + "{total|function} {count of|function}")
        sentences.append("{number of|function} {"+i+"|citizendesc} at this org")
        sentences.append("{number of|function} {"+i+"|citizendesc} in the company")
        sentences.append("{how many|function} {"+i+"|citizendesc} are working at this company")

In [178]:
sentences = []
for emp in ['people', 'workers', 'employees']:
    for i in ['non citizen', 'not a citizen', 'non us citizen', 'citizen', 'us citizen', 'not citizens', 'from abroad',
                 'immigrant', 'immigrants', 'eligble non citizen', 'non-citizen', 'non-citizens', 'eligible non-citizens']:
        sentences.append("what is the {typical|function} {salary|money} of {"+i+"|citizendesc} "+ emp)
        sentences.append("what is the {average|function} {pay|money} for {"+i+"|citizendesc} "+ emp)
        sentences.append("whats the {average|function} {pay|money} for {"+i+"|citizendesc} "+ emp)
        sentences.append("{average|function} {hourly|time_recur} {earnings|money} for {"+i+"|citizendesc}")
        sentences.append("how much do {"+i+"|citizendesc} {earn|money} on {average|function} {hourly|time_recur}")
        sentences.append("what do {"+i+"|citizendesc} {make|money} on {average|function} {yearly|time_recur}")
        sentences.append("{"+i+"|citizendesc}" + " {earnings|money} {total|function}")
        sentences.append("{"+i+"|citizendesc}" + " {pay|money} {sum|function}")
        
        

In [182]:
sentences = []
for emp in ['people', 'workers', 'employees']:
    for i in ['non citizen', 'not a citizen', 'non us citizen', 'citizen', 'us citizen', 'not citizens', 'from abroad',
                 'immigrant', 'immigrants', 'eligble non citizen', 'non-citizen', 'non-citizens', 'eligible non-citizens']:
        sentences.append("which {"+i+"|citizendesc} " + emp + " {earns|money} the {most|extreme}")
        sentences.append("{"+i+"|citizendesc} " + emp + " who {makes|money} the {least|extreme}")
        sentences.append("get me the {"+i+"|citizendesc} {earning|money} the {max|extreme}")
        sentences.append("find me the {"+i+"|citizendesc} " + emp + " {making|money} the {minimum|extreme}")
        sentences.append("{earnings|money} of {"+i+"|citizendesc} " + emp)
        sentences.append("{salaries|money} of {"+i+"|citizendesc}")
        sentences.append("{paychecks|money} {"+i+"|citizendesc}")
        sentences.append("{"+i+"|citizendesc} {salaries|money}")
        sentences.append("{"+i+"|citizendesc} " + emp + " {salaries|money}")
        

In [191]:
sentences = []
for emp in ['people', 'workers', 'employees']:
    for i in ['non citizen', 'not a citizen', 'non us citizen', 'citizen', 'us citizen', 'not citizens', 'from abroad',
                 'immigrant', 'immigrants', 'eligble non citizen', 'non-citizen', 'non-citizens', 'eligible non-citizens']:
        sentences.append("{count|function} of {"+i+"|citizendesc} " + emp + " {born|dob} {before|date_compare} {1994|sys_time}")
        sentences.append("{sum|function} of {"+i+"|citizendesc} " + emp + " {hired|employment_action} {after|date_compare} {2005|sys_time}")
        sentences.append("{count of|function} {"+i+"|citizendesc} {let go|employment_action} {prior to|date_compare} {2010|sys_time}")
        sentences.append("{"+i+"|citizendesc} " + emp + " with {birthday|dob} {before|date_compare} {1996|sys_time} {count|function}" )
        sentences.append("{"+i+"|citizendesc} " + emp + " that {joined|employment_action} {after|date_compare} {2013|sys_time} {how many|function}")
        sentences.append("{"+i+"|citizendesc} " + emp + " was {fired|employment_action} {prior to|date_compare} {2009|sys_time} {count|function}")

In [198]:
sentences = []
for emp in ['people', 'workers', 'employees']:
    for i in ['non citizen', 'not a citizen', 'non us citizen', 'citizen', 'us citizen', 'not citizens', 'from abroad',
                 'immigrant', 'immigrants', 'eligble non citizen', 'non-citizen', 'non-citizens', 'eligible non-citizens']:
        sentences.append("get me a list of {"+i+"|citizendesc} " + emp + "that have their {birthdays|dob} in the {1970's|time_interval}")
        sentences.append("which {"+i+"|citizendesc} " + emp + " have a {birthday|dob} in {1960|time_interval}")
        sentences.append("Find me the names of {"+i+"|citizendesc}" + " {hired|employment_action} in {2013|sys_time}")
        sentences.append("which {"+i+"|citizendesc} "+ emp +" {joined|employment_action} in {2012|sys_time}")
        sentences.append("{"+i+"|citizendesc}"+" employees were {fired|employment_action} in the {last|date_compare} {five years|sys_duration}?")

In [200]:
uniq['name']

['William LaRotonda',
 'Tyrone Steans',
 'Estelle Howard',
 'Leigh Smith',
 'Brandon LeBlanc',
 'Sean Quinn',
 'Bonalyn Boutwell',
 'Amy Foster-Baker',
 'Janet King',
 'Jennifer Zamora',
 'Renee Becker',
 'Taisha Goble',
 'Daniff Hernandez',
 'Jayne Horton',
 'Noelle Johnson',
 'Thomas Murray',
 'Randall Pearson',
 'Thelma Petrowsky',
 'Lori Roby',
 'Jason Salter',
 'Kramer Simard',
 'Simon Roup',
 'Ricardo Ruiz',
 'Peter Monroe',
 'Eric Dougall',
 'Rick Clayton',
 'Lisa Galia',
 'Leonara Lindsay',
 'Alejandro Bacong',
 'Anthony Cisco',
 'Linda Dolan',
 'Maria Gonzalez',
 'Carlos Merlos',
 'Tanya Morway',
 'Anita Shepard',
 'Neville Tredinnick',
 'Jumil Turpin',
 'Karthikeyan Ait Sidi',
 'Claudia Carr',
 'Donald Favis',
 'Bianca Roehrich',
 'Ann Daniele',
 'Jyoti Lajiri',
 'Jeremiah Semizoglou',
 'Joe South',
 'Sarah Warfield',
 'Elisa Bramante',
 'Michael Albert',
 'Charles Bozzi',
 'Webster Butler',
 'Elijiah Gray',
 'Jonathan Hogland',
 'Walter Immediato',
 'Ketsia Liebig',
 'Branno

In [None]:
for emp in ['people', 'workers', 'employees']:
    for i in ['non citizen', 'not a citizen', 'non us citizen', 'citizen', 'us citizen', 'not citizens', 'from abroad',
                 'immigrant', 'immigrants', 'eligble non citizen', 'non-citizen', 'non-citizens', 'eligible non-citizens']:
        sentences.append("get me a list of {"+i+"|citizendesc} " + emp + "that have their {birthdays|dob} in the {1970's|time_interval}")
        sentences.append("which {"+i+"|citizendesc} " + emp + " have a {birthday|dob} in {1960|time_interval}")
        sentences.append("Find me the names of {"+i+"|citizendesc}" + " {hired|employment_action} in {2013|sys_time}")
        sentences.append("which {"+i+"|citizendesc} "+ emp +" {joined|employment_action} in {2012|sys_time}")
        sentences.append("{"+i+"|citizendesc}"+" employees were {fired|employment_action} in the {last|date_compare} {five years|sys_duration}?")

In [203]:
sentences = []
for name in uniq['name']:
    for i in ['non citizen', 'not a citizen', 'non us citizen', 'citizen', 'us citizen', 'not citizens', 'from abroad',
                 'immigrant', 'immigrants', 'eligble non citizen', 'non-citizen', 'non-citizens', 'eligible non-citizens']:
        sentences.append("is " + name_rotate(name) + " a " + "{"+i+"|citizendesc}?")
        sentences.append(name_rotate(name) + " {"+i+"|citizendesc}?")
        sentences.append("{"+i+"|citizendesc} " + name_rotate(name) + "?")

In [204]:
print(len(sentences))

9555


In [165]:
k = ['']

In [158]:
'': print("op")

SyntaxError: illegal target for annotation (<ipython-input-158-81e9b6e59ee3>, line 1)

In [166]:
try: k[0]
except: print("p")

In [196]:
for i in sentences: print(i)

get me a list of {non citizen|citizendesc} peoplethat have their {birthdays|dob} in the {1970's|time_interval}
which {non citizen|citizendesc} people have a {birthday|dob} in {1960|time_interval}
Find me the names of {non citizen|citizendesc} {hired|employment_action} in {2013|sys_time}
which {non citizen|citizendesc}people {joined|employment_action} in {2012|sys_time}
{non citizen|citizendesc} employees were {fired|employment_action} in the {last|date_compare} {five years|sys_duration}?
get me a list of {not a citizen|citizendesc} peoplethat have their {birthdays|dob} in the {1970's|time_interval}
which {not a citizen|citizendesc} people have a {birthday|dob} in {1960|time_interval}
Find me the names of {not a citizen|citizendesc} {hired|employment_action} in {2013|sys_time}
which {not a citizen|citizendesc}people {joined|employment_action} in {2012|sys_time}
{not a citizen|citizendesc} employees were {fired|employment_action} in the {last|date_compare} {five years|sys_duration}?
get 

In [205]:
for i in random.sample(sentences, 70): print(i)

{us citizen|citizendesc} Carol?
hamish {non-citizens|citizendesc}?
{from abroad|citizendesc} jeanette?
{not citizens|citizendesc} maliki?
{immigrants|citizendesc} Miguel Estremera?
is dawn a {eligble non citizen|citizendesc}?
bartholemew {us citizen|citizendesc}?
is sarah warfield a {not citizens|citizendesc}?
Donna Brill {non us citizen|citizendesc}?
is shakira perry a {eligible non-citizens|citizendesc}?
robyn manchester {eligble non citizen|citizendesc}?
Colby {us citizen|citizendesc}?
is Caitrin a {immigrants|citizendesc}?
is maliki moumanil a {from abroad|citizendesc}?
is Jeremiah a {immigrants|citizendesc}?
quinn {eligible non-citizens|citizendesc}?
webster butler {us citizen|citizendesc}?
{eligible non-citizens|citizendesc} Robyn?
is Barry a {non citizen|citizendesc}?
{not citizens|citizendesc} Raul?
Maliki Moumanil {us citizen|citizendesc}?
lenora tejeda {not a citizen|citizendesc}?
is mohammed a {non citizen|citizendesc}?
is simon a {non citizen|citizendesc}?
{non citizen|citi

In [None]:
"What is {mia|name}'s {job title|position}"

In [None]:
intent_txt

Unnamed: 0,get_info,get_aggregate,get_employees,get_salary,get_salary_aggregate,get_salary_employees,get_date,get_date_range_aggregate,get date_range_employees,get_hierarchy
2,What is {nan|name}'s race?,What is the {total|function} {number of|functi...,Give me employees who are {single|maritaldesc},What is {Mia|name}'s {pay|money}?,What is the {median|function} {pay|money} of {...,Which employee(s) have {lowest|extreme} {incom...,What is the {date of hiring|employment_action}...,What {percentage|function} of employees were {...,Give me a list of people {hired|employment_act...,Who is {Mia|name}'s {manager|manager}?
3,Is {Michael|name} {married|maritaldesc}?,What {percent|function} of employees {exceeded...,All employees from {MA|state},Tell me who earned the {least|extreme} that wa...,what {percent|function} of employees {make|mon...,who is the {highest|extreme} {earning|money} {...,When did {Amy|name} {join|employment_action} t...,What {percent|function} of employees were {hir...,Tell me about employees who {started|employmen...,Which employees have {Julia|name} as their {ma...
4,What is {Nan|name}'s official position?,What is the {percentage|function} of new grads...,Which employees have been recently {terminated...,What is the {pay rate|money} of {Julia|name}?,What is the {average|function} {pay rate|money}?,For employees {hired|employment_action} {betwe...,How long has {Ivan|name} been with the company?,What {percentage|function} of employees were {...,Which employees were not yet {born|dob} when {...,What is the name of {Julia|name}'s {manager|ma...
5,did {Nan|name} hear about us through {Glassdoo...,What is the {average|function} {age|age}?,Which employees are not {US citizens|citizende...,What is {Nan|name}'s {pay rate|money}?,What is the {average|function} {pay|money} of ...,what are the {salaries|money} for employees th...,When was {Nan|name} {fired|employment_action}?,What {percent|function} of all our employees w...,list the employees who {joined|employment_acti...,Who is {Michael|name}'s {manager|manager}?
6,give me {Nan|name}'s race please,What {percent|function} of employees are manag...,Which employees were {let go|employment_action...,How much does {Michael|name} {make|money}?,Calculate the {average|function} {pay rate|mon...,what are our {top|extreme} {earners|money} {ma...,How long was {Mia|name} working for?,What {percentage|function} of employees were {...,Which employess were {hired|employment_action}...,Who is {John Reeder|name}'s {manager|manager}?
7,What is {Mia|name}'s employment status?,What's the {average|function} {age of|age} emp...,{managers|position},Is {Mia|name} being {paid|money} {$40k|sys_amo...,{average|function} {pay rate|money} for {women...,{below|comparator} {average|function} {earning...,When was {Michael|name}'s {date of hire|employ...,{How many|function} employees were {hired|empl...,Which employees were {hired|employment_action}...,Who is {Mia Brown|name}'s {manager|manager}?
8,Is {Ivan|name} from out of state?,What {percentage|function} of the employees ar...,Which employees have been {terminated|employme...,Does {Mia|name} get {$|money}{70k|sys_number} ...,What's the {average|function} {pay rate|money}?,Tell me who all are {making|money} {more than|...,What is {Nan|name}'s {date of birth|dob}?,{How many|function} people were based out of {...,Who worked for Cisco for {less than|comparator...,Who is the {manager|manager} for {Bob|name}
9,Does {Michael|name} {still work at|employment_...,What {percentage|function} of employees are {e...,Which employees have been with the company lon...,When we let {Nan|name} go {fired|employment_ac...,What is the {average|function} {pay rate|money...,Which {software engineers|position} are {paid|...,What year was {Mia|name} {hired|employment_act...,What {percentage|function} of employees have b...,Which employees have been {hired|employment_ac...,What is {Mia|name}'s {manager|manager}'s name?
10,What is {Mia|name}'s {performance score|perfor...,What {percentage|function} of employees are {f...,Which employees have gotten only {positive fee...,{pay rate|money} of {Mia|name},{How many|function} employees are paid {above|...,"Of all the {Production Managers|position}, whi...",When did we {fire|employment_action} {Jeff|name}?,{How many|function} employees were {born|dob} ...,Which employee was {hired|employment_action} w...,Who {reports|manager} into {Nan Singh|name}
11,What position is {Julia|name} in?,{Percentage|function} of Employees in departme...,Which employees have a {spouse|maritaldesc}?,how much {money|money} does {Mia|name} make?,what {number of|function} people {earn|money} ...,above {average|function} {earning|money} emplo...,How long has {Mia|name} worked here?,{How many|function} people have {worked here|e...,Which employees have {been with us|employment_...,can i have the names of employees who report t...


In [None]:
def data_augment(df, uniq):
    df = intent_txt
    for col in df:
        augment = []
        print(col.upper() + "=============================================================")
        idx = 2
        nan = is_nan(df[col][idx])
        l_dict = {}
        while(idx < len(df)):
                if(is_nan(df[col][idx])): break
                #print(idx)
                sentence = df[col][idx]
                labels, pos = get_labels(sentence)
                for i in range(3): augment.append(entity_swap(sentence, pos, uniq))
                idx += 1
        # Create files
        #augment = set(augment)
        #if((250 - idx) < len(augment)): augment = random.sample(augment, 250 - idx)
        print("Augmented Lines Generated: " + str(len(augment)))
        with open('data_augment/' + col + ".txt", 'w+') as filehandle:  
            filehandle.writelines("%s\n" % line for line in augment)

#data_augment(intent_txt, uniq)

In [None]:
from mindmeld import configure_logs; configure_logs()
from mindmeld.components.nlp import NaturalLanguageProcessor
nlp = NaturalLanguageProcessor(app_path='./hr_assistant')
nlp.build()

  from numpy.core.umath_tests import inner1d


Fitting domain classifier
Loading raw queries from file ./hr_assistant/domains/date/get_date/train.txt
Loading raw queries from file ./hr_assistant/domains/date/get_date_range_aggregate/train.txt
Loading raw queries from file ./hr_assistant/domains/date/get_date_range_employees/train.txt
Loading raw queries from file ./hr_assistant/domains/general/get_aggregate/train.txt
Loading raw queries from file ./hr_assistant/domains/general/get_employees/train.txt
Loading raw queries from file ./hr_assistant/domains/general/get_info/train.txt
Loading raw queries from file ./hr_assistant/domains/hierarchy/get_hierarchy/train.txt
Loading raw queries from file ./hr_assistant/domains/salary/get_salary/train.txt
Loading raw queries from file ./hr_assistant/domains/salary/get_salary_aggregate/train.txt
Loading raw queries from file ./hr_assistant/domains/salary/get_salary_employees/train.txt
Loading raw queries from file ./hr_assistant/domains/unsupported/unsupported/train.txt
Loading queries from fil

100%|██████████| 1/1 [00:00<00:00, 53.69it/s]

Loaded 1 document
Fitting role classifier: domain='date', intent='get_date', entity_type='name'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_name'
Elasticsearch index 'synonym_name' for application 'hr_assistant' already exists!
Loading index 'synonym_name'



100%|██████████| 245/245 [00:00<00:00, 907.41it/s]

Loaded 245 documents
Fitting role classifier: domain='date', intent='get_date', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 54.44it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date', entity_type='sys_duration'
No role model configuration set. Using default.





Fitting entity recognizer: domain='date', intent='get_date_range_employees'
No entity model configuration set. Using default.
Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 92.88%, params: {'C': 100, 'penalty': 'l1'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_res

100%|██████████| 28/28 [00:00<00:00, 470.98it/s]

Loaded 28 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='dob'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_dob'
Elasticsearch index 'synonym_dob' for application 'hr_assistant' already exists!
Loading index 'synonym_dob'



100%|██████████| 1/1 [00:00<00:00, 50.45it/s]

Loaded 1 document
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='racedesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_racedesc'
Elasticsearch index 'synonym_racedesc' for application 'hr_assistant' already exists!
Loading index 'synonym_racedesc'



100%|██████████| 6/6 [00:00<00:00, 165.67it/s]

Loaded 6 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 78.63it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='citizendesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_citizendesc'
Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'



100%|██████████| 3/3 [00:00<00:00, 104.64it/s]

Loaded 3 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='sys_interval'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'
Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'



100%|██████████| 2/2 [00:00<00:00, 49.04it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='sys_duration'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'



100%|██████████| 4/4 [00:00<00:00, 129.05it/s]

Loaded 4 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!
Loading index 'synonym_position'



100%|██████████| 24/24 [00:00<00:00, 290.72it/s]

Loaded 24 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='extreme'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_extreme'
Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'



100%|██████████| 2/2 [00:00<00:00, 50.91it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='maritaldesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_maritaldesc'
Elasticsearch index 'synonym_maritaldesc' for application 'hr_assistant' already exists!
Loading index 'synonym_maritaldesc'



100%|██████████| 5/5 [00:00<00:00, 52.93it/s]

Loaded 5 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='date_compare'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_date_compare'
Elasticsearch index 'synonym_date_compare' for application 'hr_assistant' already exists!
Loading index 'synonym_date_compare'



100%|██████████| 2/2 [00:00<00:00, 27.04it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='time_interval'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_time_interval'
Elasticsearch index 'synonym_time_interval' for application 'hr_assistant' already exists!
Loading index 'synonym_time_interval'



100%|██████████| 10/10 [00:00<00:00, 255.23it/s]

Loaded 10 documents
Fitting role classifier: domain='date', intent='get_date_range_employees', entity_type='department'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'



100%|██████████| 6/6 [00:00<00:00, 112.24it/s]

Loaded 6 documents
Fitting entity recognizer: domain='date', intent='get_date_range_aggregate'
No entity model configuration set. Using default.





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 91.42%, params: {'C': 1000000, 'penalty': 'l2'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='state'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_stat

100%|██████████| 28/28 [00:00<00:00, 602.06it/s]

Loaded 28 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='dob'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_dob'
Elasticsearch index 'synonym_dob' for application 'hr_assistant' already exists!
Loading index 'synonym_dob'



100%|██████████| 1/1 [00:00<00:00, 48.63it/s]

Loaded 1 document
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 73.09it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'
Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'



100%|██████████| 2/2 [00:00<00:00, 58.29it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='comparator'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'


100%|██████████| 4/4 [00:00<00:00, 189.46it/s]

Loaded 4 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='sys_duration'
No role model configuration set. Using default.
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='function'





No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_function'
Elasticsearch index 'synonym_function' for application 'hr_assistant' already exists!
Loading index 'synonym_function'


100%|██████████| 4/4 [00:00<00:00, 122.67it/s]

Loaded 4 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='date_compare'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_date_compare'
Elasticsearch index 'synonym_date_compare' for application 'hr_assistant' already exists!
Loading index 'synonym_date_compare'



100%|██████████| 2/2 [00:00<00:00, 63.41it/s]

Loaded 2 documents
Fitting role classifier: domain='date', intent='get_date_range_aggregate', entity_type='time_interval'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_time_interval'
Elasticsearch index 'synonym_time_interval' for application 'hr_assistant' already exists!
Loading index 'synonym_time_interval'



100%|██████████| 10/10 [00:00<00:00, 202.78it/s]

Loaded 10 documents





Fitting intent classifier: domain='general'
Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 98.71%, params: {'C': 1, 'class_weight': {0: 1.105, 1: 0.906276150627615, 2: 1.0173267326732673}, 'fit_intercept': True}
Fitting entity recognizer: domain='general', intent='get_info'
No entity model configuration set. Using default.
Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 95.16%, params: {'C': 100, 'penalty': 'l2'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution

100%|██████████| 17/17 [00:00<00:00, 461.56it/s]

Loaded 17 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='state'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_state'
Elasticsearch index 'synonym_state' for application 'hr_assistant' already exists!
Loading index 'synonym_state'



100%|██████████| 28/28 [00:00<00:00, 719.07it/s]

Loaded 28 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='age'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_age'
Elasticsearch index 'synonym_age' for application 'hr_assistant' already exists!
Loading index 'synonym_age'



100%|██████████| 39/39 [00:00<00:00, 698.91it/s]

Loaded 39 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='employee_source'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employee_source'
Elasticsearch index 'synonym_employee_source' for application 'hr_assistant' already exists!
Loading index 'synonym_employee_source'



100%|██████████| 20/20 [00:00<00:00, 489.89it/s]

Loaded 20 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='name'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_name'





Elasticsearch index 'synonym_name' for application 'hr_assistant' already exists!
Loading index 'synonym_name'


100%|██████████| 245/245 [00:00<00:00, 1029.06it/s]

Loaded 245 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='citizendesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_citizendesc'
Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'



100%|██████████| 3/3 [00:00<00:00, 79.87it/s]

Loaded 3 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='racedesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_racedesc'
Elasticsearch index 'synonym_racedesc' for application 'hr_assistant' already exists!
Loading index 'synonym_racedesc'



100%|██████████| 6/6 [00:00<00:00, 158.04it/s]

Loaded 6 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 51.76it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='performance_score'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_performance_score'
Elasticsearch index 'synonym_performance_score' for application 'hr_assistant' already exists!
Loading index 'synonym_performance_score'



100%|██████████| 7/7 [00:00<00:00, 144.30it/s]

Loaded 7 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'





Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'


100%|██████████| 2/2 [00:00<00:00, 72.09it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!
Loading index 'synonym_position'



100%|██████████| 24/24 [00:00<00:00, 432.15it/s]

Loaded 24 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='maritaldesc'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_maritaldesc'
Elasticsearch index 'synonym_maritaldesc' for application 'hr_assistant' already exists!
Loading index 'synonym_maritaldesc'


100%|██████████| 5/5 [00:00<00:00, 98.45it/s]

Loaded 5 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='employment_status'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_status'





Elasticsearch index 'synonym_employment_status' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_status'


100%|██████████| 5/5 [00:00<00:00, 122.37it/s]

Loaded 5 documents
Fitting role classifier: domain='general', intent='get_info', entity_type='department'





No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'


100%|██████████| 6/6 [00:00<00:00, 150.20it/s]

Loaded 6 documents
Fitting entity recognizer: domain='general', intent='get_aggregate'
No entity model configuration set. Using default.





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 93.77%, params: {'C': 100, 'penalty': 'l1'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolut

100%|██████████| 2/2 [00:00<00:00, 76.83it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='performance_score'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_performance_score'
Elasticsearch index 'synonym_performance_score' for application 'hr_assistant' already exists!
Loading index 'synonym_performance_score'



100%|██████████| 7/7 [00:00<00:00, 172.10it/s]

Loaded 7 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!
Loading index 'synonym_position'



100%|██████████| 24/24 [00:00<00:00, 639.13it/s]

Loaded 24 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='manager'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_manager'
Elasticsearch index 'synonym_manager' for application 'hr_assistant' already exists!
Loading index 'synonym_manager'



100%|██████████| 1/1 [00:00<00:00, 67.51it/s]

Loaded 1 document
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='citizendesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_citizendesc'
Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'



100%|██████████| 3/3 [00:00<00:00, 82.41it/s]

Loaded 3 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='extreme'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_extreme'





Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'


100%|██████████| 2/2 [00:00<00:00, 80.07it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='function'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_function'
Elasticsearch index 'synonym_function' for application 'hr_assistant' already exists!
Loading index 'synonym_function'



100%|██████████| 4/4 [00:00<00:00, 96.94it/s]

Loaded 4 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='employee_source'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employee_source'
Elasticsearch index 'synonym_employee_source' for application 'hr_assistant' already exists!
Loading index 'synonym_employee_source'



100%|██████████| 20/20 [00:00<00:00, 430.82it/s]

Loaded 20 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='state'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_state'
Elasticsearch index 'synonym_state' for application 'hr_assistant' already exists!
Loading index 'synonym_state'



100%|██████████| 28/28 [00:00<00:00, 657.09it/s]

Loaded 28 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='racedesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_racedesc'
Elasticsearch index 'synonym_racedesc' for application 'hr_assistant' already exists!
Loading index 'synonym_racedesc'



100%|██████████| 6/6 [00:00<00:00, 149.38it/s]

Loaded 6 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'



100%|██████████| 4/4 [00:00<00:00, 117.60it/s]

Loaded 4 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='maritaldesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_maritaldesc'
Elasticsearch index 'synonym_maritaldesc' for application 'hr_assistant' already exists!
Loading index 'synonym_maritaldesc'



100%|██████████| 5/5 [00:00<00:00, 141.18it/s]

Loaded 5 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='age'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_age'
Elasticsearch index 'synonym_age' for application 'hr_assistant' already exists!
Loading index 'synonym_age'



100%|██████████| 39/39 [00:00<00:00, 774.09it/s]

Loaded 39 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='reason_for_termination'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_reason_for_termination'
Elasticsearch index 'synonym_reason_for_termination' for application 'hr_assistant' already exists!
Loading index 'synonym_reason_for_termination'



100%|██████████| 17/17 [00:00<00:00, 392.75it/s]

Loaded 17 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='dob'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_dob'





Elasticsearch index 'synonym_dob' for application 'hr_assistant' already exists!
Loading index 'synonym_dob'


100%|██████████| 1/1 [00:00<00:00, 57.79it/s]

Loaded 1 document
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='name'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_name'
Elasticsearch index 'synonym_name' for application 'hr_assistant' already exists!
Loading index 'synonym_name'



100%|██████████| 245/245 [00:00<00:00, 961.52it/s]

Loaded 245 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'
Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'



100%|██████████| 2/2 [00:00<00:00, 48.06it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='employment_status'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_status'





Elasticsearch index 'synonym_employment_status' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_status'


100%|██████████| 5/5 [00:00<00:00, 137.76it/s]

Loaded 5 documents
Fitting role classifier: domain='general', intent='get_aggregate', entity_type='department'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'



100%|██████████| 6/6 [00:00<00:00, 132.98it/s]

Loaded 6 documents
Fitting entity recognizer: domain='general', intent='get_employees'





No entity model configuration set. Using default.
Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 93.27%, params: {'C': 100, 'penalty': 'l1'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model c

100%|██████████| 2/2 [00:00<00:00, 71.19it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='performance_score'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_performance_score'
Elasticsearch index 'synonym_performance_score' for application 'hr_assistant' already exists!
Loading index 'synonym_performance_score'



100%|██████████| 7/7 [00:00<00:00, 182.72it/s]

Loaded 7 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!
Loading index 'synonym_position'



100%|██████████| 24/24 [00:00<00:00, 505.28it/s]

Loaded 24 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='manager'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_manager'
Elasticsearch index 'synonym_manager' for application 'hr_assistant' already exists!
Loading index 'synonym_manager'



100%|██████████| 1/1 [00:00<00:00, 67.41it/s]

Loaded 1 document
Fitting role classifier: domain='general', intent='get_employees', entity_type='citizendesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_citizendesc'
Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'



100%|██████████| 3/3 [00:00<00:00, 150.42it/s]

Loaded 3 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='race'
No role model configuration set. Using default.
Entity data file not found at './hr_assistant/entities/race/gazetteer.txt'. Proceeding with empty entity data.
Entity mapping file not found at './hr_assistant/entities/race/mapping.json'. Proceeding with empty entity data.
Entity map file not found at ./hr_assistant/entities/race/mapping.json
Importing synonym data to synonym index 'synonym_race'
Elasticsearch index 'synonym_race' for application 'hr_assistant' already exists!
Loading index 'synonym_race'



0it [00:00, ?it/s]

Loaded 0 documents





Fitting role classifier: domain='general', intent='get_employees', entity_type='sys_duration'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_employees', entity_type='extreme'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_extreme'
Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'


100%|██████████| 2/2 [00:00<00:00, 80.72it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='function'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_function'
Elasticsearch index 'synonym_function' for application 'hr_assistant' already exists!
Loading index 'synonym_function'


100%|██████████| 4/4 [00:00<00:00, 115.78it/s]

Loaded 4 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='date_compare'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_date_compare'
Elasticsearch index 'synonym_date_compare' for application 'hr_assistant' already exists!
Loading index 'synonym_date_compare'



100%|██████████| 2/2 [00:00<00:00, 67.50it/s]

Loaded 2 documents





Fitting role classifier: domain='general', intent='get_employees', entity_type='sys_ordinal'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_employees', entity_type='employee_source'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employee_source'
Elasticsearch index 'synonym_employee_source' for application 'hr_assistant' already exists!
Loading index 'synonym_employee_source'


100%|██████████| 20/20 [00:00<00:00, 435.60it/s]

Loaded 20 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='state'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_state'
Elasticsearch index 'synonym_state' for application 'hr_assistant' already exists!
Loading index 'synonym_state'


100%|██████████| 28/28 [00:00<00:00, 575.92it/s]

Loaded 28 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='racedesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_racedesc'
Elasticsearch index 'synonym_racedesc' for application 'hr_assistant' already exists!
Loading index 'synonym_racedesc'



100%|██████████| 6/6 [00:00<00:00, 154.82it/s]

Loaded 6 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='comparator'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'


100%|██████████| 4/4 [00:00<00:00, 134.70it/s]

Loaded 4 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='maritaldesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_maritaldesc'
Elasticsearch index 'synonym_maritaldesc' for application 'hr_assistant' already exists!
Loading index 'synonym_maritaldesc'



100%|██████████| 5/5 [00:00<00:00, 165.62it/s]

Loaded 5 documents





Fitting role classifier: domain='general', intent='get_employees', entity_type='age'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_age'
Elasticsearch index 'synonym_age' for application 'hr_assistant' already exists!
Loading index 'synonym_age'


100%|██████████| 39/39 [00:00<00:00, 813.46it/s]

Loaded 39 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='reason_for_termination'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_reason_for_termination'
Elasticsearch index 'synonym_reason_for_termination' for application 'hr_assistant' already exists!
Loading index 'synonym_reason_for_termination'



100%|██████████| 17/17 [00:00<00:00, 433.42it/s]

Loaded 17 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='dob'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_dob'
Elasticsearch index 'synonym_dob' for application 'hr_assistant' already exists!
Loading index 'synonym_dob'



100%|██████████| 1/1 [00:00<00:00, 50.67it/s]

Loaded 1 document
Fitting role classifier: domain='general', intent='get_employees', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_employees', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='general', intent='get_employees', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'
Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'



100%|██████████| 2/2 [00:00<00:00, 99.07it/s]

Loaded 2 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='employment_status'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_employment_status'
Elasticsearch index 'synonym_employment_status' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_status'


100%|██████████| 5/5 [00:00<00:00, 128.61it/s]

Loaded 5 documents
Fitting role classifier: domain='general', intent='get_employees', entity_type='department'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'



100%|██████████| 6/6 [00:00<00:00, 148.33it/s]

Loaded 6 documents
Fitting intent classifier: domain='salary'





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 96.25%, params: {'C': 10, 'class_weight': {0: 0.9269058295964125, 1: 1.0489655172413792, 2: 1.0666666666666667}, 'fit_intercept': True}
Fitting entity recognizer: domain='salary', intent='get_salary_employees'
No entity model configuration set. Using default.
Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 93.92%, params: {'C': 10000, 'penalty': 'l2'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configura

100%|██████████| 20/20 [00:00<00:00, 489.96it/s]

Loaded 20 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='name'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_name'
Elasticsearch index 'synonym_name' for application 'hr_assistant' already exists!
Loading index 'synonym_name'



100%|██████████| 245/245 [00:00<00:00, 1110.38it/s]

Loaded 245 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='citizendesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_citizendesc'
Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'



100%|██████████| 3/3 [00:00<00:00, 121.72it/s]

Loaded 3 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 77.15it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='racedesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_racedesc'
Elasticsearch index 'synonym_racedesc' for application 'hr_assistant' already exists!
Loading index 'synonym_racedesc'



100%|██████████| 6/6 [00:00<00:00, 208.26it/s]

Loaded 6 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='employment_status'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_status'
Elasticsearch index 'synonym_employment_status' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_status'



100%|██████████| 5/5 [00:00<00:00, 143.43it/s]

Loaded 5 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='sys_amount-of-money'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='sex'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_sex'
Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'


100%|██████████| 2/2 [00:00<00:00, 49.11it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'



100%|██████████| 4/4 [00:00<00:00, 119.10it/s]

Loaded 4 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='sys_duration'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='time_recur'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_time_recur'
Elasticsearch index 'synonym_time_recur' for application 'hr_assistant' already exists!
Loading index 'synonym_time_recur'



100%|██████████| 5/5 [00:00<00:00, 131.63it/s]

Loaded 5 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!
Loading index 'synonym_position'



100%|██████████| 24/24 [00:00<00:00, 406.81it/s]

Loaded 24 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='extreme'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_extreme'
Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'


100%|██████████| 2/2 [00:00<00:00, 68.68it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='function'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_function'
Elasticsearch index 'synonym_function' for application 'hr_assistant' already exists!
Loading index 'synonym_function'


100%|██████████| 4/4 [00:00<00:00, 86.11it/s]

Loaded 4 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='date_compare'
No role model configuration set. Using default.





Importing synonym data to synonym index 'synonym_date_compare'
Elasticsearch index 'synonym_date_compare' for application 'hr_assistant' already exists!
Loading index 'synonym_date_compare'


100%|██████████| 2/2 [00:00<00:00, 63.93it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='manager'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_manager'
Elasticsearch index 'synonym_manager' for application 'hr_assistant' already exists!
Loading index 'synonym_manager'



100%|██████████| 1/1 [00:00<00:00, 50.22it/s]

Loaded 1 document
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='money'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_money'
Elasticsearch index 'synonym_money' for application 'hr_assistant' already exists!
Loading index 'synonym_money'



100%|██████████| 1/1 [00:00<00:00, 65.47it/s]

Loaded 1 document
Fitting role classifier: domain='salary', intent='get_salary_employees', entity_type='department'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'



100%|██████████| 6/6 [00:00<00:00, 126.75it/s]

Loaded 6 documents
Fitting entity recognizer: domain='salary', intent='get_salary'
No entity model configuration set. Using default.





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 98.30%, params: {'C': 10000, 'penalty': 'l1'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary', entity_type='name'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_name'
Elasticsearch index 'synonym_name' for application 'hr_assistant' already 

100%|██████████| 245/245 [00:00<00:00, 1081.43it/s]

Loaded 245 documents
Fitting role classifier: domain='salary', intent='get_salary', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 50.60it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary', entity_type='time_recur'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_time_recur'
Elasticsearch index 'synonym_time_recur' for application 'hr_assistant' already exists!
Loading index 'synonym_time_recur'



100%|██████████| 5/5 [00:00<00:00, 118.95it/s]

Loaded 5 documents
Fitting role classifier: domain='salary', intent='get_salary', entity_type='sys_amount-of-money'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'



100%|██████████| 4/4 [00:00<00:00, 102.53it/s]

Loaded 4 documents
Fitting role classifier: domain='salary', intent='get_salary', entity_type='sys_duration'





No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary', entity_type='extreme'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_extreme'
Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'


100%|██████████| 2/2 [00:00<00:00, 100.17it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary', entity_type='money'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_money'
Elasticsearch index 'synonym_money' for application 'hr_assistant' already exists!
Loading index 'synonym_money'



100%|██████████| 1/1 [00:00<00:00, 57.38it/s]

Loaded 1 document
Fitting entity recognizer: domain='salary', intent='get_salary_aggregate'
No entity model configuration set. Using default.





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 93.22%, params: {'C': 10000, 'penalty': 'l2'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resol

100%|██████████| 20/20 [00:00<00:00, 379.57it/s]

Loaded 20 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='state'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_state'
Elasticsearch index 'synonym_state' for application 'hr_assistant' already exists!
Loading index 'synonym_state'



100%|██████████| 28/28 [00:00<00:00, 762.23it/s]

Loaded 28 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='name'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_name'
Elasticsearch index 'synonym_name' for application 'hr_assistant' already exists!
Loading index 'synonym_name'



100%|██████████| 245/245 [00:00<00:00, 998.33it/s]

Loaded 245 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='dob'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_dob'
Elasticsearch index 'synonym_dob' for application 'hr_assistant' already exists!
Loading index 'synonym_dob'



100%|██████████| 1/1 [00:00<00:00, 36.22it/s]

Loaded 1 document
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='sys_time'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='sys_number'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='citizendesc'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_citizendesc'
Elasticsearch index 'synonym_citizendesc' for application 'hr_assistant' already exists!
Loading index 'synonym_citizendesc'



100%|██████████| 3/3 [00:00<00:00, 88.85it/s]

Loaded 3 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='employment_action'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_employment_action'
Elasticsearch index 'synonym_employment_action' for application 'hr_assistant' already exists!
Loading index 'synonym_employment_action'



100%|██████████| 2/2 [00:00<00:00, 63.77it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='time_recur'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_time_recur'
Elasticsearch index 'synonym_time_recur' for application 'hr_assistant' already exists!
Loading index 'synonym_time_recur'



100%|██████████| 5/5 [00:00<00:00, 104.21it/s]

Loaded 5 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='sex'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_sex'
Elasticsearch index 'synonym_sex' for application 'hr_assistant' already exists!
Loading index 'synonym_sex'



100%|██████████| 2/2 [00:00<00:00, 65.05it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'



100%|██████████| 4/4 [00:00<00:00, 119.76it/s]

Loaded 4 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='sys_duration'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='sys_amount-of-money'
No role model configuration set. Using default.
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='position'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_position'
Elasticsearch index 'synonym_position' for application 'hr_assistant' already exists!





Loading index 'synonym_position'


100%|██████████| 24/24 [00:00<00:00, 541.93it/s]

Loaded 24 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='extreme'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_extreme'
Elasticsearch index 'synonym_extreme' for application 'hr_assistant' already exists!
Loading index 'synonym_extreme'



100%|██████████| 2/2 [00:00<00:00, 82.88it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='function'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_function'
Elasticsearch index 'synonym_function' for application 'hr_assistant' already exists!
Loading index 'synonym_function'



100%|██████████| 4/4 [00:00<00:00, 101.29it/s]

Loaded 4 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='date_compare'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_date_compare'
Elasticsearch index 'synonym_date_compare' for application 'hr_assistant' already exists!
Loading index 'synonym_date_compare'



100%|██████████| 2/2 [00:00<00:00, 59.30it/s]

Loaded 2 documents
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='manager'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_manager'





Elasticsearch index 'synonym_manager' for application 'hr_assistant' already exists!
Loading index 'synonym_manager'


100%|██████████| 1/1 [00:00<00:00, 43.94it/s]

Loaded 1 document
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='money'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_money'





Elasticsearch index 'synonym_money' for application 'hr_assistant' already exists!
Loading index 'synonym_money'


100%|██████████| 1/1 [00:00<00:00, 60.31it/s]

Loaded 1 document
Fitting role classifier: domain='salary', intent='get_salary_aggregate', entity_type='department'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_department'
Elasticsearch index 'synonym_department' for application 'hr_assistant' already exists!
Loading index 'synonym_department'



100%|██████████| 6/6 [00:00<00:00, 141.48it/s]

Loaded 6 documents
Fitting entity recognizer: domain='hierarchy', intent='get_hierarchy'
No entity model configuration set. Using default.





Selecting hyperparameters using k-fold cross-validation with 5 splits
Best accuracy: 96.67%, params: {'C': 1, 'penalty': 'l2'}
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
No entity_resolution model configuration set. Using default.
Fitting role classifier: domain='hierarchy', intent='get_hierarchy', entity_type='manager'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_manager'
Elasticsearch index 'synonym_manager' for application 'hr_assistant' already exists!
Loading index 'synonym_manager'


100%|██████████| 1/1 [00:00<00:00, 53.76it/s]

Loaded 1 document
Fitting role classifier: domain='hierarchy', intent='get_hierarchy', entity_type='name'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_name'
Elasticsearch index 'synonym_name' for application 'hr_assistant' already exists!
Loading index 'synonym_name'



100%|██████████| 245/245 [00:00<00:00, 1177.56it/s]

Loaded 245 documents
Fitting role classifier: domain='hierarchy', intent='get_hierarchy', entity_type='comparator'
No role model configuration set. Using default.
Importing synonym data to synonym index 'synonym_comparator'
Elasticsearch index 'synonym_comparator' for application 'hr_assistant' already exists!
Loading index 'synonym_comparator'



100%|██████████| 4/4 [00:00<00:00, 163.34it/s]

Loaded 4 documents
Fitting entity recognizer: domain='unsupported', intent='unsupported'
No entity model configuration set. Using default.
There are no labels in this label set, so we don't fit the model.





In [None]:
from mindmeld.components.dialogue import Conversation
conv = Conversation(nlp=nlp, app_path='../hr_assistant')

The application package hr_assistant is already imported.




In [None]:
df = intent_txt
df.columns

Index(['get_info', 'get_aggregate', 'get_employees', 'get_salary',
       'get_salary_aggregate', 'get_salary_employees', 'get_date',
       'get_date_range_aggregate', 'get date_range_employees',
       'get_hierarchy'],
      dtype='object')

In [None]:
orig_conv = Conversation(nlp=nlp, app_path='../hr_assistant')

issues = []
#for col in df:
col = 'get_hierarchy'
#if col not in ['get_info', 'get_aggregate', 'get_employees']:
print(col.upper() + "=============================================================")
idx = 2
nan = is_nan(df[col][idx])
while(idx < len(df)):
        if(is_nan(df[col][idx])): break
        #print(idx)
        sentence = df[col][idx]
        labels, pos = get_labels(sentence)
        q = entity_label_remove(sentence, pos, uniq)
        conv = orig_conv
        try: conv.say(q)
        except: 
            print(q)
            issues.append(q)

        idx += 1
print(issues)

The application package hr_assistant is already imported.
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'


100%|██████████| 301/301 [00:00<00:00, 809.69it/s]

Loaded 301 documents





Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'


100%|██████████| 301/301 [00:00<00:00, 923.89it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 852.32it/s]

Loaded 301 documents





Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'


100%|██████████| 301/301 [00:00<00:00, 667.01it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 650.28it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 974.83it/s]

Loaded 301 documents



--- Logging error ---
Traceback (most recent call last):
  File "/anaconda3/envs/mindmeld2/lib/python3.6/logging/__init__.py", line 994, in emit
    msg = self.format(record)
  File "/anaconda3/envs/mindmeld2/lib/python3.6/logging/__init__.py", line 840, in format
    return fmt.format(record)
  File "/anaconda3/envs/mindmeld2/lib/python3.6/logging/__init__.py", line 577, in format
    record.message = record.getMessage()
  File "/anaconda3/envs/mindmeld2/lib/python3.6/logging/__init__.py", line 338, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:


Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'


  File "/anaconda3/envs/mindmeld2/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/anaconda3/envs/mindmeld2/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/anaconda3/envs/mindmeld2/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/anaconda3/envs/mindmeld2/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/anaconda3/envs/mindmeld2/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/anaconda3/envs/mindmeld2/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/anaconda3/envs/mindmeld2/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
    self._run_once()
  File "/anaconda3/envs/mindmeld2/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
 

100%|██████████| 301/301 [00:00<00:00, 587.52it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 912.51it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 917.40it/s]

Loaded 301 documents





Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'


100%|██████████| 301/301 [00:00<00:00, 954.62it/s]

Loaded 301 documents





Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'


100%|██████████| 301/301 [00:00<00:00, 845.82it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 662.79it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 949.43it/s]

Loaded 301 documents





Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'


100%|██████████| 301/301 [00:00<00:00, 795.85it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 817.89it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 885.33it/s]

Loaded 301 documents





Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'


100%|██████████| 301/301 [00:00<00:00, 656.07it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 820.74it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 700.85it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 876.68it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 826.41it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 857.61it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 793.61it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 895.58it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 923.16it/s]

Loaded 301 documents
Elasticsearch index 'user_data' for application 'hr_assistant' already exists!
Loading index 'user_data'



100%|██████████| 301/301 [00:00<00:00, 850.67it/s]

Loaded 301 documents


In [None]:
print(len(issues))