In [2]:
import numpy as np
import pandas as pd
import math
import os

In [3]:
def is_nan(x):
    """Report whether a value is NaN.

    A value is treated as NaN when it is the ``np.nan`` object itself or when
    it compares unequal to itself.

    param x - Value to check
    return - Truthy if x is NaN, falsy otherwise (the raw result of ``x != x``
             when x is not the ``np.nan`` object)
    """
    # Identity check first: catches the np.nan object without comparing it.
    if x is np.nan:
        return True
    # A float NaN never compares equal to itself.
    return x != x

In [4]:
def create_txt(folder, sentences, train_pct, base_dir='domains/hr_assistant/'):
    """Shuffle sentences and write them to train.txt / test.txt for one folder.

    The sentences are shuffled with NumPy's global random generator. The first
    floor(len(sentences) * train_pct) shuffled sentences are written to
    ``train.txt`` and the remainder to ``test.txt``, one sentence per line.
    Both files live in ``<base_dir>/<folder>/``; the directory is created when
    missing and existing files are overwritten.

    param folder (str) - sub-folder of base_dir that receives the txt files
    param sentences (sequence) - sentences to split; the caller's sequence is
                                 not modified
    param train_pct (float) - fraction of lines used for training, e.g. 0.8
                              (a fraction, not a 0-100 percentage)
    param base_dir (str) - root directory for the generated folders; defaults
                           to 'domains/hr_assistant/'
    """
    # Shuffle a copy so the caller's list keeps its original order.
    shuffled = list(sentences)
    np.random.shuffle(shuffled)

    # Separate
    train_ct = math.floor(len(shuffled) * train_pct)
    train_data = shuffled[:train_ct]
    test_data = shuffled[train_ct:]

    # Create Directory
    directory = os.path.join(base_dir, folder)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(directory, exist_ok=True)

    # Create Files (explicit UTF-8 so the output does not depend on the
    # platform's default locale encoding)
    for file_name, lines in (('train.txt', train_data), ('test.txt', test_data)):
        with open(os.path.join(directory, file_name), 'w', encoding='utf-8') as filehandle:
            filehandle.writelines("%s\n" % line for line in lines)

In [5]:
def scan_sentence(sentence):
    """Flag a sentence whose entity annotations look malformed.

    Entities are annotated as ``{text|label}``. A sentence is flagged when its
    '{' and '}' counts differ, or when it contains fewer '|' separators than
    opening braces.

    param sentence (str) - Sentence to be scanned
    return bool - True if a mismatch is detected, False otherwise. Non-string
                  values (NaN from empty cells, None, numbers) are never
                  flagged.
    """
    # NaN cells and other non-string values carry no annotations to check;
    # calling .count() on them would raise AttributeError.
    if not isinstance(sentence, str):
        return False
    opening = sentence.count('{')
    return opening != sentence.count('}') or sentence.count('|') < opening

In [6]:
def df_gen_txt_files(df, train_pct, skip_rows=2):
    """Create train/test text files from a DataFrame of intent examples.

    For every column, the cells after the first ``skip_rows`` rows are
    collected as example sentences, stopping at the first NaN cell. Each
    sentence is scanned with scan_sentence and any mismatch is printed, then
    the sentences are handed to create_txt, which writes the column's
    train.txt / test.txt under the domains folder.

    param df (dataframe) - Dataframe where each column is an intent and each
                           row has sentence examples
    param train_pct (float) - Fraction of examples to be used for training
    param skip_rows (int) - Number of leading non-example rows to skip in
                            every column; defaults to 2
    """
    for col in df:
        print(col)
        sentences = []
        # Positional slicing cannot raise on short frames or non-default
        # indexes, so no try/except is needed to detect the end of a column.
        for cell in df[col].iloc[skip_rows:]:
            # The first NaN marks the end of this column's examples; stop
            # without collecting the NaN itself so it is never written out.
            if is_nan(cell):
                break
            sentences.append(cell)
        for sentence in sentences:
            if scan_sentence(sentence):
                print("MISMATCH DETECTED: " + str(sentence))
        create_txt(col, sentences, train_pct)


In [7]:
# Load the intent examples from a CSV in the working directory. In the output
# shown for this cell, each column is one intent; rows 0 and 1 hold a return
# type and a description, and the example sentences start at row 2.
intent_txt = pd.read_csv('intent_data.csv')
# Bare expression so the notebook renders the frame as this cell's output.
intent_txt

Unnamed: 0,get_info,get_aggregate,get_employees,get_salary,get_salary_aggregate,get_salary_employees,get_date,get_date_range_aggregate,get date_range_employees,get_hierarchy
0,Returns Data,Returns Number,Returns Employees,Returns Salary,Returns Number,Return Employees,Returns Date,Returns Number,Returns Employees,Returns Employees
1,Returns a column information about a specific ...,"Average, total, percentage of a column",Returns a list of employees that meets a colum...,Returns the salary of a specific employee.,"Average, total, percentage specifically relate...","(Returns Employee Name and Salary, Like Get_Em...","Like Get_Info but only for born, hired, fired ...","(Average, Total, Percentage) Like Get_Aggregat...","Like Get_Employees but filtered by Date(Born, ...",(up and down hierarchy employees for all names...
2,What is {nan|name}'s race?,What is the {total|function} number of {us cit...,Give me employees who are {single|maritaldesc},What is {Mia|name}'s pay?,What is the {median|function} {pay|money} of {...,Who makes the most money?,What is the {date of hiring|date_time} for {Mi...,What {percentage|function} of employees were {...,Give me a list of people {hired this year|date...,Who is {Mia|name}'s manager?
3,Is {Michael|name} {married|maritaldesc}?,What {percent|function} of employees {exceeded...,All employees from {MA|state},What is {Mia|name}'s pay rate?,what {percentage|function} of employees make {...,Which employees make more than {$|money}{10|sy...,When did {Amy|name} join the company?,What {percent|function} of employees were {hir...,Tell me about employees who {started in Spring...,Which employees have {Julia|name} as their man...
4,What is {Nan|name}'s official position?,What is the {percentage|function} of new grads...,Which employees have been recently {terminated...,What is the pay rate of {Julia|name}?,What is the {average|function} {pay rate|money}?,Which employee(s) have {lowest|comparator} {in...,How long has {Ivan|name} been with the company?,What {percentage|function} of employees were {...,Which employees were not yet {born|date_time} ...,What is the name of {Julia|name}'s manager?
5,How much does {Mia|name} get {paid|money}?,What is the {average|function} {age|age}?,Which employees are not {US citizens|citizende...,What is {Nan|name}'s pay rate?,What is the {average|function} {pay|money} of ...,Tell me who all are {making|money} {more than|...,When was {Nan|name} {fired|employment_action}?,What {percent|function} of all our employees w...,list the employees who {joined last Monday|dat...,Who is {Michael|name}'s manager?
6,How {old|age} is the manager for the engineeri...,What percent of employees are managers?,Which employees were {let go|employment_action...,How much does {Michael|name} make?,Calculate the {average|function} {pay rate|mon...,Which {software engineers|position} are {paid|...,How long was {Mia|name} working for?,What {percentage|function} of employees were {...,Which employess were {hired|employment_action}...,Who is {John Reeder|name}'s manager?
7,What is {Mia|name}'s employment status?,What's the {average|function} {age of|age} emp...,how many employees work from {California|state},Is {Mia|name} being {paid|money} {$|money}{40k...,{average|function} {pay rate|money} for {women...,who make a {salary|money} {more than|comparato...,When was {Michael|name}'s date of hire?,How many employees were {hired|employment_acti...,"Which employees were {hired after Jan 1, 2019|...",Who is {Mia Brown|name}'s manager?
8,Is {Ivan|name} from out of state?,What {percentage|function} of the employees ar...,Which employees have been {terminated|employme...,Does {Mia|name} get {$|money}{70k|sys_number} ...,What's the {average|function} {pay rate|money}?,For employees {hired|employment_action} {betwe...,What is {Nan|name}'s date of birth?,How many people were based out of {Colarado|st...,Who worked for Cisco for {less than|comparator...,Who is the manager for {Bob|name}
9,Does {Michael|name} still work at Cisco?,What {percentage|function} of employees are {e...,Which employees have been with the company lon...,When we let {Nan|name} go {fired|employment_ac...,What is the {average|function} {pay rate|money...,For employees {hired|employment_action} {betwe...,What year was {Mia|name} hired?,What {percentage|function} of employees have b...,Which employees have been {hired this year|dat...,What is {Mia|name}'s manager's name?


In [8]:
# Seed NumPy's global generator (used by the shuffle in create_txt) so the
# train/test split is identical on every Restart & Run All. The value 42 is
# arbitrary; change it to draw a different split.
np.random.seed(42)
# Write train.txt / test.txt for every intent column, 80% train / 20% test.
df_gen_txt_files(intent_txt, 0.8)

get_info
get_aggregate
get_employees
get_salary
get_salary_aggregate
get_salary_employees
get_date
get_date_range_aggregate
get date_range_employees
get_hierarchy
