In [53]:
import os
import io
import json
import re

import ruamel.yaml
yaml = ruamel.yaml.YAML()

from dotenv import load_dotenv
load_dotenv();

In [54]:
# Project root, read from the PROJ_HOME environment variable.
home = os.getenv("PROJ_HOME")
if home is None:
    # Without this check os.path.join(None, ...) below raises an opaque
    # TypeError that does not mention the missing variable.
    raise RuntimeError(
        "PROJ_HOME is not set; define it in the environment or in the .env file"
    )

# JSON file the subject titles are read from.
adv_data_filepath = os.path.join(home, "explorer_ai/data/adv_data.json")

In [55]:
# write list of unique subjects to a txt file for lookup

# JSON is UTF-8 by specification; do not depend on the platform default encoding.
with open(adv_data_filepath, "r", encoding="utf-8") as json_file:
    data = json.load(json_file)

# Collect every title and alternative title; the set removes duplicates.
unique_subjects = set()
for item in data:
    title = item.get("title")
    if title is not None:
        unique_subjects.add(title)
    # "alt_titles" may be missing or null; treat both as "no alternatives"
    # instead of crashing on `list += None`.
    unique_subjects.update(
        alt for alt in (item.get("alt_titles") or []) if alt is not None
    )

# Sort so the lookup file, and the subject -> template pairing derived from
# this list later on, is identical on every run (set order is not stable
# across kernels).
subject_list = sorted(unique_subjects, key=str)

filename_out = os.path.join(home, "explorer_ai/rasa/data/nlu/lookups/subjects.txt")

with open(filename_out, "w", encoding="utf-8") as txt_file:
    for item in subject_list:
        txt_file.write(f"{item}\n")

In [57]:
# generate annotated ask_for_explanation intents from items in subject_list

fixed_intents_file = os.path.join(home, "explorer_ai/rasa/data/nlu/intents.yml")
intent_templates_file = os.path.join(home, "explorer_ai/rasa/data/nlu/intents/ask_for_explanation/templates.txt")

with open(fixed_intents_file, "r", encoding="utf-8") as stream:
    all_intents = yaml.load(stream)

# Hand-written examples for the intent. `examples` is accepted either as a
# YAML list or as a "|" block scalar containing "- text" lines.
fixed_intents = []
for item in all_intents["nlu"]:
    # Entries without an "intent" key (lookup/synonym/regex, ...) are skipped
    # rather than raising KeyError.
    if item.get("intent") != "ask_for_explanation":
        continue
    examples = item.get("examples") or []
    if isinstance(examples, str):
        # Block-scalar form: one example per line. Extending a list with the
        # raw string would add it one character at a time.
        for line in examples.splitlines():
            text = line.strip()
            if text.startswith("- "):
                text = text[2:].strip()
            if text:
                fixed_intents.append(text)
    else:
        fixed_intents += examples

# One template per line; blank lines are dropped because they contain no
# [placeholder] to substitute.
intent_templates = []
with open(intent_templates_file, "r", encoding="utf-8") as file:
    for line in file:
        template = line.strip("\n")
        if template.strip():
            intent_templates.append(template)

In [58]:
# merge subject list and intent templates to form list of annotated ask_for_explanation intent examples

# Non-greedy match of the first bracketed span in a template, e.g. "[subject]".
# Written as a raw string: "\[" and "\]" are not valid string escapes and
# trigger a warning in a normal string literal on current Python versions.
pattern = r"(\[.*?\])"

# Number of templates at the time this cell ran (kept for other cells that
# may read it; fill_template re-checks the live list itself).
num_templates = len(intent_templates)

def fill_template(filler, n):
    """Return the n-th template (cycling through the list) with its placeholder replaced.

    Args:
        filler: Subject text to insert; it is wrapped in square brackets.
        n: Running index; the template used is ``intent_templates[n % count]``.

    Returns:
        The template with every occurrence of its first bracketed span
        (e.g. ``[subject]``) replaced by ``[filler]``.

    Raises:
        ValueError: If no templates are loaded, or the selected template
            contains no ``[...]`` placeholder.
    """
    if not intent_templates:
        # Avoids a bare ZeroDivisionError from the modulo below.
        raise ValueError("no intent templates loaded; cannot fill a template")
    # Use the live length so a reloaded template list cannot get out of sync
    # with a previously computed count.
    template = intent_templates[n % len(intent_templates)]
    match = re.search(pattern, template)
    if match is None:
        # Previously this surfaced as AttributeError on `None.group`.
        raise ValueError(f"template has no [placeholder] to fill: {template!r}")
    ann_subject = "[" + filler + "]"
    return template.replace(match.group(0), ann_subject)

# Start from a copy of the hand-written examples. Binding the same list object
# would make the appends below grow fixed_intents as well, so re-running this
# cell would keep stacking duplicate generated examples.
intent_examples = list(fixed_intents)

# One generated example per subject; the index rotates through the templates.
for i, subject in enumerate(subject_list):
    intent_examples.append(fill_template(subject, i))

In [59]:
# write ask_for_explanation intent examples to a txt file

intents_txt = os.path.join(home, "explorer_ai/rasa/data/nlu/intents/ask_for_explanation/examples.txt")

# Explicit UTF-8 so non-ASCII subjects are written the same way on every
# platform instead of depending on the locale's default encoding.
with open(intents_txt, "w", encoding="utf-8") as txt_file:
    txt_file.writelines(f"{item}\n" for item in intent_examples)

In [60]:
# generate yaml file for intent ask_for_explanation

# Single NLU entry holding every example collected for the intent.
ask_for_explanation_entry = {
    "intent": "ask_for_explanation",
    "examples": intent_examples,
}

# Top-level document: format version followed by the list of NLU entries.
intent_dict = {"version": "3.1", "nlu": [ask_for_explanation_entry]}

def literalize_list(v):
    """Render a list of examples as a YAML literal block scalar ("|") value.

    Args:
        v: List of examples; each element is converted with ``str()``.

    Returns:
        A ``LiteralScalarString`` whose text has one ``- <example>`` line per
        element, each terminated by a newline.
    """
    assert isinstance(v, list)
    # Build the lines directly instead of round-tripping through yaml.dump:
    # the emitter may quote items that look like YAML syntax (for example ones
    # starting with "[") and may fold long items across lines, either of which
    # changes the example text stored inside the literal block.
    block_text = "".join(f"- {example}\n" for example in v)
    return ruamel.yaml.scalarstring.LiteralScalarString(block_text)

def transform_value(d, key, transformation):
    """Apply ``transformation`` in place to every value stored under ``key``.

    Walks nested dicts and lists starting at ``d``. A value found under
    ``key`` is replaced by ``transformation(value)`` and is not descended
    into; every other dict value and every list element is visited
    recursively. Anything that is neither a dict nor a list is left alone.

    Args:
        d: Structure to walk (modified in place).
        key: Dictionary key whose values are transformed.
        transformation: Callable taking the old value and returning the new one.

    Returns:
        None.
    """
    if isinstance(d, list):
        for member in d:
            transform_value(member, key, transformation)
        return
    if not isinstance(d, dict):
        return
    # Re-assigning an existing key does not change the dict's size, so it is
    # safe while iterating over items().
    for name, child in d.items():
        if name != key:
            transform_value(child, key, transformation)
            continue
        d[name] = transformation(child)

# Replace every "examples" list in the document with its literal-block form.
transform_value(intent_dict, 'examples', literalize_list)

yaml_file = os.path.join(home, "explorer_ai/rasa/data/nlu/intent_ask_for_explanation.yml")

# Explicit UTF-8 so non-ASCII examples are written the same way on every
# platform instead of depending on the locale's default encoding.
with open(yaml_file, "w", encoding="utf-8") as file:
    yaml.dump(intent_dict, file)