In [171]:
import pandas as pd
import yaml
import json
import os
from icecream import ic
import re


In [204]:
def convert_yml_json(yml_path):

    # Read YAML content from the file
    with open(yml_path, "r") as file:
        yaml_content = yaml.safe_load(file)


    # Convert to JSON
    json_output = json.dumps(yaml_content, indent=2)

    # Save to a file (optional)
    with open("output.json", "w") as json_file:
        json_file.write(json_output)

    return json_output

def check_intents_domain(domain_path):
    intent_list = []
    # Read YAML content from the file
    for file in os.listdir(domain_path):
        if "intent" in file and file.endswith(".yml"):
            intent_file = os.path.join(domain_path, file)
            # Check if the file has a .yml extension
            with open(intent_file, "r") as file:
                domain_content = yaml.safe_load(file)
                temp = domain_content.get("intents")
                intent_list.extend(temp)
        
    
    return intent_list

def check_intents_nlu(nlu_path):
    list_dict = []
    # Iterate over all files in the directory
    for nlu_file in os.listdir(nlu_path):
        # Check if the file has a .yml extension
        if nlu_file.endswith(".yml"):
            # Construct the full file path
            file_path = os.path.join(nlu_path, nlu_file)  
            # Read YAML content from the file
            with open(file_path, "r") as file:
                nlu_content = yaml.safe_load(file)
            intents_nlu = nlu_content.get("nlu")
            intents_nlu = [intent.get("intent") for intent in intents_nlu]
            list_dict.append({intent : {"file" : nlu_file} for intent in intents_nlu})
    #transform list of dictionaries to a single dictionary
    result = {}
    for dictionary in list_dict:
        result.update(dictionary)
   
    return result




def check_intent_responses(domain_path):
    # get intents from domain
    intents_domain = check_intents_domain(domain_path)
    
    intent_in_responses = {}
    # Read YAML content from the file
    for file in os.listdir(domain_path):
        if "response" in file and file.endswith(".yml"):
            file_path = os.path.join(domain_path, file)
            with open(file_path, "r") as file:
                domain_content = yaml.safe_load(file)

            # # Get intents from domain
            # intents_domain = domain_content.get("intents")
            
            # Get responses from domain
            responses_domain = domain_content.get("responses")
            # ic(responses_domain)

            # Get the responses names
            responses_names = list(responses_domain.keys())
            
            #match intents with responses
            
            for intent in intents_domain:
                if "utter_" + intent in responses_names:
                    intent_in_responses[intent] = {"action" : "utter_" + intent,
                                                "type" : "response",
                                                "file": os.path.basename(file_path)}
            buttons_actions = {}
            #match actions and buttons in responses
            i = 0
            for response in responses_domain:
                if "buttons" in responses_domain.get(response)[0].keys():
                    # ic(response)
                    # ic(responses_domain.get(response)[0].get("buttons"))
                    for button in responses_domain.get(response)[0].get("buttons"):
                        # ic(button)
                        buttons_actions[button.get("payload").strip("/")] = {"intent": response}
            
    
    df_intent = pd.DataFrame(intent_in_responses).T.reset_index()
    df_intent.columns = ["intent", "action", "type", "file"]

    return df_intent, buttons_actions

def check_intent_rules(rules_path):
    #iterate over the rules files in rules folder:
    intent_rules = {}
    # Iterate over all files in the directory
    for rule_file in os.listdir(rules_path):
        # Check if the file has a .yml extension
        if rule_file.endswith(".yml"):
            # Construct the full file path
            file_path = os.path.join(rules_path, rule_file)
            # Read YAML content from the file
            with open(file_path, "r") as file:
                rules_content = yaml.safe_load(file)
            # extract intent and related actions from rules
            rules_content = rules_content.get("rules")
            
            for rule in rules_content:
                intent = rule.get("steps")[0].get("intent")
                actions = rule.get("steps")[1].get("action")
                intent_rules[intent] = {"action" : actions,
                                    "type" : "rule",
                                    "reference" : rule.get("rule"),
                                    "file" : rule_file}
        df_intent = pd.concat([pd.DataFrame(intent_rules).T]).reset_index()
        df_intent.columns = ["intent", "action", "type", "reference", "file"]
        return df_intent

def check_intent_stories(stories_path):
    intent_stories = {}
    #iterate over the stories files in stories folder:
    # Iterate over all files in the directory
    for stories_file in os.listdir(stories_path):
        # ic(stories_file)
        # Check if the file has a .yml extension
        if stories_file.endswith(".yml"):
            # Construct the full file path
            file_path = os.path.join(stories_path, stories_file)
            # ic(file_path)
            # Read YAML content from the file
            with open(file_path, "r") as file:
                stories_content = yaml.safe_load(file)
            
            stories_content = stories_content.get("stories")
            #extract intent and related actions from each story in stories
            for story in stories_content:
                # ic(story.get("steps"), len(story.get("steps")))
                i=0
                while i < len(story.get("steps")):
                    # ic(i)
                    if "intent" in story.get("steps")[i]:
                        intent = story.get("steps")[i].get("intent")
                        # ic(intent)
                        list_actions = []
                        j = 1
                        if i + j < len(story.get("steps")):
                            while i + j < len(story.get("steps")) and "intent" not in story.get("steps")[i+j]:
                                if "action" in story.get("steps")[i+j]:
                                    list_actions.append(story.get("steps")[i+j].get("action"))
                                j += 1
                        if len(list_actions) > 0:
                            for action in list_actions:
                                intent_stories[intent] = {"action" : action,
                                                        "type" : "story",
                                                        "reference" : story.get("story"),
                                                        "file" : stories_file}
                        else:
                            intent_stories[intent] = {"action" : None,
                                                        "type" : "story",
                                                        "reference" : story.get("story"),
                                                        "file" : stories_file}
                        # ic(intent_stories)
                        i+=j
                    else:
                        i+=1          
        
    df_intent =  pd.DataFrame(intent_stories).T.reset_index()
    df_intent.columns = ["intent", "action", "type", "reference", "file"]
    
    return df_intent

def check_action_responses_in_actions(folder_path):
    #initiate the values
    current_class = None  # Keeps track of the current class
    action_name = None  # Keeps track of the current action
    actions = []
    slots = []
    intents = []
    responses = []
    payloads = []
    for file in os.listdir(folder_path):
        if "actions" in file and file.endswith(".py"):
            file_path = os.path.join(folder_path, file)
            
            with open(file_path, "r") as file:
                lines = file.readlines()
                
                file_name = os.path.basename(file_path)

                # Loop through all lines in the file
                with open(file_path, "r") as file:
                    lines = file.readlines()
                i=0
                j=0
                while i<len(lines):
                    line = lines[i]
                    
                    # Check if the line defines the class name
                    if line.startswith("class ") and "Action" in line:
                        # Extract class name
                        class_name = line.split()[1].strip().removeprefix("Action").replace(":", "")
                        if current_class != class_name:
                            if current_class and action_name:
                                # Add action to dictionary when changing class
                                temp = {"action": action_name,
                                    "class": current_class,
                                    "action_story": current_story,
                                    "action_response": responses,
                                    "action_file": file_name,
                                    "action_intent": list(set(intents)),
                                    "action_slots": list(set(slots)),
                                    "action_payload": list(set(payloads))
                                    }
                                actions.append(temp) #add the action to the list of actions

                            # reset all values that includes the case when current_class is None
                            current_class = class_name
                            current_story = None # Reset story when entering a new class
                            action_name = None # Reset action when entering a new class
                            responses = [] # Reset list of responses when entering a new class
                            intents = []
                            slots = []# Reset list of intents when entering a new class
                            
                    # Check if the line defines the current story
                    elif "current_story" in line and "=" in line:

                        if line.split("=")[0].strip() == "current_story":
                            # Extract current story name
                            current_story = line.split("=")[1].strip().replace('"', '')
                            if '#' in current_story:
                                current_story = current_story.split("#")[0].strip().strip('"')
                            # Remove quotes
                            # ic(current_story)

                    # Check if the line defines the action name
                    elif line.strip().startswith("def name(self)"):
                        # Extract action name
                        action_name = lines[i+1].split("return")[1].strip().replace('"', '')

                        i+=1
                    
                    #check if the line defines the response
                    if line.strip().startswith("dispatcher.utter_message"):

                        response = None
                        temp = line.split("(")[1].split(")")[0].strip()
                        if temp.startswith('"'):
                            response = temp.strip('"')
                        elif temp.startswith("response=") or temp.startswith("response ="):
                            response = temp.split("=")[1].split('"')[1].strip()
                            # response = temp.split("=")[1].split('"')[0].strip()
                            # ic("yo", response)
                        if response:
                            responses.append(response)

                    if line.strip().startswith('if tracker.latest_message.intent.get("name")'):

                        temp = line.split("==")[1].split(")")[0].strip().strip('"')
                        intents.append(line.split("==")[1].split('":')[0].strip().strip('"'))
                    
                    if "SlotSet(" in line:
                        slots.append(line.split("SlotSet(")[1].split(",")[0].strip().strip('"'))
                    
                    if "tracker.latest_message.get('text')" in line:
                        slots.append(line.split("{")[1].split(": tracker.latest_message.get('text')")[0].strip().strip('"'))
                    
                    if '"payload":' in line:
                        # Extract the payload using regex
                        # ic(line)
                        match = re.search(r'"payload":\s*\'?/([^\{"\'\s]+)', line)
                        if match:
                            ic(match.group(1))
                            payloads.append(match.group(1).strip())
                        else:
                            match = re.search(r'"payload":\s*\"(/[^}]*)', line)
                            if match:
                                ic(match.group(1))
                                payloads.append(match.group(1).strip('"').strip("/"))
                            
                       
                        # payloads.append(line.split('"payload":')[1].split(",")[0].strip(\n).strip('"}').strip('"'))
                        
                    i+=1
    ic(payloads)
    df = pd.DataFrame(actions)
    # df.columns = ["action", "class", "action_story", "action_response", "action_file", "action_intent"]
    return df

def check_intent(domain_path, nlu_path, rules_path, stories_path, actions_path):
    intents_nlu = check_intents_nlu(nlu_path)
    print("nlu_done")
    intents_domain = check_intents_domain(domain_path)
    print("domain_done")
    df_rules = check_intent_rules(rules_path)
    print("rules_done")
    df_stories = check_intent_stories(stories_path)
    print("stories_done")
    df_response, button_response_dic = check_intent_responses(domain_path)
    print("response_done")
    df_actions = check_action_responses_in_actions(actions_path)
    actions_from_py_list = df_actions['action'].unique()
    print("actions_done")
    df_intent = pd.concat([df_rules, df_stories, df_response])
    list_intent_df = df_intent['intent'].unique()
    list_intents_nlu_domain = set(list(intents_nlu.keys()) + intents_domain)
    #add rows to the dataframe for intents not in rules, stories and responses
    list_df = [] #container to store the intents not in rules, stories and responses
    for intent in list_intents_nlu_domain:
        if intent not in list_intent_df:
            list_df.append({"intent" : intent,
                            "action" : None,
                            "type" : None,
                            "reference" : None})
    # add the actions from the actions.py file if they are not in the actions listed
    list_actions_to_add = [i for i in actions_from_py_list if i not in df_intent['action'].unique()]
    # create a dataframe for the actions not in the intents
    for action in list_actions_to_add:
        list_df.append({"intent" : None,
                        "action" : action,
                        "type" : "action",
                        "reference" : None})
    df = pd.concat([df_intent, pd.DataFrame(list_df)]).reset_index(drop=True)
    df['nlu'] = df['intent'].apply(lambda x: True if x in intents_nlu else False)
    df['nlu_file'] = df['intent'].apply(lambda x: intents_nlu.get(x).get("file") if x in intents_nlu else None)
    df['domain'] = df['intent'].apply(lambda x: True if x in intents_domain else False)
    df['button'] = df['intent'].apply(lambda x: button_response_dic[x] if x in button_response_dic.keys() else False)
    df.sort_values(by=['nlu',"intent"], inplace=True)
    date = pd.Timestamp.now().strftime("%YY%m%d")
    df = df.merge(df_actions, on="action", how="left")
    df = df.explode("action_response", ignore_index=True)
    df = df.explode("action_intent", ignore_index=True)
    df = df.explode("action_slots", ignore_index=True)
    df.to_csv(f"data/intent_check_{date}.csv", index=False)
    print(len(df))
    return df


In [205]:
domain_path = "domain.yml"
nlu_path = "data/nlu"
stories_path = "data/stories"
rules_path = "data/rules"
actions_path = "actions"

df = check_intent(domain_path, nlu_path, rules_path, stories_path, actions_path)

df.head(50)

ic| match.group(1): '/affirm"'
ic| match.group(1): '/deny"'
ic| match.group(1): '/exit_grievance_process"'
ic| match.group(1): '/affirm"'
ic| match.group(1): '/deny"'
ic| match.group(1): '/skip"'
ic| match.group(1): 'set_category'
ic| match.group(1): 'set_category'
ic| match.group(1): 'set_category'
ic| match.group(1): 'set_category'
ic| match.group(1): '/skip"'
ic| match.group(1): '/validate_summary"'
ic| match.group(1): '/edit_grievance_summary"'
ic| match.group(1): '/skip_summary"'
ic| match.group(1): '/restart_story{\\"restart_type\\": \\"process\\"'
ic| match.group(1): '/restart_story{\\"restart_type\\": \\"story\\"'
ic| match.group(1): '/main_menu"'
ic| payloads: ['affirm',
               'deny',
               'exit_grievance_process',
               'affirm',
               'deny',
               'skip',
               'set_category',
               'set_category',
               'set_category',
               'set_category',
               'skip',
               'validate_summ

nlu_done
domain_done
rules_done
stories_done
response_done
actions_done
61


Unnamed: 0,intent,action,type,reference,file,nlu,nlu_file,domain,button,class,action_story,action_response,action_file,action_intent,action_slots,action_payload
0,edit_grievance_summary,action_edit_grievance_summary,story,User edits the summary of the grievance,stories.yml,False,,True,False,EditGrievanceSummary(Action),,,grievance_actions.py,,,"[skip_summary, set_category, deny, skip, affir..."
1,launch_chatbot,,,,,False,,True,False,,,,,,,
2,provide_location,,,,,False,,True,False,,,,,,,
3,provide_optional,,,,,False,,True,False,,,,,,,
4,provide_phone,,,,,False,,True,False,,,,,,,
5,provide_updated_summary,utter_ask_location,story,User edits the summary of the grievance,stories.yml,False,,True,False,,,,,,,
6,skip_summary,utter_ask_location,story,User edits the summary of the grievance,stories.yml,False,,True,False,,,,,,,
7,skip_summary,utter_skip_summary,response,,responses.yml,False,,True,False,,,,,,,
8,start_grievance_process,action_submit_grievance,story,unhappy user files grievance,unhappy_and_upset.yml,False,,True,False,SubmitGrievance(Action),,,grievance_actions.py,,grievance_id,"[skip_summary, set_category, deny, skip, affir..."
9,start_grievance_process,utter_start_grievance_process,response,,responses_for_action.yml,False,,True,False,,,,,,,
