# Experiments: Conversational Grounding LLM Prediction

This Jupyter notebook contains the code for the experiments in the paper "Towards Harnessing Large Language Models for Enhanced Comprehension of Conversational Grounding", submitted to IWSDS 2024.

### Imports

In [None]:
import os
import pandas as pd
import openai
import time

### Load Dialogue Dataset

In [None]:
# Read the annotated dialogues; the first spreadsheet row is used as the header.
file_path = 'annotated_dialogues.xlsx'
df = pd.read_excel(io=file_path, header=0)
# Last expression of the cell: preview the first three rows.
df.head(n=3)

In [None]:
# Pick out individual dialogues by room and (case-insensitive) topic,
# e.g. d1 = 'channel-02' / 'media' and d2 = 'channel-02' / 'geography'.
# Each selection is ordered chronologically by its 'timestamp' column.
_in_channel_02 = df['room_name'] == 'channel-02'
_topic_lower = df['topic'].str.lower()

df_d1 = df[_in_channel_02 & (_topic_lower == 'media')].sort_values(by='timestamp', ascending=True)

df_d2 = df[_in_channel_02 & (_topic_lower == 'geography')].sort_values(by='timestamp', ascending=True)

In [None]:
# Split dialogue into list of input subdialogues for LLM
# Create list with indices for the LLM predictions
def generate_input_dialogue_list(df, label_indices):
    ''' Builds one input sub-dialogue per labelled turn.

    For every index label in `label_indices`, the sub-dialogue consists of all
    rows of `df` from the first row up to and including the first row whose
    index equals that label, each rendered as "role: message".

    Args:
        df: DataFrame with the columns 'role', 'message', 'grounding_label'
            and 'grounded_knowledge'; rows are used in their current order.
        label_indices: iterable of index labels of `df` to build inputs for.

    Returns:
        A list of dicts, in the order of `label_indices`, with the keys
        'label_index', 'dialogue' (list of "role: message" strings),
        'grounding_label' and 'grounded_knowledge' (both taken from the
        labelled row). Labels that do not occur in `df.index` are skipped.
    '''
    turns = []        # "role: message" for every row, in frame order
    first_match = {}  # index label -> (position in `turns`, label, knowledge)

    # Walk the frame once instead of once per label index.
    columns = df[["role", "message", "grounding_label", "grounded_knowledge"]]
    for position, (index, row) in enumerate(columns.iterrows()):
        turns.append(f"{row['role']}: {row['message']}")
        # Only the first row carrying a given index label counts.
        if index not in first_match:
            first_match[index] = (position, row['grounding_label'], row['grounded_knowledge'])

    input_dialogue_list = []
    for i in label_indices:
        match = first_match.get(i)
        if match is None:
            continue
        position, grounding_label, grounded_knowledge = match
        input_dialogue_list.append({
            'label_index': i,
            # The slice creates an independent list for every entry.
            'dialogue': turns[:position + 1],
            'grounding_label': grounding_label,
            'grounded_knowledge': grounded_knowledge,
        })
    return input_dialogue_list

# Rows carrying one of these grounding labels are the turns the LLM predicts.
_grounding_labels = ["explicit", "implicit", "clarification"]

_is_labelled_d1 = df_d1['grounding_label'].isin(_grounding_labels)
label_indices_d1 = df_d1.loc[_is_labelled_d1].index.tolist()
input_dialogue_d1 = generate_input_dialogue_list(df_d1, label_indices_d1)

_is_labelled_d2 = df_d2['grounding_label'].isin(_grounding_labels)
label_indices_d2 = df_d2.loc[_is_labelled_d2].index.tolist()
input_dialogue_d2 = generate_input_dialogue_list(df_d2, label_indices_d2)


### Define Prompts for LLM in Chat Format

* CLS = classification of grounding label
* IE = information extraction of grounded knowledge

In [None]:
# Define CLS prompts
def get_CLS_system_prompt() -> str:
    ''' Returns the system instruction used for classifying grounding labels (CLS).'''
    instruction = (
        "Predict the grounding label, representing when knowledge has been mutually grounded, "
        "for the last turn in the 'Input dialogue:'. "
        "The label can be 'explicit' if knowledge is verbally accepted, "
        "'implicit' if accepted by moving forward with the conversation, "
        "or 'clarification' if a previous utterance must be clarified before acceptance."
    )
    return instruction

def get_CLS_zero_shot_chat_prompt(input_dialogue: str, system_message: bool = True) -> list:
    ''' Returns the zero-shot chat prompt that predicts a grounding label given a dialogue.

    Args:
        input_dialogue: the dialogue text inserted after "Input dialogue: ".
        system_message: when True, the CLS system instruction is prepended
            as a "system" message.

    Returns:
        A list of chat messages (dicts with the keys "role" and "content"):
        the optional system message followed by a single user message.
    '''
    message = []

    if system_message:
        message.append({"role": "system", "content": get_CLS_system_prompt()})

    # The user turn ends with the open "Output label: " slot for the model to fill.
    message.append({"role": "user", "content": f"Input dialogue: {input_dialogue}\nOutput label: "})
    return message

# Few-shot demonstrations for CLS: one user/assistant pair per grounding label.
# Every string starts and ends with a newline.

# Demonstration 1 -> 'implicit'
user_CLS_example_1 = "\n".join((
    "",
    "Input dialogue:",
    "seeker: Can you tell me about the dataset's content?",
    "provider: The dataset contains information about planets in our solar system.",
    "seeker: What is the number of columns in the dataset?",
    "",
))
assistant_CLS_example_1 = "\nOutput label: implicit\n"

# Demonstration 2 -> 'clarification'
user_CLS_example_2 = "\n".join((
    "",
    "Input dialogue:",
    "provider: My dataset has 191 rows and several columns.",
    "provider: There is a column for the human development index.",
    "seeker: But what does it represent and how is this index calculated?",
    "",
))
assistant_CLS_example_2 = "\nOutput label: clarification\n"

# Demonstration 3 -> 'explicit'
user_CLS_example_3 = "\n".join((
    "",
    "Input dialogue:",
    "provider: The Varso Tower is the tallest building in the EU.",
    "seeker: Okay, thanks.",
    "",
))
assistant_CLS_example_3 = "\nOutput label: explicit\n"

def get_CLS_few_shot_chat_prompt(input_dialogue: str, system_message: bool = True) -> list:
    ''' Returns the few-shot chat prompt that predicts a grounding label given a dialogue.

    The message list is: the optional CLS system instruction, the three CLS
    demonstrations as alternating user/assistant messages, and finally the
    user message holding `input_dialogue` with an open "Output label: " slot.
    '''
    demonstrations = (
        (user_CLS_example_1, assistant_CLS_example_1),
        (user_CLS_example_2, assistant_CLS_example_2),
        (user_CLS_example_3, assistant_CLS_example_3),
    )

    chat = []
    if system_message:
        chat.append({"role": "system", "content": get_CLS_system_prompt()})

    for user_turn, assistant_turn in demonstrations:
        chat.append({"role": "user", "content": user_turn})
        chat.append({"role": "assistant", "content": assistant_turn})

    # The actual query comes last.
    chat.append({"role": "user", "content": f"Input dialogue: {input_dialogue}\nOutput label: "})
    return chat


In [None]:
# Define IE prompts
def get_IE_system_prompt() -> str:
    ''' Returns the system instruction used for extracting grounded knowledge (IE).'''
    instruction = (
        "Predict the newly grounded knowledge for the last turn in the 'Input dialogue:'. "
        "Use the JSON structure: "
        "{'table_domain': str, 'table_content': str, 'row_count': int, 'column_count': int, "
        "'column_info': [{'column_name': str, 'values': [], 'distinct_count': int, "
        "'min_value': int, 'max_value': int}]}. "
        "Adhere strictly to the JSON structure, and only predict the attributes mentioned "
        "in the dialogue turns, leaving unmentioned attributes as null."
    )
    return instruction

def get_IE_zero_shot_chat_prompt(input_dialogue: str, system_message: bool = True) -> list:
    ''' Returns the zero-shot chat prompt that predicts a JSON with grounded knowledge given a dialogue.

    The result is a list of chat messages: the IE system instruction (only
    when `system_message` is true) followed by one user message that holds
    `input_dialogue` and ends with an open "Output JSON: " slot.
    '''
    user_turn = {"role": "user", "content": f"Input dialogue: {input_dialogue}\nOutput JSON: "}

    if not system_message:
        return [user_turn]

    system_turn = {"role": "system", "content": get_IE_system_prompt()}
    return [system_turn, user_turn]


# Few-shot demonstrations for IE: each pair maps a short dialogue to the
# knowledge that is newly grounded by its last turn.
# Every string starts and ends with a newline.
# NOTE(review): demonstrations 2 and 3 use a 'description' key that is not
# listed in the JSON structure of the IE system prompt - confirm this is intended.

# Demonstration 1
user_IE_example_1 = "\n".join((
    "",
    "Input dialogue:",
    "seeker: Can you tell me about the dataset's content?",
    "provider: The dataset contains information about planets in our solar system.",
    "seeker: What is the number of columns in the dataset?",
    "",
))
assistant_IE_example_1 = "\nOutput JSON: {'table_content': 'planets of the solar system'}\n"

# Demonstration 2
user_IE_example_2 = "\n".join((
    "",
    "Input dialogue:",
    "provider: My dataset has 191 rows and several columns.",
    "provider: There is a column for the human development index.",
    "seeker: But how is this index calculated and what does it mean?",
    "",
))
assistant_IE_example_2 = (
    "\nOutput JSON: {'row_count': 191, 'column_info': "
    "[{'column_name': 'human development index', 'description': null}]}\n"
)

# Demonstration 3
user_IE_example_3 = "\n".join((
    "",
    "Input dialogue:",
    "provider: One column contains data about the height of the building in meters.",
    "provider: The Varso Tower is the tallest building in the dataset with 310 m.",
    "seeker: Okay, thanks.",
    "",
))
assistant_IE_example_3 = (
    "\nOutput JSON: {'column_info': "
    "[{'column_name': 'height', 'description': 'height in meters', 'max_value': 310}]}\n"
)

def get_IE_few_shot_chat_prompt(input_dialogue: str, system_message: bool = True) -> list:
    ''' Returns the few-shot chat prompt that predicts a JSON with grounded knowledge given a dialogue.

    The message list is: the optional IE system instruction, the three IE
    demonstrations as alternating user/assistant messages, and finally the
    user message holding `input_dialogue` with an open "Output JSON: " slot.
    '''
    demonstrations = (
        (user_IE_example_1, assistant_IE_example_1),
        (user_IE_example_2, assistant_IE_example_2),
        (user_IE_example_3, assistant_IE_example_3),
    )

    chat = []
    if system_message:
        chat.append({"role": "system", "content": get_IE_system_prompt()})

    for user_turn, assistant_turn in demonstrations:
        chat.append({"role": "user", "content": user_turn})
        chat.append({"role": "assistant", "content": assistant_turn})

    # The actual query comes last.
    chat.append({"role": "user", "content": f"Input dialogue: {input_dialogue}\nOutput JSON: "})
    return chat

### Send Prompt to LLM (OpenAI API)

In [None]:
# Take the API key from the environment (None when the variable is unset).
openai.api_key = os.environ.get('OPENAI_API_KEY')
def query_gpt(prompt, model="gpt-3.5-turbo-1106", max_tokens=256, temperature=0):
    ''' Sends a chat-format prompt to the OpenAI chat completion endpoint.

    Args:
        prompt: list of chat messages (dicts with "role" and "content").
        model: model version to query; defaults to gpt-3.5-turbo-1106.
        max_tokens: upper bound on the number of generated tokens.
        temperature: sampling temperature; defaults to 0.

    Returns:
        The response object returned by `openai.ChatCompletion.create`.
    '''
    # NOTE(review): `openai.ChatCompletion` is the interface of the pre-1.0
    # openai package - confirm the installed version before running.
    response = openai.ChatCompletion.create(
        model=model,
        messages=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return response

In [None]:
# Run inference to get LLM predictions
def run_CLS_inference(df, label_indices, input_dialogue, n_last_turns=None):
    ''' Runs few-shot grounding-label classification (CLS) for every labelled turn.

    For each entry of `label_indices` the matching sub-dialogue is turned into
    a few-shot CLS prompt and sent to the LLM; prompt and prediction are
    stored in the row with that index label. After every prediction the
    current state is written to a new "CLS_predictions_<timestamp>.xlsx" file.

    Args:
        df: dialogue DataFrame; it is copied, so the caller's frame is not modified.
        label_indices: index labels of `df` to predict, aligned by position
            with `input_dialogue`.
        input_dialogue: list of dicts whose 'dialogue' entry is the list of
            "role: message" turns ending with the turn to predict.
        n_last_turns: number of context turns kept before the last utterance;
            None keeps the full history, 0 keeps only the last utterance.

    Returns:
        The copied DataFrame with the added columns "CLS_prediction" and "CLS_prompt".
    '''
    # Work on a copy so that the added columns do not leak into the caller's frame.
    df_predictions = df.copy()
    df_predictions["CLS_prediction"] = ""
    df_predictions["CLS_prompt"] = ""

    for position, idx in enumerate(label_indices):
        print("label index:", idx)
        turns = input_dialogue[position]["dialogue"]
        # Compare against None explicitly so that n_last_turns=0 is honoured.
        if n_last_turns is not None:
            turns = turns[-(n_last_turns + 1):]
        context_dialogue = "\n".join(turns)

        prompt = get_CLS_few_shot_chat_prompt(context_dialogue)
        print("prompt:", prompt)
        response = query_gpt(prompt=prompt)
        result = response.choices[0].message['content'].strip()
        print("prediction:", result)
        print("\n\n")

        df_predictions.at[idx, "CLS_prompt"] = prompt
        df_predictions.at[idx, "CLS_prediction"] = result

        # Save the intermediate results after every prediction.
        time_string = time.strftime("%Y%m%d_%H%M%S")
        df_predictions.to_excel("CLS_predictions_" + time_string + ".xlsx", index=False)
        # Pause between requests (presumably to respect API rate limits).
        time.sleep(8)
    return df_predictions

def run_IE_inference(df, label_indices, input_dialogue, n_last_turns=None):
    ''' Runs few-shot extraction of grounded knowledge (IE) for every labelled turn.

    For each entry of `label_indices` the matching sub-dialogue is turned into
    a few-shot IE prompt and sent to the LLM; prompt and prediction are
    stored in the row with that index label. After every prediction the
    current state is written to a new "IE_predictions_<timestamp>.xlsx" file.

    Args:
        df: dialogue DataFrame; it is copied, so the caller's frame is not modified.
        label_indices: index labels of `df` to predict, aligned by position
            with `input_dialogue`.
        input_dialogue: list of dicts whose 'dialogue' entry is the list of
            "role: message" turns ending with the turn to predict.
        n_last_turns: number of context turns kept before the last utterance;
            None keeps the full history, 0 keeps only the last utterance.

    Returns:
        The copied DataFrame with the added columns "IE_prediction" and "IE_prompt".
    '''
    # Work on a copy so that the added columns do not leak into the caller's frame.
    df_predictions = df.copy()
    df_predictions["IE_prediction"] = ""
    df_predictions["IE_prompt"] = ""

    for position, idx in enumerate(label_indices):
        print("label index:", idx)
        turns = input_dialogue[position]["dialogue"]
        # Compare against None explicitly so that n_last_turns=0 is honoured.
        if n_last_turns is not None:
            turns = turns[-(n_last_turns + 1):]
        context_dialogue = "\n".join(turns)

        prompt = get_IE_few_shot_chat_prompt(context_dialogue)
        print("prompt:", prompt)
        response = query_gpt(prompt=prompt)
        result = response.choices[0].message['content'].strip()
        print("prediction:", result)
        print("\n")

        df_predictions.at[idx, "IE_prompt"] = prompt
        df_predictions.at[idx, "IE_prediction"] = result

        # Save the intermediate results after every prediction.
        time_string = time.strftime("%Y%m%d_%H%M%S")
        df_predictions.to_excel("IE_predictions_" + time_string + ".xlsx", index=False)
        # Pause between requests (presumably to respect API rate limits).
        time.sleep(8)
    return df_predictions

In [None]:
# df_CLS_predictions_d1 = run_CLS_inference(df=df_d1, label_indices=label_indices_d1, input_dialogue=input_dialogue_d1, n_last_turns=None)
# df_CLS_predictions_d2 = run_CLS_inference(df=df_d2, label_indices=label_indices_d2, input_dialogue=input_dialogue_d2, n_last_turns=None)
# df_IE_predictions_d1 = run_IE_inference(df=df_d1, label_indices=label_indices_d1, input_dialogue=input_dialogue_d1, n_last_turns=None)
# df_IE_predictions_d2 = run_IE_inference(df=df_d2, label_indices=label_indices_d2, input_dialogue=input_dialogue_d2, n_last_turns=None)
