## Data Generation Notebook 2

This notebook shows how the LLM was queried to explain the evaluation scores generated by the EvalPro application. 

NOTE: This demo requires a `.env` file to be configured with API keys to execute. Consult [README](README.md) for more information.

### Gather evidence

In [None]:
import itertools
import pandas as pd

from session import *

from demo.ReviewPro.utils.evaluation_helpers import *

In [None]:
# Load the two CSV files used to evaluate functional correctness.
# NOTE(review): `os` and `DATASETS_DIR` are not imported explicitly in this
# notebook; presumably they are provided by one of the star imports - confirm.
input_path = os.path.join(DATASETS_DIR, "5abc_llm_input_functional_correctness.csv")
output_path = os.path.join(DATASETS_DIR, "5abc_llm_output_functional_correctness.csv")

input_df = pd.read_csv(input_path)
# Discard the "Unnamed: 0" column of the output file
# (presumably an index column written by an earlier `to_csv` - confirm).
output_df = pd.read_csv(output_path).drop(columns=["Unnamed: 0"])

# Show the column names of both frames
print(input_df.columns)
output_df.columns

In [None]:
# Join the LLM inputs and outputs side by side, matching rows on the index.
# NOTE(review): assumes both frames share the same row order/index, and that
# they have no column names in common (overlapping names would receive
# `_x`/`_y` suffixes) - confirm.
combo_df = input_df.merge(output_df, left_index=True, right_index=True)
combo_df.columns

### Create a prompt asking the LLM to explain the employee's overall evaluation score

In [None]:
# Prompt template asking the LLM to explain an evaluation it produced earlier.
# The human message has four placeholders that must be supplied when the
# template is formatted or invoked: {extracted_overall_rating},
# {goals_and_objectives}, {self_eval} and {manager_comments}.
# NOTE(review): `ChatPromptTemplate` is not imported explicitly in this
# notebook; presumably it is provided by one of the star imports - confirm.
# NOTE(review): the human message contains the typo "overal". It is left
# untouched here because the prompt text is part of the stored output.
prompt_template2 = ChatPromptTemplate.from_messages(
    [
        # System message: sets the assistant's role.
        (
            "system",
            "You are an assistant to the manager of a small coffee shop.",
        ),
        # Human message: restates the rating and the inputs, then asks for an explanation.
        (
            "human",
            """
Assistant, you provided an overal rating of {extracted_overall_rating} based on the following inputs:

Goals/objectives
{goals_and_objectives}

Employee self evaluation

{self_eval}

Manager comments

{manager_comments}

Can you explain how you arrived at that rating?
        
""",
        ),
    ]
)

In [None]:
# Query the LLM once per row of `combo_df`, asking it to explain the rating.

chain = prompt_template2 | llm

# Prompt variable name -> `combo_df` column that supplies its value.
prompt_field_columns = {
    "extracted_overall_rating": "extractedOverallRating",
    "goals_and_objectives": "goalsAndObjectives",
    "self_eval": "employeeSelfEval",
    "manager_comments": "managerComments",
}

# Plain list of per-row record dicts; a later cell turns it into a DataFrame.
response_df2 = []

for row_label, row in combo_df.iterrows():
    prompt_inputs = {
        field: row[column] for field, column in prompt_field_columns.items()
    }

    # Keep the fully rendered prompt text next to the model's answer.
    prompt = prompt_template2.format(**prompt_inputs)
    response = chain.invoke(prompt_inputs)

    # NOTE(review): "model" stores the `llm` object itself, not a model name;
    # presumably a name/identifier string was intended - confirm.
    record = {
        **prompt_inputs,
        "response": response.content,
        "prompt": prompt,
        "model": llm,
    }
    response_df2.append(record)

In [None]:
# Turn the collected per-row records into a DataFrame.
# The name `response_df2` is rebound here from a list to a DataFrame.
response_df2 = pd.DataFrame(data=response_df2)

In [None]:
# Save the responses.
# First rename the prompt-variable columns back to the camelCase column
# names that the values were read from in `combo_df`.
column_renames = {
    "goals_and_objectives": "goalsAndObjectives",
    "self_eval": "employeeSelfEval",
    "manager_comments": "managerComments",
}
response_df2 = response_df2.rename(columns=column_renames)

# Columns written to the CSV ("extracted_overall_rating" is not exported).
export_columns = [
    "prompt",
    "response",
    "model",
    "employeeSelfEval",
    "goalsAndObjectives",
    "managerComments",
]

# NOTE(review): the output path is hard-coded and relative to the working
# directory, whereas the inputs are read from DATASETS_DIR - confirm this is
# intended. The DataFrame index is written as an extra unnamed first column.
response_df2[export_columns].to_csv("data/5a_output_explainability.csv")

In [None]:
# Visualize the LLM explanation responses, one after another,
# with a separator line printed after each response.
separator = "\n______________________\n"
for explanation in response_df2["response"].tolist():
    print(explanation)
    print(separator)