In [None]:
import re
import pandas as pd
import plotly.express as px
import spacy
from copy import deepcopy

## Information on answers
Metadata for each answer: which prompt it corresponds to, etc.

In [None]:
# Load the answer metadata table (later filtered by its `form` column) and
# preview its dimensions and first three rows.
info_answers = pd.read_csv("info_answers.csv")
print(info_answers.shape)
info_answers.head(3)

## Get answers from forms
Metrics collected from the forms: granularity, relevance, diversity, succinctness.

In [None]:
def read_answers(data_path):
    """Load one form export and return its cells in long format.

    The CSV is transposed so that every original column becomes a row. The
    first original column (presumably the form timestamp -- the previous
    implementation stored it in an unused ``time_info`` variable) and the last
    original column are discarded. The remaining rows are melted so that each
    output row holds a single cell of the original file.

    Parameters
    ----------
    data_path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Three columns: ``answer`` (header text of the original CSV column),
        ``n`` (row label of the original CSV row, i.e. 0, 1, ...) and
        ``value`` (the cell content).
    """
    by_column = pd.read_csv(data_path).transpose()
    # Positional slice: skip the first and the last original column. Unlike the
    # former ``df.iloc[0]`` lookup, this does not fail on an empty frame.
    by_column = by_column.iloc[1:-1]
    row_labels = list(by_column.columns)
    # Move the original header texts out of the index into a regular column,
    # without mutating a slice in place.
    by_column = by_column.reset_index()
    by_column.columns = ["answer"] + row_labels

    return by_column.melt(id_vars='answer', value_vars=row_labels, var_name='n', value_name='value')

In [None]:
# Read the long-format answers of both forms, keyed by form name ("f1", "f2"),
# and preview the first form.
answers = {f"f{i}": read_answers(f"f{i}_answers.csv") for i in ["1", "2"]}
answers["f1"].head(3)

In [None]:
def build_data(answers, info_answers, columns):
    """Join every form's answers with the matching answer metadata.

    For each ``(form, frame)`` pair in ``answers``, the rows of
    ``info_answers`` whose ``form`` column equals the key are selected. Then,
    for every distinct value of the frame's ``n`` column, the rows with that
    ``n`` are placed side by side with the selected metadata rows (both
    re-indexed from 0, so the pairing is purely positional). All resulting
    pieces are stacked vertically under a fresh 0..N-1 index.

    Parameters
    ----------
    answers : dict
        Maps a form name to a DataFrame that has an ``n`` column.
    info_answers : pandas.DataFrame
        Metadata table with a ``form`` column.
    columns : list
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    pandas.DataFrame
        The stacked, column-wise joined pieces.
    """
    pieces = []
    for form_name, form_answers in answers.items():
        form_info = info_answers[info_answers.form == form_name].reset_index(drop=True)
        pieces.extend(
            pd.concat(
                [form_info, form_answers[form_answers.n == n_value].reset_index(drop=True)],
                axis=1,
            )
            for n_value in form_answers.n.unique()
        )
    # ignore_index already yields a clean 0..N-1 index for the final frame.
    return pd.concat(pieces, axis=0, ignore_index=True)

In [None]:
# Combine the metadata with the answers of every form into one table.
# NOTE(review): `columns` is computed and passed along, but build_data as
# defined in this notebook never reads that argument.
columns = list(info_answers.columns) + list(answers["f1"].columns)
data = build_data(answers=answers, info_answers=info_answers, columns=columns)
data

## Add groundedness

In [None]:
def clean_answer(text):
    """Return ``text`` with the answer wrapper removed.

    Five regular-expression substitutions are applied one after another, each
    deleting its matches: two remove a closing triple-double-quote delimiter
    followed by a bracketed tag (on the same line, or on the next line after a
    single space), and three remove an ``Answer:`` header (followed by zero,
    one or two spaces) together with the opening delimiter line.

    Parameters
    ----------
    text : str
        Raw answer text.

    Returns
    -------
    str
        The text with every matching wrapper fragment deleted.
    """
    # Order matters only in that each pattern runs on the previous result.
    wrapper_patterns = (
        r'""" \[.+\]',
        r'"""\n \[.+\]',
        r'Answer: \n"""\n',
        r'Answer:\n"""\n',
        r'Answer:  \n"""\n',
    )
    cleaned = text
    for pattern in wrapper_patterns:
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned

# Load the groundedness table, add the wrapper-free answer text used as the
# merge key later on, and convert decimal-comma strings to floats.
grounding = pd.read_csv("grounding.csv").assign(
    answer_pp=lambda frame: frame["answer"].apply(clean_answer),
    groundedness=lambda frame: frame.groundedness.str.replace(',', '.').astype(float),
)
print(grounding.shape)
grounding.head(3)

In [None]:
# Keep one row per distinct answer: copy the descriptive columns, add the
# wrapper-free answer text, drop duplicates and order by that cleaned text.
data_grounded = (
    deepcopy(data[["form", "type_question", "data", "type_answer", "answer"]])
    .assign(answer_pp=lambda frame: frame["answer"].apply(clean_answer))
    .drop_duplicates()
    .sort_values(by="answer_pp")
)
print(data_grounded.shape)
data_grounded.head(3)

In [None]:
# Attach the groundedness score to every distinct answer via the cleaned text.
# A left join keeps answers without a score (their value becomes NaN).
data_grounded = pd.merge(data_grounded, grounding[["answer_pp", "groundedness"]], on='answer_pp', how='left')
# Shape the rows like the form answers: groundedness is one more metric, and
# n = -1 is used as the respondent id for these rows.
data_grounded["metric"] = "groundedness"
data_grounded["n"] = -1
data_grounded = data_grounded.rename(columns={"groundedness": "value"})
# Rebuild a wrapped answer string. The tag is written as a plain "[...]":
# the former '\[' was an invalid escape sequence in a non-raw string and left a
# literal backslash that the tag pattern in clean_answer does not match.
data_grounded["answer"] = 'Answer:\n"""\n' + data_grounded["answer_pp"] + '""" [Groundedness]'
# Align column order with `data` so both tables can be concatenated.
data_grounded = data_grounded[data.columns].drop_duplicates()
print(data_grounded.shape)
data_grounded.head(3)

In [None]:
# Append the groundedness rows to the form answers.
# NOTE(review): `data` is rebuilt from itself, so running this cell twice
# appends the groundedness rows twice.
data = pd.concat([data, data_grounded], axis=0, ignore_index=True).reset_index(drop=True)
print(data.shape)
data.head(3)

## Analyse results

In [None]:
# Mean value for every (answer type, metric) pair.
data.groupby(["type_answer", "metric"]).agg({"value": "mean"})

In [None]:
def format(x):
    """Render one table cell as LaTeX source text.

    Values whose text form is a (possibly negative) number are rounded to two
    decimals. Everything else -- strings, booleans, NaN, infinities -- is
    wrapped in a LaTeX texttt command with underscores escaped.

    Note: this name shadows the built-in ``format``; it is kept because other
    cells call it under this name.

    Parameters
    ----------
    x : object
        Cell value, typically a float or a label string.

    Returns
    -------
    str
        LaTeX-ready text for the cell.
    """
    # Strings are never rounded, even if they start with a digit (round() would
    # raise on them). The sign is skipped so negative numbers count as numeric.
    looks_numeric = str(x).lstrip("-")[:1].isdigit()
    if looks_numeric and not isinstance(x, (str, bool)):
        return str(round(x, 2))
    return "\\texttt{" + str(x).replace("_", "\\_") + "}"

# Pivot the mean values into one row per answer type and one column per
# metric, then print each row as a line of a LaTeX table body.
mean_res = data.groupby(["type_answer", "metric"]).agg({"value": "mean"})
latex_data = (
    mean_res
    .reset_index()
    .pivot(index="type_answer", columns="metric", values="value")
    .reset_index()
)
columns = [
    "type_answer",
    "granularity",
    "relevance",
    "succinctness",
    "diversity",
    "groundedness",
]
for _, row in latex_data.iterrows():
    # Cells are joined with " & " and the row ends with a LaTeX line break.
    print(' & '.join(format(row[x]) for x in columns) + " \\" + "\\")

In [None]:
# For each metric, show a grouped histogram of the values with one bar colour
# per answer type. The metric name is printed just before its figure.
for metric in data.metric.unique():
    fig = px.histogram(data[data.metric==metric], x="value", color="type_answer", barmode="group")
    print(f"Metric: {metric}")
    fig.show()

In [None]:
# Mean value for every (answer type, metric) pair (same aggregation as the
# table at the start of this section, repeated here next to the plots).
data.groupby(["type_answer", "metric"]).agg({"value": "mean"})

In [None]:
# Mean value for every (answer type, metric) pair, flattened for plotting.
grouped = data.groupby(["type_answer", "metric"]).agg({"value": "mean"}).reset_index()

# One marker per (metric, answer type) pair; colour encodes the answer type.
fig = px.scatter(
    grouped,
    x='metric',
    y='value',
    color='type_answer',
    color_discrete_sequence=px.colors.qualitative.Set1,
)

# The x axis carries the metric, so it is labelled accordingly.
# NOTE(review): the y range is fixed to [1, 5], presumably the rating scale --
# confirm against the forms.
fig.update_layout(
    title='Scatter Plot',
    xaxis=dict(title='Metric'),
    yaxis=dict(title='Value', range=[1,5])
)

# Show the plot
fig.show()

In [None]:
# Display the `nb_events` and `nb_mentioned` columns of the grounding table.
grounding[["nb_events", "nb_mentioned"]]