# Compute MDG survey results

This document computes the average MDG scores given by the survey respondents and Krippendorff's alpha (inter-rater agreement) between them.

In [25]:
import pandas as pd
import json
from models import MDGScore, QuestionResSchema

## Create an index of questions per model

In [2]:
# Survey 1: question positions (0-11) listed under each model key.
# Transcribed by hand -- double check this directly in the survey.
mdg_survey_1_index = {
    "baseline-gpt-3-5-turbo-16k-0613": [8, 11],
    "assistant-gpt-3-5-turbo-16k-0613": [0, 1, 3, 4, 5, 6, 9],
    "pipeline-gpt-3-5-turbo-16k-0613": [2, 7, 10],
}

# Survey 2: question positions (0-11) listed under each model key.
# Transcribed by hand -- double check this directly in the survey.
mdg_survey_2_index = {
    "baseline-gpt-3-5-turbo-16k-0613": [0, 3, 6, 9, 11],
    "assistant-gpt-3-5-turbo-16k-0613": [5, 10],
    "pipeline-gpt-3-5-turbo-16k-0613": [1, 2, 4, 7, 8],
}

# Survey 3: question positions (0-11) listed under each model key.
# Transcribed by hand -- double check this directly in the survey.
mdg_survey_3_index = {
    "baseline-gpt-3-5-turbo-16k-0613": [0, 2, 5, 7, 9],
    "assistant-gpt-3-5-turbo-16k-0613": [1, 6, 10],
    "pipeline-gpt-3-5-turbo-16k-0613": [3, 4, 8, 11],
}

## Convert the question list to a dictionary

In [28]:
# Relative paths to the input files; nothing is read in this cell.
# JSON file with the survey's query/response records.
json_mdg_path = "../../evals/human/multi-dimension/multi-dimension-survey.json"
# CSV file with the respondents' answers to MDG survey 2.
csv_mdg_2_path = "../../evals/human/multi-dimension/results/twiga-mdg-2.csv"

In [46]:
"""Read JSON file of queries into three separate mdg survey dataaframes"""
with open(json_mdg_path, 'r') as file:
    data = json.load(file)

# Extract the relevant information and store it in a dictionary
mdg_queries_1_df = pd.DataFrame(data[0:12])
mdg_queries_2_df = pd.DataFrame(data[12:24])
mdg_queries_3_df = pd.DataFrame(data[24:36])

verbose = True
if verbose:
    print(mdg_queries_1_df.columns)
    print(mdg_queries_2_df.columns)
    print(mdg_queries_3_df.columns)

    print(len(mdg_queries_1_df))
    print(len(mdg_queries_2_df))
    print(len(mdg_queries_3_df))


Index(['query', 'response', 'exercise_format', 'topic', 'source_file'], dtype='object')
Index(['query', 'response', 'exercise_format', 'topic', 'source_file'], dtype='object')
Index(['query', 'response', 'exercise_format', 'topic', 'source_file'], dtype='object')
12
12
12


In [50]:
"""Read the csv file into a dataframe"""
# Read the CSV file into a DataFrame, skipping the first row (metadata)
df = pd.read_csv(csv_mdg_2_path, skiprows=[1,2])

# Assuming the first column is 'Name' and subsequent columns are responses
name_column = df.columns[17]  # The title of the name column
sanity_check_columns = df.columns[21:30] # TODO: get the actual query-response pair I made manually and show them somewhere along with these (can put in appendix)
response_columns = df.columns[30:66] # The titles of the question columns and associated responses


# Extract the names and their associated responses
respondent_responses = {}
for index, row in df.iterrows():
    name = row[name_column]
    responses = row[response_columns].tolist()
    answer_relevance_responses = responses[::3]
    formulation_responses = responses[1::3]
    suitability_responses = responses[2::3]
    respondent_responses[name] = {
        "answer_relevance": answer_relevance_responses,
        "formulation": formulation_responses,
        "suitability": suitability_responses
    }

# Print the results (or you can save them to a file if needed)
verbose = False
if verbose:
    print(response_columns[::3])
    print(response_columns[1::3])
    print(response_columns[2::3])

    for respondent, responses in respondent_responses.items():
        print(f"Respondent: {respondent}\nResponses: {responses}")


Index(['Q3.1', 'Q4.1', 'Q5.1', 'Q6.1', 'Q7.1', 'Q8.1', 'Q9.1', 'Q10.1',
       'Q11.1', 'Q12.1', 'Q13.1', 'Q14.1'],
      dtype='object')
Index(['Q3.2', 'Q4.2', 'Q5.2', 'Q6.2', 'Q7.2', 'Q8.2', 'Q9.2', 'Q10.2',
       'Q11.2', 'Q12.2', 'Q13.2', 'Q14.2'],
      dtype='object')
Index(['Q3.3', 'Q4.3', 'Q5.3', 'Q6.3', 'Q7.3', 'Q8.3', 'Q9.3', 'Q10.3',
       'Q11.3', 'Q12.3', 'Q13.3', 'Q14.3'],
      dtype='object')
Respondent: Ramadhani 
Responses: {'answer_relevance': [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5], 'formulation': [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5], 'suitability': [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]}
Respondent: Veronica 
Responses: {'answer_relevance': [3, 2, 4, 4, 4, 4, 4, 3, 3, 4, 4, 4], 'formulation': [3, 3, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4], 'suitability': [3, 3, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4]}
Respondent: Stedius bernado
Responses: {'answer_relevance': [4, 2, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5], 'formulation': [5, 2, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5], 'suitability': [2, 4, 5, 5, 5, 5, 4, 5