In [1]:
import re
import json
from pathlib import Path

import pandas as pd

# Generating CSV with data

In [2]:
# Load every raw JSON export in `raw_data/` and pool the records into `data`.
raw_path = Path("raw_data")
data = list()
# Path.glob yields files in an arbitrary, filesystem-dependent order; sorting
# keeps the row order of everything derived from `data` stable across runs.
for json_file in sorted(raw_path.glob("*.json")):
    # Read as UTF-8 explicitly instead of relying on the platform's locale encoding.
    with open(json_file, encoding="utf-8") as handle:
        raw_data = json.load(handle)
    for record in raw_data:
        # The chat transcripts are not needed for the CSV built below, so drop
        # them. pop(..., None) tolerates a record stored without a transcript,
        # where `del` would abort the whole load with a KeyError.
        record["mfv"].pop("chat_history", None)
        record["mfq"].pop("chat_history", None)
    data.extend(raw_data)


In [3]:
# Flatten the nested records in `data` into one table row per completion and
# store the result as data/raw_completions.csv.

# Column names, listed in the same order as the values of each row built below.
raw_columns = [
    "agent",
    "id",
    "condition",
    "mfq_part1",
    "part1_order",
    "mfq_part2",
    "part2_order",
    "mfv",
    "mfv_codes",
]

clean_data = list()

for answer in data:
    mfq_record = answer["mfq"]
    mfv_record = answer["mfv"]

    # Only the first element of every entry in the order lists is kept.
    part1_order = [entry[0] for entry in mfq_record["part1_order"]]
    part2_order = [entry[0] for entry in mfq_record["part2_order"]]

    clean_data.append(
        [
            answer["agent"],
            answer["id"],
            answer["condition"],
            mfq_record["part1"],
            part1_order,
            mfq_record["part2"],
            part2_order,
            mfv_record["mfv"],
            mfv_record["mfv_codes"],
        ]
    )

df = pd.DataFrame(clean_data, columns=raw_columns)

# to_csv does not create missing directories; make sure the output folder
# exists so the notebook also runs on a fresh checkout.
Path("data").mkdir(parents=True, exist_ok=True)
df.to_csv("data/raw_completions.csv", index=False)

In [4]:
class BaseSanitizer():
    """Shared validation logic for MFQ and MFV answers.

    Subclasses implement ``sanitize``, which extracts the candidate ratings
    from a raw answer as strings. The ``validate_*`` methods convert those
    strings to integers and check how many there are and which range they
    fall in.
    """

    def sanitize(self, answer=None):
        """Extract the ratings contained in ``answer``.

        Must be overridden by subclasses; this base implementation always
        raises ``NotImplementedError``. The parameter matches the way the
        validators call the method (``self.sanitize(answer)``); it defaults to
        ``None`` so a call without arguments still reaches the same error.
        """
        raise NotImplementedError("Subclass must implement abstract method")

    def validate_mfq_answer(self, answer):
        """Validate one MFQ answer.

        Returns the extracted ratings as a list of 16 integers when there are
        exactly 16 of them and each lies between 0 and 5 (inclusive);
        otherwise returns ``False``. An answer with 32 ratings is rejected as
        well, but a warning is printed for it first.
        """
        # convert matches to int
        matches = [int(x) for x in self.sanitize(answer)]
        # The 32-match case has to be tested before the generic length check,
        # otherwise the generic check returns first and the warning can never
        # be printed.
        if len(matches) == 32:
            print("WARNING: 32 matches found!")
            return False
        if len(matches) != 16:
            return False
        # check if numbers fall between 0 and 5
        if all(0 <= x <= 5 for x in matches):
            return matches
        return False

    def validate_mfv_answer(self, answer):
        """Validate one MFV answer.

        Returns the extracted ratings as a list of 68 integers when there are
        exactly 68 of them and each lies between 1 and 5 (inclusive);
        otherwise returns ``False``.
        """
        # convert matches to int
        matches = [int(x) for x in self.sanitize(answer)]
        if len(matches) != 68:
            return False
        # check if numbers fall between 1 and 5
        if all(1 <= x <= 5 for x in matches):
            return matches
        return False


class Sanitizer(BaseSanitizer):
    """Regex-based extractor of single-digit ratings from a text answer."""

    # One rating is: an optional list prefix such as "12. ", then a single
    # digit that is not immediately followed by ".<digit>", then optional
    # whitespace and a delimiter (comma, newline, end of text, "]" or a
    # backtick). The digit itself is captured in the named group "num".
    pat = r"(\d+\.\s*?)?(?P<num>\d(?!\.\d))\s*?(,|\n|$|\]|`)"

    def sanitize(self, answer):
        """Return the ``num`` group of every match of ``pat`` in ``answer``, in order."""
        ratings = []
        for match in re.finditer(self.pat, answer):
            ratings.append(match.group("num"))
        return ratings


In [5]:
def get_sanitizer(agent):
    """Return the sanitizer to use for the answers of ``agent``.

    ``agent`` is currently not consulted: every call returns a new
    ``Sanitizer`` instance.
    """
    sanitizer = Sanitizer()
    return sanitizer

In [6]:
# Validate every row of `df`. Rows whose three answers all validate contribute
# one frame to the MFQ table and one to the MFV table; all other rows are
# collected in `errors` for later inspection.
mfq_dfs = list()
mfv_dfs = list()

errors = list()

for row_index, row in df.iterrows():
    agent = row["agent"]
    # Identifying fields shared by every record derived from this row.
    identity = {"agent": agent, "id": row["id"], "condition": row["condition"]}

    sanitizer = get_sanitizer(agent)
    part1_scores = sanitizer.validate_mfq_answer(row["mfq_part1"])
    part2_scores = sanitizer.validate_mfq_answer(row["mfq_part2"])
    mfv_scores = sanitizer.validate_mfv_answer(row["mfv"])

    # The validators return False on failure; one failure rejects the whole row.
    if any(result is False for result in (part1_scores, part2_scores, mfv_scores)):
        errors.append(
            {
                **identity,
                "mfq_part1": row["mfq_part1"],
                "mfq_part2": row["mfq_part2"],
                "mfv": row["mfv"],
            }
        )
        continue

    mfv_data = {**identity, "mfv": mfv_scores, "mfv_codes": row["mfv_codes"]}

    # The identifying fields are repeated 32 times, once per MFQ answer
    # (both parts concatenated).
    mfq_data = {key: [value] * 32 for key, value in identity.items()}
    mfq_data["answer"] = part1_scores + part2_scores
    mfq_data["code"] = row["part1_order"] + row["part2_order"]

    mfq_dfs.append(pd.DataFrame(mfq_data))
    mfv_dfs.append(pd.DataFrame(mfv_data))

mfq = pd.concat(mfq_dfs)
mfq.to_csv("data/mfq.csv", index=False)
mfv = pd.concat(mfv_dfs)
mfv.to_csv("data/mfv.csv", index=False)

In [7]:
# Count the distinct ids in `mfq` for each (agent, condition) pair.
(
    mfq[["agent", "id", "condition"]]
    .drop_duplicates()
    .groupby(["agent", "condition"])
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,id
agent,condition,Unnamed: 2_level_1
Claude 2.1,qv,92
Claude 2.1,vq,69
GPT-4,qv,98
GPT-4,vq,91
Gemini Pro,vq,1


In [8]:
class TestSanitizer():
    """Verbose counterpart of ``BaseSanitizer`` for diagnosing rejected answers.

    It applies the same checks but prints why an answer is rejected, together
    with the offending answer. Subclasses implement ``sanitize``.
    """

    def sanitize(self, answer=None):
        """Extract the ratings contained in ``answer``.

        Must be overridden by subclasses; this base implementation always
        raises ``NotImplementedError``. The parameter matches the way the
        validators call the method (``self.sanitize(answer)``); it defaults to
        ``None`` so a call without arguments still reaches the same error.
        """
        raise NotImplementedError("Subclass must implement abstract method")

    def validate_mfq_answer(self, answer):
        """Validate one MFQ answer, printing the reason for any rejection.

        Returns the extracted ratings as a list of 16 integers when there are
        exactly 16 of them and each lies between 0 and 5 (inclusive);
        otherwise prints a diagnostic and returns ``False``.
        """
        # convert matches to int
        matches = [int(x) for x in self.sanitize(answer)]
        # Only 16 matches are accepted, exactly as in BaseSanitizer. Accepting
        # 32 here as well would leave rows that the pipeline rejected for
        # having 32 matches without any printed explanation.
        if len(matches) != 16:
            print("Wrong number of matches for MFQ!")
            print(len(matches))
            print(answer)
            return False
        # check if numbers fall between 0 and 5
        if all(0 <= x <= 5 for x in matches):
            return matches
        print("Invalid scale for MFQ!")
        print(answer)
        print(matches)
        return False

    def validate_mfv_answer(self, answer):
        """Validate one MFV answer, printing the reason for any rejection.

        Returns the extracted ratings as a list of 68 integers when there are
        exactly 68 of them and each lies between 1 and 5 (inclusive);
        otherwise prints a diagnostic and returns ``False``.
        """
        # convert matches to int
        matches = [int(x) for x in self.sanitize(answer)]
        if len(matches) != 68:
            print("Wrong number of matches for MFV!")
            print(answer)
            return False
        # check if numbers fall between 1 and 5
        if all(1 <= x <= 5 for x in matches):
            return matches
        print("Invalid scale for MFV!")
        print(answer)
        print(matches)
        return False


class TSanitizer(TestSanitizer):
    """Regex-based rating extractor paired with the verbose validators."""

    # One rating is: an optional list prefix such as "12. ", then a single
    # digit that is not immediately followed by ".<digit>", then optional
    # whitespace and a delimiter (comma, newline, end of text, "]" or a
    # backtick). The digit itself is captured in the named group "num".
    pat = r"(\d+\.\s*?)?(?P<num>\d(?!\.\d))\s*?(,|\n|$|\]|`)"

    def sanitize(self, answer):
        """Return the ``num`` group of every match of ``pat`` in ``answer``, in order."""
        found = []
        for hit in re.finditer(self.pat, answer):
            found.append(hit.group("num"))
        return found


In [9]:
# Re-run the checks on the rejected Gemini Pro rows with the verbose sanitizer,
# so that the reason for each rejection is printed.
s = TSanitizer()
n = 0  # number of rejected Gemini Pro rows inspected
for d in errors:
    if d["agent"] != "Gemini Pro":
        continue
    # The validators are called only for their diagnostic prints. Their results
    # are deliberately not bound to names: assigning one to `mfv` would replace
    # the DataFrame of that name built earlier in the notebook.
    s.validate_mfq_answer(d["mfq_part1"])
    s.validate_mfq_answer(d["mfq_part2"])
    s.validate_mfv_answer(d["mfv"])
    n += 1

Wrong number of matches for MFQ!
18
5,3,4,5,5,4,5,5,4,3,3,4,5,4,3,2,2,3
Wrong number of matches for MFV!
2,2,4,3,3,5,4,3,2,3,4,2,3,5,5,2,3,2,2,3,4,4,5,5,4,3,2,3,5,4,2,2,5,5,2,2,3,2,5,3,4,5,5,4,4,3,2,4,5,4,4,3,2,5,4,3,5,2,3,2,5,2
Wrong number of matches for MFQ!
1
17.518.419.420.221.422.523.324.425.526.327.528.529.430.531.432.4
Wrong number of matches for MFV!
1.32.23.34.55.21.23.24.25.42.54.22.44.11.13.35.23.31.15.42.15.12.35.12.24.55.31.41.41.55.11.34.52.33.42.14.22.22.32.23.41.14.44.22.33.12.21.25.15.53.41.53.24.41.21.25.23.33.13.23.22.11.51.31.23.22.54.25.32.25.13.32.51.54.15.54.54.55.33.45.44.35.53.54.32.53.14.15.54.53.14.33.14.34.11.53.31.21.34.24.24.25.21.14.11.34.32.12.35.21.11.41.34.15.31.24.25.11.24.15.11.52.31.44.25.33.15.41.12.33.21.25.23.15.13.52
Wrong number of matches for MFV!
1,2,3,5,3,3,3,3,4,3,3,4,3,3,3,3,4,5,5,3,3,5,4,5,4,5,5,3,4,3,5,4,4,3,3,5,5,3,3,4,3,5,4,5,4,3,4,5,5,4,3,3,4,3,5,4,5,3
Wrong number of matches for MFQ!
1
17.418.519.420.521.322.223.324.425.526.527.228.

In [10]:
# Number of Gemini Pro rows found in `errors` by the loop above.
n

179