In [6]:
# Import all necessary libraries
import re
import pprint

import hashlib
import json
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML
# Inject a CSS rule that sets the notebook's `.container` width to 80%.
display(HTML("<style>.container { width:80% !important; }</style>"))

In [7]:
# Define functions to extract results from data, as well as output data

def extract_score(s):
    """Return the integer rating held in a score dictionary.

    ``s`` must contain at most one entry (enforced with an assertion). The
    keys 'scale_1' through 'scale_5' are probed in that order, and the value
    stored under the first key found is converted with ``int`` and returned.
    If none of those keys is present, ``np.nan`` is returned.
    """
    assert len(s) <= 1

    scale_keys = ('scale_1', 'scale_2', 'scale_3', 'scale_4', 'scale_5')
    for key in scale_keys:
        if key in s:
            return int(s[key])

    # No recognised scale key: report the score as missing.
    return np.nan

def load_results(filename, name, verbose=False):
    """Load one grader's annotations from a JSON-lines file into a DataFrame.

    Every non-blank line of ``filename`` is parsed as a JSON object. Records
    whose ``id`` is longer than seven characters are skipped (and printed when
    ``verbose`` is true). Records are keyed by ``id``, so if an id occurs more
    than once in the file the last occurrence wins.

    Args:
        filename: Path of the JSON-lines file to read; decoded as UTF-8.
        name: Value written to the ``grader`` column of every returned row.
        verbose: When true, print each record that is skipped.

    Returns:
        A DataFrame indexed by ``id`` and sorted by ``question_id`` then
        ``context``. It holds the whitespace-normalised ``displayed_text``,
        one 0/1 flag per problem label, the scores returned by
        ``extract_score``, a free-text ``info`` field, the fields decoded from
        the id characters, and the ``grader`` column.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``id``, ``displayed_text`` or
            ``label_annotations``.
    """
    max_id_length = len('2000000')
    records = {}

    # Decode explicitly as UTF-8 (the usual encoding for JSON text) instead of
    # relying on the platform's default encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            # Skip blank lines, which json.loads would reject.
            if not line.strip():
                continue

            v = json.loads(line)
            record_id = v['id']

            if len(record_id) > max_id_length:
                if verbose:
                    print(v)
                continue

            labels = v['label_annotations']
            # Look the shared sub-structures up once instead of once per field.
            problems = labels.get('Problems', '')
            textbox = labels.get('textbox_input', {})

            records[record_id] = {
                'id': record_id,
                # Collapse every run of whitespace to a single space.
                'displayed_text': ' '.join(v['displayed_text'].split()),

                'calculation_error': 1 if 'Calculation Error' in problems else 0,
                'hallucination_error': 1 if 'Hallucinations' in problems else 0,
                'omission_error': 1 if 'Omission' in problems else 0,
                'irrelevant_error': 1 if 'Irrelevant' in problems else 0,
                'logic_error': 1 if 'Logic Error' in problems else 0,
                'everything_okay': 1 if 'Everything is ok.' in problems else 0,

                'correctness_score': extract_score(labels.get('conclusion_Score', {})),
                'logic_score': extract_score(labels.get('logic_Score', {})),
                'truthfulness_score': extract_score(labels.get('truthfulness_Score', {})),

                'confidence_score': extract_score(labels.get('Confidence_Score', {})),

                'info': textbox.get('Descriptive grade', '') + ', ' + textbox.get('Comments', ''),

                # The remaining fields are decoded from fixed positions of the id.
                'difficulty': int(record_id[0]),  # 1=easy, 2=medium, 3=hard
                'originality': int(record_id[1]),  # 0=copied, 1=paraphrased, 2=original
                'context': int(record_id[2]),  # 0=no, 1=relevant, 2=vague, 3=irrelevant
                # NOTE(review): the value legend for 'author' is not documented
                # here (the previous comment repeated the 'context' legend).
                'author': int(record_id[3]),
                # NOTE(review): for a 7-character id, id[3] ('author') is also
                # the first character of id[-4:] -- confirm this is intended.
                'question_id': record_id[-4:],
            }

    # The outer dict keys become columns, so transpose to get one row per id.
    frame = pd.DataFrame(records).T.set_index('id').sort_values(['question_id', 'context'])
    frame['grader'] = name

    return frame

def output_data(df_out, filename):
    """Write ``df_out`` to ``filename`` as JSON under a top-level "data" key.

    Rows are serialised with ``DataFrame.to_json(orient="records")``, i.e. as
    a list with one object per row; that orientation does not include the
    DataFrame index. The list is then embedded as the value of ``"data"`` and
    the resulting text replaces any existing content of ``filename``.
    """
    records_json = df_out.to_json(orient="records")

    # Wrap the records list in an enclosing JSON object.
    payload = '{"data": ' + records_json + '}'

    with open(filename, "w") as out_file:
        out_file.write(payload)


In [8]:
# Load each grader's annotation file and stack the results into one DataFrame.
# Each entry pairs a raw JSON-lines path with the label stored in 'grader'.
grader_files = [
    ('raw_data/abram.jsonl', 'abram'),
    ('raw_data/haoran.jsonl', 'haoran'),
    ('raw_data/louis.jsonl', 'louis'),
    ('raw_data/mikelixiang.jsonl', 'mikeli'),
    ('raw_data/Ryan_{{HYr}}.jsonl', 'ryan'),
    ('raw_data/ziwei.jsonl', 'ziwei'),
]

df = pd.concat(
    [load_results(path, name=grader) for path, grader in grader_files]
)


In [15]:
# perform train_test_split
from sklearn.model_selection import train_test_split

# Rows sharing these columns are kept on the same side of the split.
group_cols = ['difficulty', 'originality', 'author', 'question_id']

# One list of 'id' labels per group, in groupby (sorted-key) order.
groups = [rows.index.tolist() for _, rows in df.groupby(group_cols)]

# Split whole groups, not individual rows.
train_groups, test_groups = train_test_split(groups, test_size=0.2, random_state=42)

# Flatten the lists of groups into lists of 'id' labels
train_indices = [idx for group in train_groups for idx in group]
test_indices = [idx for group in test_groups for idx in group]

# Every grouping column is decoded from the id, so an id belongs to exactly
# one group and therefore to exactly one side of the split.
assert set(train_indices).isdisjoint(test_indices), "an id landed in both train and test"

# Select rows by membership rather than df.loc[labels]: df concatenates
# several graders, so the 'id' index can repeat, and a label lookup on a
# repeated index returns every matching row once per requested label.
train_df = df[df.index.isin(train_indices)]
test_df = df[df.index.isin(test_indices)]

assert len(train_df) + len(test_df) == len(df), "split does not cover every row exactly once"

# Save the train and test DataFrames to JSON files
output_data(train_df, "train_data.json")
output_data(test_df, "test_data.json")

print("Train and test data have been exported to 'train_data.json' and 'test_data.json'")

Train and test data have been exported to 'train_data.json' and 'test_data.json'
