In [9]:
import pandas as pd 
import json 

In [10]:
def read_file(filepath):
    """Load and return the parsed contents of a JSON file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's top-level value.
    """
    # JSON interchange files are UTF-8; decoding explicitly keeps the result
    # independent of the machine's locale default.
    with open(filepath, 'r', encoding='utf-8') as fp:
        data = json.load(fp)
    return data

In [11]:
def make_df_refmatters(filepath):
    """Flatten an annotation JSON file into one row per (document, model) summary.

    The file is loaded with ``read_file`` and iterated as a sequence of
    documents. Each document is read through the keys ``'id'``, ``'dialogue'``
    and ``'model_summaries'``; ``'model_summaries'`` maps a model name to a
    dict read through ``'original_summary'``, ``'consistency'`` and
    ``'error_categories'`` (an iterable of dicts with ``'text'`` and
    ``'type'``).

    Args:
        filepath: Path of the annotation JSON file.

    Returns:
        pandas.DataFrame with the columns ``DocID``, ``Dialogue``, ``Model``,
        ``Summary``, then for every error category a flag column
        (``'yes'``/``'no'``) followed by a ``<category>_text`` column (the
        annotated text, or ``None``), and finally ``w/ Error`` (0 when
        ``consistency`` equals ``True``, 1 otherwise).

    Raises:
        KeyError: If one of the keys above is missing, or an annotation's
            ``'type'`` is not a key of the error-type mapping.
    """
    data = read_file(filepath)

    # Annotation label -> output column name; only 'EntE' is renamed.
    error_type_map = {
        'PredE': 'PredE',
        'EntE': 'SubjObjE',
        'OutE': 'OutE',
        'CorefE': 'CorefE',
        'GramE': 'GramE',
        'CircE': 'CircE',
        'LinkE': 'LinkE',
        'OthE': 'OthE',
    }
    error_columns = list(error_type_map.values())

    # Derive the column layout from the mapping so the two cannot drift apart:
    # metadata first, then a flag/text pair per category, then the label.
    columns = ['DocID', 'Dialogue', 'Model', 'Summary']
    for error_column in error_columns:
        columns.append(error_column)
        columns.append(f'{error_column}_text')
    columns.append('w/ Error')
    data_dict = {column: [] for column in columns}

    for doc in data:
        doc_id = doc['id']
        dialogue = doc['dialogue']
        for model, model_summary_info in doc['model_summaries'].items():
            model_summary = model_summary_info['original_summary']
            consistency = model_summary_info['consistency']
            # Explicit comparison rather than truthiness: any value that does
            # not compare equal to True is labelled as erroneous.
            factual_label = 1 if consistency != True else 0  # noqa: E712

            # Start from "no error" for every category, then switch on the
            # categories that were actually annotated.
            row_errors = {}
            for error_column in error_columns:
                row_errors[error_column] = 'no'
                row_errors[f'{error_column}_text'] = None

            for annotation in model_summary_info['error_categories']:
                error_text = annotation['text']
                raw_type = annotation['type']
                try:
                    target_column = error_type_map[raw_type]
                except KeyError:
                    raise KeyError(
                        f'unknown error type {raw_type!r} '
                        f'(doc {doc_id!r}, model {model!r})'
                    ) from None
                row_errors[target_column] = 'yes'
                # NOTE: when a summary carries several annotations of the same
                # type, only the text of the last one is kept.
                row_errors[f'{target_column}_text'] = error_text

            data_dict['DocID'].append(doc_id)
            data_dict['Dialogue'].append(dialogue)
            data_dict['Model'].append(model)
            data_dict['Summary'].append(model_summary)
            data_dict['w/ Error'].append(factual_label)

            for column, value in row_errors.items():
                data_dict[column].append(value)

    return pd.DataFrame(data_dict)
        
    

In [12]:
# Shared locations, spelled out once instead of inside every path literal.
PROJECT_ROOT = '/home/sanjana/factual_evaluation_source_based'
REFMATTERS_DIR = f'{PROJECT_ROOT}/dataset_creators/notebooks/reference_matters'
ANNOTATIONS_DIR = f'{PROJECT_ROOT}/datasets/sota_annotations'

# Convert each dataset's train split to a CSV. After the loop, `filepath` and
# `df` refer to the last dataset processed (DialogSum).
for dataset_name in ('SAMSum', 'DialogSum'):
    filepath = f'{REFMATTERS_DIR}/{dataset_name}/train.json'
    df = make_df_refmatters(filepath)
    df.to_csv(f'{ANNOTATIONS_DIR}/RefMatters_{dataset_name}_train.csv')

