In [1]:
import json,tiktoken,re
import pandas as pd
import copy
import random
import numpy as np
import os

In [2]:
# Empty per-model result template. It is deep-copied once per model so that
# every model gets its own independent lists.
#   "ID"       -> flat list
#   "if"       -> one list per aspect key ("S", "R", "I")
#   "score"    -> one list per aspect key ("S", "R", "I")
#   "coverage" -> one list per aspect key plus the combined "union" / "inter"
res_per_model = {
    "ID": [],
    "if": {aspect: [] for aspect in ("S", "R", "I")},
    "score": {aspect: [] for aspect in ("S", "R", "I")},
    "coverage": {key: [] for key in ("S", "R", "I", "union", "inter")},
}

In [3]:
def compute_coverage(text):
    """Return the fraction of ``text`` that sits inside ``<<<...>>>`` markers.

    The numerator is the total number of characters captured between
    ``<<<`` and ``>>>``; the denominator is the length of ``text`` after
    discounting three characters for every ``<<<`` and every ``>>>`` found.

    Returns ``0`` when the discounted length is not positive.

    NOTE(review): the pattern is applied without ``re.DOTALL``, so a marked
    span that contains a newline is not counted in the numerator.
    """
    marked_chars = 0
    for span in re.findall(r'<<<(.*?)>>>', text):
        marked_chars += len(span)

    # Every marker occurrence is three characters long.
    marker_chars = 3 * (text.count('<<<') + text.count('>>>'))
    plain_length = len(text) - marker_chars

    if plain_length > 0:
        return marked_chars / plain_length
    return 0

In [4]:
def extract_bracketed_positions(text, reference_text):
    """Locate every ``<<<...>>>`` span of ``text`` inside ``reference_text``.

    Args:
        text: String that may contain ``<<<...>>>`` markers.
        reference_text: Marker-free string in which the marked contents are
            looked up.

    Returns:
        A list of ``(start, end)`` index pairs (end exclusive) into
        ``reference_text``, one per marked span whose content was found, in
        the order the spans appear in ``text``. Spans whose content does not
        occur in ``reference_text`` are skipped.

    Spans are searched left to right: each lookup starts after the furthest
    position matched so far, so a phrase that is marked twice maps to two
    distinct occurrences instead of both collapsing onto the first one. If
    nothing is found past that point, the lookup falls back to the first
    occurrence anywhere in ``reference_text``.
    """
    positions = []
    cursor = 0  # furthest end index matched so far in reference_text

    for match in re.finditer(r'<<<(.*?)>>>', text):
        content = match.group(1)
        start_ref = reference_text.find(content, cursor)
        if start_ref == -1:
            # Nothing after the cursor: fall back to a global search.
            start_ref = reference_text.find(content)
        if start_ref == -1:
            continue
        end_ref = start_ref + len(content)
        positions.append((start_ref, end_ref))
        cursor = max(cursor, end_ref)

    return positions

def union_bracketed_positions(positions, length):
    """Build a boolean mask of ``length`` entries covering all given spans.

    Args:
        positions: Iterable of ``(start, end)`` pairs, end exclusive.
        length: Size of the mask to produce.

    Returns:
        A list of booleans where index ``i`` is ``True`` when ``i`` falls in
        at least one span. Parts of a span at or beyond ``length`` are
        ignored.
    """
    covered = [False for _ in range(length)]
    for span_start, span_end in positions:
        # Clip the span so it never runs past the mask.
        for index in range(span_start, min(span_end, length)):
            covered[index] = True
    return covered

def inter_bracketed_positions(all_positions, length):
    """Build a boolean mask of the indices covered by every span group.

    Args:
        all_positions: Iterable of span groups; each group is an iterable of
            ``(start, end)`` pairs, end exclusive.
        length: Size of the mask to produce.

    Returns:
        A list of booleans where index ``i`` is ``True`` only when each group
        has at least one span containing ``i``. With no groups at all the
        mask is entirely ``True``.
    """
    common = [True] * length

    for spans in all_positions:
        # Mask of the indices covered by this one group.
        group_mask = [False] * length
        for span_start, span_end in spans:
            for index in range(span_start, min(span_end, length)):
                group_mask[index] = True
        # Keep only the indices that survive in every group so far.
        for index in range(length):
            common[index] = common[index] and group_mask[index]

    return common

def generate_text_with_brackets(original_text, merged_positions):
    """Re-insert ``<<<``/``>>>`` markers into ``original_text`` from a mask.

    Args:
        original_text: The marker-free text.
        merged_positions: Sequence of truthy/falsy flags, one per character
            of ``original_text``.

    Returns:
        ``original_text`` with each maximal run of flagged characters wrapped
        in ``<<<`` and ``>>>``.
    """
    pieces = []
    span_open = False
    for index, flag in enumerate(merged_positions):
        marked = bool(flag)
        if marked != span_open:
            # A run starts or ends right before this character.
            pieces.append("<<<" if marked else ">>>")
            span_open = marked
        pieces.append(original_text[index])
    if span_open:
        # Close a run that reaches the end of the mask.
        pieces.append(">>>")
    return ''.join(pieces)


def find_bracketed_content_union(texts, original_sentence):
    """Mark in ``original_sentence`` everything marked in any of ``texts``.

    The spans of every text are located in ``original_sentence``, pooled
    together, and the sentence is returned with ``<<<...>>>`` markers around
    each character covered by at least one span.
    """
    pooled_spans = [
        span
        for labelled in texts
        for span in extract_bracketed_positions(labelled, original_sentence)
    ]
    mask = union_bracketed_positions(pooled_spans, len(original_sentence))
    return generate_text_with_brackets(original_sentence, mask)

def find_bracketed_content_inter(texts, original_sentence):
    """Mark in ``original_sentence`` only what is marked in all of ``texts``.

    The spans of each text are located in ``original_sentence`` separately,
    and the sentence is returned with ``<<<...>>>`` markers around the
    characters that every text covers.
    """
    spans_per_text = []
    for labelled in texts:
        spans_per_text.append(extract_bracketed_positions(labelled, original_sentence))

    shared_mask = inter_bracketed_positions(spans_per_text, len(original_sentence))
    return generate_text_with_brackets(original_sentence, shared_mask)



In [5]:
# Load the sampled record IDs, one integer per line. Blank lines (for
# example a trailing empty line) are skipped instead of raising ValueError;
# int() itself tolerates surrounding whitespace.
with open('IDs1000.txt', 'r') as file:
    IDs1000 = [int(line) for line in file if line.strip()]

In [6]:
# Root directory that is walked recursively for the per-model label files.
exp_directory = "./dataset"

In [7]:
# Collect, for every label file found under exp_directory, the per-record
# flags, scores and coverages of the sampled IDs.
#
# Each label file holds one JSON object per line. For an aspect X in
# ("S", "R", "I") the keys read are "ifX", "scoreX" and "X" (the marked
# text); "ID" and "answer" are read as well.
res = {}
aspects = ("S", "R", "I")
# Set lookup instead of scanning the ID list once per record.
sampled_ids = set(IDs1000)

# sample 1000
for root, dirs, files in os.walk(exp_directory):
    for file_name in files:
        # The model name is whatever precedes "_label" in the file name.
        # Matching the bare file name (not the joined path) keeps this
        # independent of the path separator and of directory names, and
        # files without "_label" are skipped instead of crashing.
        name_match = re.match(r'(.+)_label', file_name)
        if name_match is None:
            continue
        model_name = name_match.group(1)
        file_path = os.path.join(root, file_name)

        res[model_name] = copy.deepcopy(res_per_model)
        tmpRes = res[model_name]
        with open(file_path, 'r') as label_file:
            for line in label_file:
                if not line.strip():
                    continue  # tolerate blank lines in the file
                # Convert each line into a dictionary
                data = json.loads(line)
                # Keep only sampled records that carry all three flags.
                if data['ID'] not in sampled_ids:
                    continue
                if not all('if' + aspect in data for aspect in aspects):
                    continue

                tmpRes['ID'].append(data['ID'])
                texts = []
                for aspect in aspects:
                    flag = data['if' + aspect]
                    tmpRes['if'][aspect].append(flag)
                    if flag == 'yes':
                        tmpRes['score'][aspect].append(data['score' + aspect])
                        tmpRes['coverage'][aspect].append(compute_coverage(data[aspect]))
                        texts.append(data[aspect])
                    else:
                        # Aspects not flagged 'yes' count as zero.
                        tmpRes['score'][aspect].append(0)
                        tmpRes['coverage'][aspect].append(0)

                if texts:
                    # Combine the marked spans of the 'yes' aspects on the answer.
                    union = find_bracketed_content_union(texts, data['answer'])
                    tmpRes['coverage']['union'].append(compute_coverage(union))
                    inter = find_bracketed_content_inter(texts, data['answer'])
                    tmpRes['coverage']['inter'].append(compute_coverage(inter))
                else:
                    tmpRes['coverage']['union'].append(0)
                    tmpRes['coverage']['inter'].append(0)

In [19]:
def _scaled_ratio(total, count, scale=1):
    """Return ``scale * total / count``, or NaN when ``count`` is zero."""
    return scale * total / count if count else float("nan")


# Per-model summary metrics. Rates and coverages are percentages, scores are
# plain means. A model with no retained records gets NaN for every metric
# instead of raising ZeroDivisionError.
resfinal = {}
for model in res.keys():
    flags = res[model]["if"]
    scores = res[model]["score"]
    coverage = res[model]["coverage"]
    n_records = len(flags["S"])

    tmpRes = {}
    resfinal[model] = tmpRes

    # Records where at least one of the three flags is 'yes'.
    any_yes = sum(
        a == 'yes' or b == 'yes' or c == 'yes'
        for a, b, c in zip(flags["S"], flags["R"], flags["I"])
    )
    tmpRes["Overall_union_rate"] = _scaled_ratio(any_yes, n_records, 100)
    # Per-record sum of the three scores, averaged over the records.
    summed_scores = sum(
        s + r + i for s, r, i in zip(scores["S"], scores["R"], scores["I"])
    )
    tmpRes["Overall_score"] = _scaled_ratio(summed_scores, n_records)
    tmpRes["Overall_union_coverage"] = _scaled_ratio(sum(coverage["union"]), n_records, 100)

    # Keys are inserted in the same order as before: rates, scores, coverages.
    for aspect in ("S", "R", "I"):
        yes_count = sum(flag == "yes" for flag in flags[aspect])
        tmpRes[aspect + "_rate"] = _scaled_ratio(yes_count, len(flags[aspect]), 100)
    for aspect in ("S", "R", "I"):
        tmpRes[aspect + "_score"] = _scaled_ratio(sum(scores[aspect]), len(flags[aspect]))
    for aspect in ("S", "R", "I"):
        tmpRes[aspect + "_coverage"] = _scaled_ratio(sum(coverage[aspect]), len(flags[aspect]), 100)


In [20]:

# Display settings for the result table: no limit on the number of shown
# columns or rows, cell text cut at 100 characters, floats with two decimals.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', None),
    ('display.max_colwidth', 100),
    ('display.float_format', lambda x: '%.2f' % x),
):
    pd.set_option(option_name, option_value)

In [None]:
# resfinal maps model -> {metric: value}, so the frame has one column per
# model; transposing puts models on the rows. The last expression is the
# cell output: models sorted by ascending Overall_union_rate.
df = pd.DataFrame(resfinal)
df.T.sort_values(by='Overall_union_rate', ascending=True)