In [1]:
import pandas as pd
import re
LLM_models = ['GPT4','GPT3.5', 'GeminiUltra']



# 정답 데이터
ground_truth_data = pd.read_csv('/home/shyuni5/file/CORL2024/Sembot/IP_gen/ground_truth.csv')

def parse_data(data, goal_number, goal_content):
    plan_pattern = re.compile(r'\[Plan (\d+)\] ([^\n]+)\n(.*?)(?=\[Plan \d+|\Z)', re.DOTALL)
    key_value_pattern = re.compile(r'{(.*?):(.*?)}')
    plans_data = []
    whole_key_value_data = {key.strip(): value.strip() for key, value in key_value_pattern.findall(data)}
    robot_speed_value = whole_key_value_data.get("Robot speed", "Key not found")

    for plan_match in plan_pattern.finditer(data):
        plan_number, plan_description, plan_content = plan_match.groups()
        key_value_data = dict(key_value_pattern.findall(plan_content))
        key_value_data = {key.strip(): value.strip() for key, value in key_value_pattern.findall(plan_content)}

        plan_data = {
            'Plan #': int(plan_number),
            'Plan Description': plan_description,
            'Goal #': goal_number,
            'Goal Description': goal_content,
            'Robot speed':robot_speed_value.strip(),
            **key_value_data  # '{key: value}' 쌍 추가
        }

        plans_data.append(plan_data)

    return plans_data


# Scoring function that checks multiple possible correct answers
def score_answers(raw_data, ground_truth):
    scores = []
    total = []
    for index, raw_plan in raw_data.iterrows():
        score_row = {'Robot speed':0,'Grip Orientation':0, 'Gripper grabbing position':0,
                        'Gripper orientation':0, 'Gripper strength':0, 'Placing height':0}
        total_row = {'Robot speed':0,'Grip Orientation':0, 'Gripper grabbing position':0,
                        'Gripper orientation':0, 'Gripper strength':0, 'Placing height':0}
        
        matching_rows = ground_truth[
            (ground_truth['Goal Number'].astype(str).str.startswith(str(raw_plan['Goal #']))) & 
            (ground_truth['Plan'].astype(str) == 'Plan ' + str(raw_plan['Plan #']))
        ]
        
        for key in ['Robot speed', 'Grip Orientation', 'Gripper grabbing position',
                    'Gripper orientation', 'Gripper strength', 'Placing height']:
            flag = False
            for _, truth_row in matching_rows.iterrows(): # 4-1 /4-2
                raw_value = raw_plan[key]
                correct_answers = str(truth_row[key]).split('/') # horizontal/vertical
                if correct_answers[0] !='nan' and not flag:
                    total_row[key] +=1
                    flag = True
                else:
                    break
            
                if any(raw_value == correct for correct in correct_answers):
                    score_row[key] += 1
                    break
                else:
                    print(f"Incorrect answer:: key: {key}")
        scores.append(score_row)
        total.append(total_row)
        
    return pd.DataFrame(scores),  pd.DataFrame(total)


In [2]:
parsed_raw_data_list = []
for LLM_model in LLM_models:    
    with open(f'{LLM_model}/results.txt', 'r') as file:
        raw_data = file.read()
    goals_raw_data = raw_data.split('[Goal')[1:]
    goal_pattern = re.compile(r'\[Goal \d+\] : ([^\n]+)')
    goal_matches = goal_pattern.findall(raw_data)
    goal_contents = []

    for goal in goal_matches:
        goal_contents.append(goal)

    parsed_raw_data = []
    for i, goal_raw_data in enumerate(goals_raw_data):
        parsed_raw_data.extend(parse_data(goal_raw_data, i+1, goal_contents[i]))

    parsed_raw_data_list.append(parsed_raw_data)

# 파싱된 데이터를 DataFrame으로 변환 및 저장
df_raw_list = []
for parsed_raw_data in parsed_raw_data_list:
    df_raw = pd.DataFrame(parsed_raw_data)
    df_raw.to_csv(f'{LLM_model}/parsed_data.csv', index=False)
    df_raw_list.append(df_raw)


In [15]:
# Score the parsed raw data against the ground truth
total_score_df = pd.DataFrame()
for i, df_raw in enumerate(df_raw_list):
    scored_data, total_data = score_answers(df_raw, ground_truth_data)

    # Calculate accuracy rates
    score_data = scored_data.sum()/total_data.sum()

    score_data_df = pd.DataFrame([score_data])
    score_data_df['Average'] = score_data_df[['Grip Orientation', 'Gripper grabbing position',
                    'Gripper orientation', 'Gripper strength', 'Placing height']].mean().mean()
    score_data_df['LLM'] = LLM_models[i]

    total_score_df = pd.concat([total_score_df, score_data_df])
    total_score_df.to_csv('total_score.csv', index=False)


In [16]:
total_score_df

Unnamed: 0,Robot speed,Grip Orientation,Gripper grabbing position,Gripper orientation,Gripper strength,Placing height,Average,LLM
0,1.0,0.785714,0.642857,0.928571,0.928571,0.428571,0.742857,GPT4
0,0.846154,0.692308,0.692308,0.615385,0.923077,0.307692,0.646154,GPT3.5
0,1.0,0.545455,0.454545,0.636364,0.818182,0.416667,0.574242,GeminiUltra
