In [3]:
import pandas as pd
import numpy as np
import re
import os

# One results CSV per task; the task name is later derived from the file name
csv_files = ['./framed_results.csv', './unframed_results.csv']

def _extract_numbers(raw_answer, model_name):
    """
    Extract a list of floats from a single raw answer.

    Only the last sentence of the answer is inspected (sentences are split on
    '.', '!' or '?' followed by whitespace, after newlines have been removed).
    Strategies are tried in this order:
      1. a bracketed, comma-separated list such as "[58, 52]";
      2. if model_name is "LLaMA": the last two standalone integers;
      3. otherwise, if the sentence contains "both": "both A and B" -> [A, B],
         or "both A" -> [A, A].

    Parameters:
    raw_answer: Raw answer value; a missing value (NaN/None) yields None
    model_name: Model name of the row, used to select the LLaMA strategy

    Returns:
    list of float, or None when no strategy produced a result
    """
    if pd.isna(raw_answer):
        return None

    text = str(raw_answer).strip().replace('\n', '')
    sentences = re.split(r'[.!?]\s+', text)
    last_sentence = sentences[-1] if sentences else ""

    # First, look for a list in square brackets
    bracketed = re.search(r'\[([\d.,\s]+)\]', last_sentence)
    if bracketed:
        try:
            return [float(token.strip()) for token in bracketed.group(1).split(',')]
        except ValueError:
            # The regex also accepts content that is not a clean list of numbers
            # (e.g. "[58, ]", "[58 52]", "[...]"). Treat it as "no list found"
            # rather than raising: an uncaught error here would abort the parsing
            # of the whole file because of one malformed answer.
            pass

    # NOTE(review): because of the if/elif below, LLaMA answers never reach the
    # "both" handling, even when fewer than two integers are found - confirm
    # that this is intended.
    if model_name == "LLaMA":
        # For the LLaMA model, take the last two standalone integers
        integers = re.findall(r'\b(\d+)\b', last_sentence)
        if len(integers) >= 2:
            return [float(integers[-2]), float(integers[-1])]
    elif "both" in last_sentence:
        # "both A and B" -> two distinct values
        pair = re.findall(r'both\s+(\d+)\s+and\s+(\d+)', last_sentence)
        if pair:
            return [float(pair[0][0]), float(pair[0][1])]
        # "both A" -> the same value twice
        single = re.search(r'both\s+(\d+)', last_sentence)
        if single:
            value = float(single.group(1))
            return [value, value]

    return None


def clean_raw_answers(file_path, drop_na=False):
    """
    Read a results CSV and parse its 'raw_answers' column into lists of numbers.

    A 'parsed_answers' column is added to the DataFrame. Each entry is a list of
    floats, or None when the answer is missing or could not be parsed (including
    bracketed lists with malformed content, which no longer raise).

    Parameters:
    file_path (str): Path to the CSV file; it must provide the columns
                     'raw_answers' and 'model_name'
    drop_na (bool): Whether to drop rows with None in 'parsed_answers'
                    (row counts before and after are printed)

    Returns:
    tuple: (cleaned DataFrame, DataFrame with None rows in 'parsed_answers')
    """
    # Read the CSV file
    df = pd.read_csv(file_path)

    # Apply the extraction logic row by row (the model name selects the strategy)
    df['parsed_answers'] = df.apply(
        lambda row: _extract_numbers(row['raw_answers'], row['model_name']),
        axis=1,
    )

    # Keep an independent copy of the unparsed rows; it is returned even when
    # those rows are dropped from the main frame below
    none_rows = df[df['parsed_answers'].isna()].copy()

    # Optionally drop None rows
    if drop_na:
        print(f"Row count before dropping None for {file_path}: {len(df)}")
        df = df.dropna(subset=['parsed_answers'])
        print(f"Row count after dropping None for {file_path}: {len(df)}")

    return df, none_rows

def load_and_process_data(csv_files):
    """
    Clean every CSV file in the list and collect the results per task.

    Each file is cleaned twice: once keeping the rows whose answers could not be
    parsed and once with those rows dropped. The task name is the file's base
    name with '_results.csv' removed. A file that raises during processing is
    reported on stdout and skipped.

    Parameters:
    csv_files (list): List of CSV file paths

    Returns:
    tuple: A tuple containing:
           - data_with_na (dict): task name -> DataFrame with None rows retained
           - data_no_na (dict): task name -> DataFrame with None rows dropped
           - none_rows_df (DataFrame): all rows with None in 'parsed_answers',
             tagged with a 'task_name' column (empty DataFrame if there are none)
    """
    frames_keeping_na, frames_without_na, unparsed_chunks = {}, {}, []

    for csv_path in csv_files:
        try:
            # Two passes over the same file: one keeps unparsed rows, one drops them
            kept, unparsed = clean_raw_answers(csv_path, drop_na=False)
            dropped = clean_raw_answers(csv_path, drop_na=True)[0]

            # e.g. './framed_results.csv' -> 'framed'
            task = os.path.basename(csv_path).replace('_results.csv', '')

            frames_keeping_na[task] = kept
            frames_without_na[task] = dropped

            # Nothing to collect when every answer was parsed
            if unparsed.empty:
                continue

            unparsed['task_name'] = task
            unparsed_chunks.append(unparsed)
        except Exception as e:
            print(f"Error processing {csv_path}: {e}")

    # Stack the unparsed rows of all tasks into one frame
    if unparsed_chunks:
        all_unparsed = pd.concat(unparsed_chunks, ignore_index=True)
    else:
        all_unparsed = pd.DataFrame()

    return frames_keeping_na, frames_without_na, all_unparsed

# Run the cleaning pipeline over every configured CSV file
data_with_na, data_no_na, none_rows_df = load_and_process_data(csv_files)

# Per-task frames looked up by task name (None when a task is missing)
df_framed_with_na, df_framed_no_na = data_with_na.get('framed'), data_no_na.get('framed')
df_unframed_with_na, df_unframed_no_na = data_with_na.get('unframed'), data_no_na.get('unframed')


Row count before dropping None for ./framed_results.csv: 305
Row count after dropping None for ./framed_results.csv: 297
Row count before dropping None for ./unframed_results.csv: 305
Row count after dropping None for ./unframed_results.csv: 300


In [4]:
# Lift pandas' display limits (row count, column count, total width and
# per-column width) so that frames are rendered in full
for option_name in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(option_name, None)

# Display the 'framed' task frame in which rows with unparsed (None) answers are retained
df_framed_with_na

Unnamed: 0,model_name,run,image_path,ground_truth,raw_answers,forced_repetitions,parsed_answers
0,gpt4o,run_0,EXP4-Results/framed/framed_image_1.png,"[58, 55]","[58, 52]",0,"[58.0, 52.0]"
1,gpt4o,run_0,EXP4-Results/framed/framed_image_2.png,"[57, 56]","[55, 59]",0,"[55.0, 59.0]"
2,gpt4o,run_0,EXP4-Results/framed/framed_image_3.png,"[52, 58]","[52, 57]",0,"[52.0, 57.0]"
3,gpt4o,run_0,EXP4-Results/framed/framed_image_4.png,"[51, 49]","[60, 49]",0,"[60.0, 49.0]"
4,gpt4o,run_0,EXP4-Results/framed/framed_image_5.png,"[55, 56]","[50, 58]",0,"[50.0, 58.0]"
5,gpt4o,run_0,EXP4-Results/framed/framed_image_6.png,"[49, 56]","[50, 55]",0,"[50.0, 55.0]"
6,gpt4o,run_0,EXP4-Results/framed/framed_image_7.png,"[49, 59]","[58, 51]",0,"[58.0, 51.0]"
7,gpt4o,run_0,EXP4-Results/framed/framed_image_8.png,"[54, 53]","[50, 55]",0,"[50.0, 55.0]"
8,gpt4o,run_0,EXP4-Results/framed/framed_image_9.png,"[58, 50]","[58, 52]",0,"[58.0, 52.0]"
9,gpt4o,run_0,EXP4-Results/framed/framed_image_10.png,"[51, 50]","[53, 60]",0,"[53.0, 60.0]"


In [5]:
# Display the 'framed' task frame from which rows with unparsed (None) answers were dropped
df_framed_no_na

Unnamed: 0,model_name,run,image_path,ground_truth,raw_answers,forced_repetitions,parsed_answers
0,gpt4o,run_0,EXP4-Results/framed/framed_image_1.png,"[58, 55]","[58, 52]",0,"[58.0, 52.0]"
1,gpt4o,run_0,EXP4-Results/framed/framed_image_2.png,"[57, 56]","[55, 59]",0,"[55.0, 59.0]"
2,gpt4o,run_0,EXP4-Results/framed/framed_image_3.png,"[52, 58]","[52, 57]",0,"[52.0, 57.0]"
3,gpt4o,run_0,EXP4-Results/framed/framed_image_4.png,"[51, 49]","[60, 49]",0,"[60.0, 49.0]"
4,gpt4o,run_0,EXP4-Results/framed/framed_image_5.png,"[55, 56]","[50, 58]",0,"[50.0, 58.0]"
5,gpt4o,run_0,EXP4-Results/framed/framed_image_6.png,"[49, 56]","[50, 55]",0,"[50.0, 55.0]"
6,gpt4o,run_0,EXP4-Results/framed/framed_image_7.png,"[49, 59]","[58, 51]",0,"[58.0, 51.0]"
7,gpt4o,run_0,EXP4-Results/framed/framed_image_8.png,"[54, 53]","[50, 55]",0,"[50.0, 55.0]"
8,gpt4o,run_0,EXP4-Results/framed/framed_image_9.png,"[58, 50]","[58, 52]",0,"[58.0, 52.0]"
9,gpt4o,run_0,EXP4-Results/framed/framed_image_10.png,"[51, 50]","[53, 60]",0,"[53.0, 60.0]"
