In [9]:
import pandas as pd
import numpy as np
import re

# Result files to load, one CSV per task (bar chart and pie chart results)
csv_files = ['./bar_results.csv', './pie_results.csv']

def clean_raw_answers(file_path, drop_na=False):
    """
    Load a results CSV and parse each raw answer into a cleaned list of numbers.

    For every value in the 'raw_answers' column, only the text after the last
    sentence break (".", "!" or "?" followed by whitespace) is inspected, and
    the LAST bracketed list of numbers found there is used. Each number greater
    than 1 is divided by 100, the first number is then set to 1.0, and all
    numbers are rounded to 2 decimal places. The result is stored in a new
    'parsed_answers' column.

    An answer is parsed as NaN (instead of raising) when it is missing, when
    its last sentence holds no bracketed list, or when the list contains a
    token that is not a valid number (e.g. "[1.0, 0.5,]" or "[ ]").

    Parameters:
    file_path (str): Path to the CSV file; it must contain a 'raw_answers' column
    drop_na (bool): Whether to drop rows with NaN in 'parsed_answers'

    Returns:
    pandas.DataFrame: DataFrame with cleaned parsed answers
    """
    # Read the CSV file
    df = pd.read_csv(file_path)

    def extract_digits(x):
        """Parse one raw answer into a list of floats, or NaN if it cannot be parsed."""
        if pd.isna(x):
            return np.nan

        # Drop newlines, then keep only the text after the last sentence break
        text = str(x).strip().replace('\n', '')
        last_sentence = re.split(r'[.!?]\s+', text)[-1]

        # Collect every bracketed list; the final one is the answer
        # (re.search would have returned the first list, not the last)
        bracketed_lists = re.findall(r'\[([\d.,\s]+)\]', last_sentence)
        if not bracketed_lists:
            return np.nan

        try:
            numbers = [float(num.strip()) for num in bracketed_lists[-1].split(',')]
        except ValueError:
            # The pattern also admits malformed lists (trailing comma, empty
            # token, "1..2"); treat them as unparsable instead of aborting
            return np.nan

        # Values above 1 are taken to be percentages and scaled into [0, 1].
        # NOTE(review): scaling is per element, so a value of exactly 1 inside
        # a percent-scale list is left as 1.0 rather than becoming 0.01.
        adjusted_numbers = [num / 100 if num > 1 else num for num in numbers]
        adjusted_numbers[0] = 1.0  # The first entry is always forced to 1
        return [round(num, 2) for num in adjusted_numbers]

    # Apply extraction and transformation logic to raw answers
    df['parsed_answers'] = df['raw_answers'].apply(extract_digits)

    if drop_na:
        # Drop rows with NaN in 'parsed_answers' and report the row counts
        print(f"Row count before dropping NaN for {file_path}: {len(df)}")
        df = df.dropna(subset=['parsed_answers'])
        print(f"Row count after dropping NaN for {file_path}: {len(df)}")

    return df

def load_and_process_data(csv_files):
    """
    Clean every CSV file twice and collect the results keyed by task name.

    Each file is passed to clean_raw_answers once with drop_na=False and once
    with drop_na=True. The task name is the file name (the part after the last
    '/') with '_results.csv' removed.

    Parameters:
    csv_files (list): List of file paths to CSV files.

    Returns:
    tuple: A tuple containing two dictionaries:
        - data_with_na (dict): task name -> DataFrame with NaNs retained.
        - data_no_na (dict): task name -> DataFrame with NaNs dropped.
    """
    frames_with_na, frames_no_na = {}, {}

    for path in csv_files:
        # NaN-retaining pass first, then the NaN-dropping pass
        kept, dropped = (clean_raw_answers(path, drop_na=flag) for flag in (False, True))

        # e.g. './bar_results.csv' -> 'bar'
        task = path.rsplit('/', 1)[-1].replace('_results.csv', '')

        frames_with_na[task] = kept
        frames_no_na[task] = dropped

    return frames_with_na, frames_no_na

# Build both variants (NaNs retained / NaNs dropped) for every task file
data_with_na, data_no_na = load_and_process_data(csv_files)

# Pull out the per-task frames; .get() yields None for a task that was not loaded
df_bar_with_na, df_pie_with_na = (data_with_na.get(task) for task in ('bar', 'pie'))
df_bar_no_na, df_pie_no_na = (data_no_na.get(task) for task in ('bar', 'pie'))


Row count before dropping NaN for ./bar_results.csv: 105
Row count after dropping NaN for ./bar_results.csv: 95
Row count before dropping NaN for ./pie_results.csv: 105
Row count after dropping NaN for ./pie_results.csv: 97


In [10]:
# Lift every pandas display limit (rows, columns, total width, column width)
# so frames are rendered in full; these settings stay in effect for later cells
pd.set_option(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.width', None,
    'display.max_colwidth', None,
)

df_bar_with_na

Unnamed: 0,model_name,run,image_path,ground_truth,raw_answers,parsed_answers
0,gpt4o,run_0,EXP2-Results/bar/bar_image_1.png,"[1.0, 0.04, 0.3, 0.09, 0.19]","[1.0, 0.05, 0.14, 0.03, 0.09]","[1.0, 0.05, 0.14, 0.03, 0.09]"
1,gpt4o,run_0,EXP2-Results/bar/bar_image_2.png,"[1.0, 0.14, 0.12, 0.04, 0.34]","[1.0, 0.39, 0.37, 0.11, 0.13]","[1.0, 0.39, 0.37, 0.11, 0.13]"
2,gpt4o,run_0,EXP2-Results/bar/bar_image_3.png,"[1.0, 0.16, 0.07, 0.17, 0.28]","[1.0, 0.11, 0.11, 0.17, 0.13]","[1.0, 0.11, 0.11, 0.17, 0.13]"
3,gpt4o,run_0,EXP2-Results/bar/bar_image_4.png,"[1.0, 0.29, 0.18, 0.14, 0.05]","[23.0, 11.0, 46.0, 9.0, 11.0]","[1.0, 0.11, 0.46, 0.09, 0.11]"
4,gpt4o,run_0,EXP2-Results/bar/bar_image_5.png,"[1.0, 0.23, 0.11, 0.22, 0.14]","[0.17, 0.13, 1.0, 0.17, 0.13]","[1.0, 0.13, 1.0, 0.17, 0.13]"
5,gpt4o,run_0,EXP2-Results/bar/bar_image_6.png,"[1.0, 0.15, 0.09, 0.11, 0.25]","[1.0, 0.32, 0.07, 0.11, 0.18]","[1.0, 0.32, 0.07, 0.11, 0.18]"
6,gpt4o,run_0,EXP2-Results/bar/bar_image_7.png,"[1.0, 0.15, 0.05, 0.23, 0.24]","[1.0, 0.08, 0.06, 0.05, 0.0]","[1.0, 0.08, 0.06, 0.05, 0.0]"
7,gpt4o,run_0,EXP2-Results/bar/bar_image_8.png,"[1.0, 0.04, 0.06, 0.23, 0.32]","[1.0, 0.34, 0.26, 0.09, 0.09]","[1.0, 0.34, 0.26, 0.09, 0.09]"
8,gpt4o,run_0,EXP2-Results/bar/bar_image_9.png,"[1.0, 0.17, 0.26, 0.15, 0.07]","[1.0, 0.03, 0.03, 0.00, 0.00]","[1.0, 0.03, 0.03, 0.0, 0.0]"
9,gpt4o,run_0,EXP2-Results/bar/bar_image_10.png,"[1.0, 0.22, 0.19, 0.06, 0.17]","[29.0, 25.0, 18.0, 12.0, 16.0]","[1.0, 0.25, 0.18, 0.12, 0.16]"


In [11]:
# Show the bar-task frame from which rows with NaN in 'parsed_answers' were dropped
df_bar_no_na

Unnamed: 0,model_name,run,image_path,ground_truth,raw_answers,parsed_answers
0,gpt4o,run_0,EXP2-Results/bar/bar_image_1.png,"[1.0, 0.04, 0.3, 0.09, 0.19]","[1.0, 0.05, 0.14, 0.03, 0.09]","[1.0, 0.05, 0.14, 0.03, 0.09]"
1,gpt4o,run_0,EXP2-Results/bar/bar_image_2.png,"[1.0, 0.14, 0.12, 0.04, 0.34]","[1.0, 0.39, 0.37, 0.11, 0.13]","[1.0, 0.39, 0.37, 0.11, 0.13]"
2,gpt4o,run_0,EXP2-Results/bar/bar_image_3.png,"[1.0, 0.16, 0.07, 0.17, 0.28]","[1.0, 0.11, 0.11, 0.17, 0.13]","[1.0, 0.11, 0.11, 0.17, 0.13]"
3,gpt4o,run_0,EXP2-Results/bar/bar_image_4.png,"[1.0, 0.29, 0.18, 0.14, 0.05]","[23.0, 11.0, 46.0, 9.0, 11.0]","[1.0, 0.11, 0.46, 0.09, 0.11]"
4,gpt4o,run_0,EXP2-Results/bar/bar_image_5.png,"[1.0, 0.23, 0.11, 0.22, 0.14]","[0.17, 0.13, 1.0, 0.17, 0.13]","[1.0, 0.13, 1.0, 0.17, 0.13]"
5,gpt4o,run_0,EXP2-Results/bar/bar_image_6.png,"[1.0, 0.15, 0.09, 0.11, 0.25]","[1.0, 0.32, 0.07, 0.11, 0.18]","[1.0, 0.32, 0.07, 0.11, 0.18]"
6,gpt4o,run_0,EXP2-Results/bar/bar_image_7.png,"[1.0, 0.15, 0.05, 0.23, 0.24]","[1.0, 0.08, 0.06, 0.05, 0.0]","[1.0, 0.08, 0.06, 0.05, 0.0]"
7,gpt4o,run_0,EXP2-Results/bar/bar_image_8.png,"[1.0, 0.04, 0.06, 0.23, 0.32]","[1.0, 0.34, 0.26, 0.09, 0.09]","[1.0, 0.34, 0.26, 0.09, 0.09]"
8,gpt4o,run_0,EXP2-Results/bar/bar_image_9.png,"[1.0, 0.17, 0.26, 0.15, 0.07]","[1.0, 0.03, 0.03, 0.00, 0.00]","[1.0, 0.03, 0.03, 0.0, 0.0]"
9,gpt4o,run_0,EXP2-Results/bar/bar_image_10.png,"[1.0, 0.22, 0.19, 0.06, 0.17]","[29.0, 25.0, 18.0, 12.0, 16.0]","[1.0, 0.25, 0.18, 0.12, 0.16]"
