In [1]:
import pandas as pd
import numpy as np
import re
import os

# One results CSV per task type, each located relative to the working directory
csv_files = [
    f'./{task}_results.csv'
    for task in ('type1', 'type2', 'type3', 'type4', 'type5')
]


def clean_raw_answers(file_path, drop_na=False):
    """
    Parses a numeric answer out of each free-text entry in the 'raw_answers' column
    and stores it in a new 'parsed_answer' column.

    For every row, the fraction ("a/b") or leading-zero decimal ("0.xxx") that starts
    latest in the text is taken as the answer. Fractions are divided out and rounded
    to 2 decimal places; decimals are kept as written (at most 3 decimal digits are
    matched). Rows with no usable value, or whose selected fraction has a zero
    denominator, receive NaN.

    Parameters:
    file_path (str): Path to the CSV file; it must contain a 'raw_answers' column
    drop_na (bool): Whether to drop rows with NaN in 'parsed_answer'

    Returns:
    pandas.DataFrame: The CSV contents with the added 'parsed_answer' column

    Raises:
    KeyError: If the CSV has no 'raw_answers' column
    """
    # Read the CSV file
    df = pd.read_csv(file_path)

    if 'raw_answers' not in df.columns:
        raise KeyError(f"'raw_answers' column not found in {file_path}")

    fraction_pattern = re.compile(r'\d+/\d+')
    # Only decimals written with a leading "0." are recognised, and at most three
    # decimal digits are captured.
    decimal_pattern = re.compile(r'0\.\d{1,3}')

    def extract_last_decimal_or_fraction(raw_answer):
        """Return the value of the last fraction/decimal in raw_answer, or NaN."""
        raw_answer = str(raw_answer)
        candidates = list(fraction_pattern.finditer(raw_answer))
        candidates.extend(decimal_pattern.finditer(raw_answer))

        if not candidates:
            return np.nan

        # Choose by position in the text, so that a fraction appearing after a
        # decimal is not shadowed by that earlier decimal.
        last_match = max(candidates, key=lambda match: match.start()).group()

        if '/' in last_match:
            # Integer arithmetic instead of eval(): the text comes from a data file,
            # and eval() also fails on zero-padded operands such as "01/04".
            numerator, denominator = (int(part) for part in last_match.split('/'))
            if denominator == 0:
                # An undefined ratio is treated as "no answer" rather than aborting
                # the whole file with ZeroDivisionError.
                return np.nan
            return float(f"{numerator / denominator:.2f}")

        return float(last_match)

    # Apply extraction to the 'raw_answers' column
    df['parsed_answer'] = df['raw_answers'].apply(extract_last_decimal_or_fraction)

    # Drop NaN values in 'parsed_answer' if specified
    if drop_na:
        print(f"Row count before dropping NaN for {file_path}: {len(df)}")
        df = df.dropna(subset=['parsed_answer'])
        print(f"Row count after dropping NaN for {file_path}: {len(df)}")

    return df

def load_and_process_data(csv_files):
    """
    Cleans every CSV twice, once keeping rows without a parsed answer and once
    dropping them, and collects the results keyed by task name.

    The task name is the file's base name with '_results.csv' removed. A file that
    raises any exception while being processed is reported via print and left out
    of both dictionaries.

    Parameters:
    csv_files (list): List of CSV file paths

    Returns:
    tuple: (data_with_na, data_no_na)
           - data_with_na (dict): task name -> DataFrame with NaNs retained
           - data_no_na (dict): task name -> DataFrame with NaNs dropped
    """
    frames_keeping_na, frames_without_na = {}, {}

    for csv_path in csv_files:
        try:
            kept = clean_raw_answers(csv_path, drop_na=False)
            pruned = clean_raw_answers(csv_path, drop_na=True)
            task = os.path.basename(csv_path).replace('_results.csv', '')
        except Exception as err:
            print(f"Error processing {csv_path}: {err}")
        else:
            # Register the task only once both variants were built successfully
            frames_keeping_na[task] = kept
            frames_without_na[task] = pruned

    return frames_keeping_na, frames_without_na

# Run the cleaning step over every task CSV
data_with_na, data_no_na = load_and_process_data(csv_files)

# Bind one variable per task and variant; dict.get returns None for a task that
# is absent from the dictionary
(
    df_type1_with_na,
    df_type2_with_na,
    df_type3_with_na,
    df_type4_with_na,
    df_type5_with_na,
) = map(data_with_na.get, ('type1', 'type2', 'type3', 'type4', 'type5'))

(
    df_type1_no_na,
    df_type2_no_na,
    df_type3_no_na,
    df_type4_no_na,
    df_type5_no_na,
) = map(data_no_na.get, ('type1', 'type2', 'type3', 'type4', 'type5'))

Row count before dropping NaN for ./type1_results.csv: 305
Row count after dropping NaN for ./type1_results.csv: 299
Row count before dropping NaN for ./type2_results.csv: 305
Row count after dropping NaN for ./type2_results.csv: 299
Row count before dropping NaN for ./type3_results.csv: 305
Row count after dropping NaN for ./type3_results.csv: 300
Row count before dropping NaN for ./type4_results.csv: 305
Row count after dropping NaN for ./type4_results.csv: 298
Row count before dropping NaN for ./type5_results.csv: 305
Row count after dropping NaN for ./type5_results.csv: 298


In [2]:
# Configure pandas display so frames are shown without truncation.
# set_option accepts several (option, value) pairs in a single call.
pd.set_option(
    'display.max_rows', None,      # show all rows
    'display.max_columns', None,   # show all columns
    'display.width', None,         # no fixed output width
    'display.max_colwidth', None,  # show the full content of each column
)

df_type1_with_na

Unnamed: 0,model_name,run,image_path,ground_truth,raw_answers,forced_repetitions,parsed_answer
0,gpt4o,run_0,EXP3-Results/type1/type1_image_1.png,0.666667,0.75,0,0.75
1,gpt4o,run_0,EXP3-Results/type1/type1_image_2.png,0.263158,0.8,0,0.8
2,gpt4o,run_0,EXP3-Results/type1/type1_image_3.png,0.394737,0.5,0,0.5
3,gpt4o,run_0,EXP3-Results/type1/type1_image_4.png,0.576923,0.7,0,0.7
4,gpt4o,run_0,EXP3-Results/type1/type1_image_5.png,0.666667,0.5,0,0.5
5,gpt4o,run_0,EXP3-Results/type1/type1_image_6.png,0.578947,0.7,0,0.7
6,gpt4o,run_0,EXP3-Results/type1/type1_image_7.png,0.833333,Approximately 0.6.,0,0.6
7,gpt4o,run_0,EXP3-Results/type1/type1_image_8.png,0.46875,0.7,0,0.7
8,gpt4o,run_0,EXP3-Results/type1/type1_image_9.png,0.461538,Approximately 0.6.,0,0.6
9,gpt4o,run_0,EXP3-Results/type1/type1_image_10.png,0.315789,0.8,0,0.8


In [3]:
# Show the type1 DataFrame taken from data_no_na (rows with a NaN 'parsed_answer' dropped)
df_type1_no_na

Unnamed: 0,model_name,run,image_path,ground_truth,raw_answers,forced_repetitions,parsed_answer
0,gpt4o,run_0,EXP3-Results/type1/type1_image_1.png,0.666667,0.75,0,0.75
1,gpt4o,run_0,EXP3-Results/type1/type1_image_2.png,0.263158,0.8,0,0.8
2,gpt4o,run_0,EXP3-Results/type1/type1_image_3.png,0.394737,0.5,0,0.5
3,gpt4o,run_0,EXP3-Results/type1/type1_image_4.png,0.576923,0.7,0,0.7
4,gpt4o,run_0,EXP3-Results/type1/type1_image_5.png,0.666667,0.5,0,0.5
5,gpt4o,run_0,EXP3-Results/type1/type1_image_6.png,0.578947,0.7,0,0.7
6,gpt4o,run_0,EXP3-Results/type1/type1_image_7.png,0.833333,Approximately 0.6.,0,0.6
7,gpt4o,run_0,EXP3-Results/type1/type1_image_8.png,0.46875,0.7,0,0.7
8,gpt4o,run_0,EXP3-Results/type1/type1_image_9.png,0.461538,Approximately 0.6.,0,0.6
9,gpt4o,run_0,EXP3-Results/type1/type1_image_10.png,0.315789,0.8,0,0.8
