In [1]:
%pip install matplotlib numpy pandas scikit-image

Note: you may need to restart the kernel to use updated packages.


In [33]:
from numpy import nan
from pandas import DataFrame, read_csv
from pathlib import Path

In [61]:
def merge_with_cols(df1: DataFrame | None, df2: DataFrame | None) -> DataFrame | None:
    """merge_with_cols(df1, df2) merges df1 and df2 by doing a full outer join by index, and adds together columns with the same name.

    Args:
        df1 (DataFrame | None): First dataframe to merge.
        df2 (DataFrame | None): Second dataframe to merge.

    Returns:
        DataFrame | None: The merged dataframe.
    """
    if df1 is None:
        return df2
    if df2 is None:
        return df1
    result = df1.merge(df2, 'outer', left_index=True, right_index=True)
    for c in df1.columns:
        if c not in df2.columns:
            continue
        c1, c2 = '%s_x' % c, '%s_y' % c
        result[c] = result[c1].fillna(0) + result[c2].fillna(0)
        result.drop([c1, c2], axis=1, inplace=True)
    return result

In [83]:
def cleanup(df: DataFrame) -> DataFrame:
    """cleanup(df) cleans up df by removing NaN values.

    Args:
        df (DataFrame): The dataframe to cleanup.

    Returns:
        DataFrame: The dataframe.
    """
    df.drop((nan, nan), inplace=True)
    df.dropna(inplace=True)
    return df

In [90]:
results_dir = './data'
results_path = Path(results_dir)

values = ['snake', 'spider', 'bird', 'butterfly']

index_columns = ['correctAnswer', 'imageFile', 'tImage']
value_columns = ['%s_button.numClicks' % v for v in values]

full_results = None

for file in results_path.glob('*.csv'):
    data = read_csv(str(file))
    data.set_index(index_columns, inplace=True, drop=False)

    for c in value_columns:
        data[c] = data[c].map(0..__ne__)

    results = data[[]].copy()

    predicted = data[value_columns].idxmax(1).astype('string').map({value_columns[i]: values[i] for i in range(len(values))})
    results['count'] = 1
    results['correct'] = (data['correctAnswer'] == predicted).astype(int)
    results['slider'] = (data['slider.response'] - 1.) / 3.

    cleanup(results)
    full_results = merge_with_cols(full_results, results)

full_results['slider'] = full_results['slider'] / full_results['count']
full_results[['count', 'correct']] = full_results[['count', 'correct']].astype(int)
full_results.to_excel('result_count.xlsx', merge_cells=False)

  df.drop((nan, nan), inplace=True)
  df.drop((nan, nan), inplace=True)
  df.drop((nan, nan), inplace=True)
  df.drop((nan, nan), inplace=True)
  df.drop((nan, nan), inplace=True)
  df.drop((nan, nan), inplace=True)
  df.drop((nan, nan), inplace=True)
  df.drop((nan, nan), inplace=True)
  df.drop((nan, nan), inplace=True)
  df.drop((nan, nan), inplace=True)
  df.drop((nan, nan), inplace=True)


In [91]:
full_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,correct,slider
correctAnswer,imageFile,tImage,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bird,histograms/birds/Anna's_hummingbird_(41124).jpg,0.024,2,1,0.404167
bird,histograms/birds/Anna's_hummingbird_(41124).jpg,0.036,3,3,0.998457
bird,histograms/birds/Anna's_hummingbird_(41124).jpg,0.048,3,3,0.997531
bird,histograms/birds/Anna's_hummingbird_(41124).jpg,0.060,3,3,0.988889
bird,histograms/birds/Announcing_his_Presence_(23818253846).jpg,0.024,3,1,0.392284
...,...,...,...,...,...
spider,histograms/spiders/Jumping_Zebra_Spider_(Salticus_scenicus)_(2816594447).jpg,0.060,3,1,0.727778
spider,histograms/spiders/Lycosidae_Wolf_Spider_in_profile.jpg,0.024,3,0,0.054012
spider,histograms/spiders/Lycosidae_Wolf_Spider_in_profile.jpg,0.036,3,1,0.451235
spider,histograms/spiders/Lycosidae_Wolf_Spider_in_profile.jpg,0.048,3,1,0.333951
