In [1]:
import numpy as np
import pandas as pd

In [2]:
with open('./eval.txt', 'r') as f:
    results = f.readlines()

# Remove any non-result lines from the eval file, and split the lines on the tab character
# (results have format: model_name\tdataset_name\tmetric_name\tmetric_value)
results = [r.replace('\n','').split('\t') for r in results if '\t' in r]

In [3]:
def remove_underscore_after(val): # Remove underscore, and keep the part after the underscore
    return val.split('_')[-1]
def remove_underscore_before(val): # Remove underscore, and keep the part before the underscore
    return val.split('_')[0]
def map2d(func, grid): # Mapping for 2d arrays, from: https://stackoverflow.com/questions/70742445/elegant-map-over-2d-list
    return [[func(value) for value in row] for row in grid]

df = pd.DataFrame(results, columns = ['model', 'dataset', 'metric', 'value'])
df['value'] = pd.to_numeric(df['value'])
df['model'] = df['model'].map(lambda x: '-'.join(x.split('-')[2:]))
models = df['model'].str.split('-').tolist()

# Remove all underscores from our 2d list, keep one list of the param names and one with param vals
model_names_list = map2d(remove_underscore_before, models)
model_names = np.array(model_names_list)
model_val_list = map2d(remove_underscore_after, models)
model_vals = np.array(model_val_list)

In [4]:
model_vals.shape, model_names.shape

((661, 12), (661, 12))

In [5]:
# Splitting model name into more columns
for i in range(model_names.shape[-1]):
    name = model_names[0][i]
    val = model_vals[:,i]
    df[name] = val

cols = sorted(df.columns.tolist())
# The ratio have the format: 'ratio_xxx' where xxx is some float.
# We only want that float value, and need to convert it from string to float.
df['ratio'] = pd.to_numeric(df['ratio'])
display(df)

# Group by all columns except the performance values, then compute mean, std and number of model runs for the performance
df_grouped = df.groupby(list(set(cols)-set(['value']))).agg({'value':['mean', 'std', 'count']})

Unnamed: 0,model,dataset,metric,value,data,ALL,ratio,method,kw,AL,PL,vit,epochs,lr,bs
0,RN50,RSICD-CLS,zeroshot-val-top1,3.93,RS,ALL,0.1,base,none,False,,False,5,0.0005,64
1,RN50,UCM-CLS,zeroshot-val-top1,5.71,RS,ALL,0.1,base,none,False,,False,5,0.0005,64
2,RN50,RSICD,image_to_text_R@1,0.09,RS,ALL,0.1,base,none,False,,False,5,0.0005,64
3,RN50,RSICD,image_to_text_R@5,0.55,RS,ALL,0.1,base,none,False,,False,5,0.0005,64
4,RN50,RSICD,image_to_text_R@10,1.01,RS,ALL,0.1,base,none,False,,False,5,0.0005,64
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
656,RN50,Sydney,image_to_text_R@5,3.62,RS,ALL,0.1,base,none,False,,False,15,0.0005,64
657,RN50,Sydney,image_to_text_R@10,7.04,RS,ALL,0.1,base,none,False,,False,15,0.0005,64
658,RN50,Sydney,text_to_image_R@1,1.21,RS,ALL,0.1,base,none,False,,False,15,0.0005,64
659,RN50,Sydney,text_to_image_R@5,2.21,RS,ALL,0.1,base,none,False,,False,15,0.0005,64


In [6]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    display(df_grouped)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,mean,std,count
bs,lr,data,AL,kw,dataset,metric,model,ratio,epochs,ALL,PL,vit,method,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
128,5e-06,RS,False,none,RSICD,image_to_text_R@1,RN50,0.1,5,ALL,,False,base,1.664,0.2002,5
128,5e-06,RS,False,none,RSICD,image_to_text_R@10,RN50,0.1,5,ALL,,False,base,9.832,0.519683,5
128,5e-06,RS,False,none,RSICD,image_to_text_R@5,RN50,0.1,5,ALL,,False,base,5.282,0.374526,5
128,5e-06,RS,False,none,RSICD,text_to_image_R@1,RN50,0.1,5,ALL,,False,base,1.024,0.121984,5
128,5e-06,RS,False,none,RSICD,text_to_image_R@10,RN50,0.1,5,ALL,,False,base,8.262,0.239312,5
128,5e-06,RS,False,none,RSICD,text_to_image_R@5,RN50,0.1,5,ALL,,False,base,4.624,0.273733,5
128,5e-06,RS,False,none,RSICD-CLS,zeroshot-val-top1,RN50,0.1,5,ALL,,False,base,24.224,0.524862,5
128,5e-06,RS,False,none,Sydney,image_to_text_R@1,RN50,0.1,5,ALL,,False,base,1.73,0.228035,5
128,5e-06,RS,False,none,Sydney,image_to_text_R@10,RN50,0.1,5,ALL,,False,base,12.516,0.522905,5
128,5e-06,RS,False,none,Sydney,image_to_text_R@5,RN50,0.1,5,ALL,,False,base,6.922,0.645461,5


In [7]:
df_grouped.to_csv('eval.csv')

In [8]:
df_2 = df_grouped.reset_index()
# df_2 = df_2[(df_2['method'] == 'ours') & (df_2['ratio'] == 0.2) & (df_2['PL'] == 'ot.image')] # (df_2['metric'] == 'image_to_text_R@5') &
df_2 = df_2.sort_values(['metric', 'dataset', ('value', 'mean')])
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df_2)

Unnamed: 0_level_0,bs,lr,data,AL,kw,dataset,metric,model,ratio,epochs,ALL,PL,vit,method,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,mean,std,count
101,64,0.0005,RS,False,none,RSICD,image_to_text_R@1,RN50,0.1,5,ALL,,False,base,0.09,0.0,5
40,128,0.0005,RS,False,none,RSICD,image_to_text_R@1,RN50,0.1,5,ALL,,False,base,0.108,0.040249,5
100,64,0.0005,RS,False,none,RSICD,image_to_text_R@1,RN50,0.1,15,ALL,,False,base,0.46,,1
0,128,5e-06,RS,False,none,RSICD,image_to_text_R@1,RN50,0.1,5,ALL,,False,base,1.664,0.2002,5
60,64,5e-06,RS,False,none,RSICD,image_to_text_R@1,RN50,0.1,5,ALL,,False,base,1.884,0.136492,5
20,128,5e-05,RS,False,none,RSICD,image_to_text_R@1,RN50,0.1,5,ALL,,False,base,3.216,0.30509,5
80,64,5e-05,RS,False,none,RSICD,image_to_text_R@1,RN50,0.1,5,ALL,,False,base,3.955714,0.507834,7
47,128,0.0005,RS,False,none,Sydney,image_to_text_R@1,RN50,0.1,5,ALL,,False,base,0.2,0.141421,5
115,64,0.0005,RS,False,none,Sydney,image_to_text_R@1,RN50,0.1,5,ALL,,False,base,0.2,0.0,5
114,64,0.0005,RS,False,none,Sydney,image_to_text_R@1,RN50,0.1,15,ALL,,False,base,1.21,,1
