In [1]:
import numpy as np
import pandas as pd

In [2]:
with open('./eval.txt', 'r') as f:
    results = f.readlines()

# Remove any non-result lines from the eval file, and split the lines on the tab character
# (results have format: model_name\tdataset_name\tmetric_name\tmetric_value)
results = [r.replace('\n','').split('\t') for r in results if '\t' in r]

In [3]:
def remove_underscore_after(val): # Remove underscore, and keep the part after the underscore
    return val.split('_')[-1]
def remove_underscore_before(val): # Remove underscore, and keep the part before the underscore
    return val.split('_')[0]
def map2d(func, grid): # Mapping for 2d arrays, from: https://stackoverflow.com/questions/70742445/elegant-map-over-2d-list
    return [[func(value) for value in row] for row in grid]
def full_display(df):
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(df)

df = pd.DataFrame(results, columns = ['model', 'dataset', 'metric', 'value'])
df['value'] = pd.to_numeric(df['value'])
df['model'] = df['model'].map(lambda x: '-'.join(x.split('-')[2:]))
models = df['model'].str.split('-').tolist()

# Remove all underscores from our 2d list, keep one list of the param names and one with param vals
model_names_list = map2d(remove_underscore_before, models)
model_names = np.array(model_names_list)
model_val_list = map2d(remove_underscore_after, models)
model_vals = np.array(model_val_list)

In [4]:
model_vals.shape, model_names.shape

((10802, 13), (10802, 13))

In [5]:
# Splitting model name into more columns
for i in range(model_names.shape[-1]):
    name = model_names[0][i]
    val = model_vals[:,i]
    df[name] = val
    try:
       df[name] = pd.to_numeric(df[name]) 
    except:
        pass

# Remove columns that aren't relevant
df = df.drop(['fold', 'PL', 'vit', 'model', 'data', 'ALL', 'ratio', 'kw'], axis = 1) #'method', 'AL.iter', 

# Replace 'None' in the AL iter row with NaN, to allow conversion to numerical
df['AL.iter'] = df['AL.iter'].replace('None', np.nan)
df['AL.iter'] = pd.to_numeric(df['AL.iter'])

cols = sorted(df.columns.tolist())
print(cols)
# The ratio have the format: 'ratio_xxx' where xxx is some float.
# We only want that float value, and need to convert it from string to float.
# df['ratio'] = pd.to_numeric(df['ratio'])
display(df)

# Group by all columns except the performance values, then compute mean, std and number of model runs for the performance
df_grouped = df.groupby(list(set(cols)-set(['value'])), dropna = False).agg({'value':['mean', 'std', 'count']})

['AL.iter', 'bs', 'dataset', 'epochs', 'lr', 'method', 'metric', 'value']


Unnamed: 0,dataset,metric,value,method,AL.iter,epochs,lr,bs
0,RSICD-CLS,zeroshot-val-top1,60.99,base,,5,0.00005,64
1,UCM-CLS,zeroshot-val-top1,56.84,base,,5,0.00005,64
2,RSICD,image_to_text_R@1,4.76,base,,5,0.00005,64
3,RSICD,image_to_text_R@5,20.05,base,,5,0.00005,64
4,RSICD,image_to_text_R@10,33.88,base,,5,0.00005,64
...,...,...,...,...,...,...,...,...
10797,Sydney,image_to_text_R@5,8.45,base,20.0,5,0.00005,64
10798,Sydney,image_to_text_R@10,13.88,base,20.0,5,0.00005,64
10799,Sydney,text_to_image_R@1,2.62,base,20.0,5,0.00005,64
10800,Sydney,text_to_image_R@5,9.86,base,20.0,5,0.00005,64


In [6]:
display(df_grouped)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,mean,std,count
metric,lr,AL.iter,dataset,method,epochs,bs,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
image_to_text_R@1,0.000005,,RSICD,ours,5,64,3.966000,0.404261,10
image_to_text_R@1,0.000005,,RSICD,ours,5,128,3.114444,0.525288,9
image_to_text_R@1,0.000005,,RSICD,ours,10,64,4.764000,0.684044,10
image_to_text_R@1,0.000005,,RSICD,ours,10,128,4.163333,0.439886,9
image_to_text_R@1,0.000005,,RSICD,ours,15,64,4.828000,0.404991,10
...,...,...,...,...,...,...,...,...,...
zeroshot-val-top1,0.000500,,UCM-CLS,ours,25,128,6.191111,2.040695,9
zeroshot-val-top1,0.000500,,UCM-CLS,ours,30,64,5.333333,0.714598,9
zeroshot-val-top1,0.000500,,UCM-CLS,ours,30,128,5.856667,2.621769,9
zeroshot-val-top1,0.000500,,UCM-CLS,ours,35,64,5.380000,1.664527,9


In [7]:
df_grouped.to_csv('eval.csv')

In [8]:
df_results = df_grouped.reset_index()
df_results = df_results.sort_values(['metric', 'dataset', ('value', 'mean')])
# df_2 = df_2[(df_2['method'] == 'ours') & (df_2['ratio'] == 0.2) & (df_2['PL'] == 'ot.image')] # (df_2['metric'] == 'image_to_text_R@5') &
# df_2 = df_2[(df_2['epochs'] > 20)  & (df_2['bs'] == 64) & (df_2['lr']==5e-5)] # &(df_2['lr']==5e-5)
# df_2 = df_2[(df_2['lr']==5e-5) & (df_2['bs'] == 64) & (df_2['method'] == 'base')] # &(df_2['lr']==5e-5)
display(df_results)

Unnamed: 0_level_0,metric,lr,AL.iter,dataset,method,epochs,bs,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,mean,std,count
141,image_to_text_R@1,0.00050,,RSICD,ours,5,64,0.070000,0.060000,9
142,image_to_text_R@1,0.00050,,RSICD,ours,5,128,0.080000,0.030000,9
149,image_to_text_R@1,0.00050,,RSICD,ours,25,64,0.080000,0.030000,9
151,image_to_text_R@1,0.00050,,RSICD,ours,30,64,0.090000,0.045000,9
143,image_to_text_R@1,0.00050,,RSICD,ours,10,64,0.100000,0.030000,9
...,...,...,...,...,...,...,...,...,...,...
1185,zeroshot-val-top1,0.00005,,UCM-CLS,ours,35,128,65.145556,1.852924,9
1168,zeroshot-val-top1,0.00005,,UCM-CLS,base,20,64,66.242222,2.822781,9
1169,zeroshot-val-top1,0.00005,,UCM-CLS,base,25,64,66.857778,3.878353,9
1171,zeroshot-val-top1,0.00005,,UCM-CLS,base,35,64,67.378889,2.900217,9


In [9]:
df_results.shape

(1220, 10)

In [10]:
# Results with active learning
full_display(df_results[df_results['AL.iter']>0])

Unnamed: 0_level_0,metric,lr,AL.iter,dataset,method,epochs,bs,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,mean,std,count
135,image_to_text_R@1,0.0005,3.0,RSICD,base,10,64,0.182222,0.114322,9
138,image_to_text_R@1,0.0005,5.0,RSICD,base,10,64,0.201111,0.10043,9
132,image_to_text_R@1,0.0005,1.0,RSICD,base,10,64,0.243333,0.147054,9
63,image_to_text_R@1,5e-05,10.0,RSICD,base,5,64,2.695556,0.377992,9
42,image_to_text_R@1,5e-05,1.0,RSICD,base,10,64,3.215556,0.47969,9
66,image_to_text_R@1,5e-05,20.0,RSICD,base,5,64,3.3,,1
43,image_to_text_R@1,5e-05,1.0,RSICD,base,15,64,3.611111,0.673878,9
44,image_to_text_R@1,5e-05,1.0,RSICD,base,20,64,3.622222,0.713474,9
58,image_to_text_R@1,5e-05,5.0,RSICD,base,20,64,3.8,0.707107,2
57,image_to_text_R@1,5e-05,5.0,RSICD,base,15,64,3.925556,0.536216,9


In [11]:
# Results for base CLIP model (filtered a bit)
full_display(df_results[(df_results['AL.iter'].isna()) & (df_results['method'] == 'base') & (df_results['lr']==5e-5)])

Unnamed: 0_level_0,metric,lr,AL.iter,dataset,method,epochs,bs,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,mean,std,count
69,image_to_text_R@1,5e-05,,RSICD,base,5,64,4.436667,0.898318,9
71,image_to_text_R@1,5e-05,,RSICD,base,15,64,5.308889,1.151938,9
70,image_to_text_R@1,5e-05,,RSICD,base,10,64,5.444444,0.817054,9
72,image_to_text_R@1,5e-05,,RSICD,base,20,64,6.054444,0.701037,9
73,image_to_text_R@1,5e-05,,RSICD,base,25,64,6.562222,0.843027,9
75,image_to_text_R@1,5e-05,,RSICD,base,35,64,6.614444,0.863411,9
74,image_to_text_R@1,5e-05,,RSICD,base,30,64,6.624444,0.898723,9
90,image_to_text_R@1,5e-05,,Sydney,base,5,64,2.951111,0.688975,9
91,image_to_text_R@1,5e-05,,Sydney,base,10,64,2.952222,0.690557,9
92,image_to_text_R@1,5e-05,,Sydney,base,15,64,3.553333,0.636808,9


In [12]:
# Results for S-CLIP model (filtered a bit)
full_display(df_results[(df_results['AL.iter'].isna()) & (df_results['method'] == 'ours') & (df_results['lr']==5e-5)])

Unnamed: 0_level_0,metric,lr,AL.iter,dataset,method,epochs,bs,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,mean,std,count
76,image_to_text_R@1,5e-05,,RSICD,ours,5,64,3.895556,0.551886,9
78,image_to_text_R@1,5e-05,,RSICD,ours,10,64,4.108889,0.824704,9
77,image_to_text_R@1,5e-05,,RSICD,ours,5,128,4.324444,0.728373,9
80,image_to_text_R@1,5e-05,,RSICD,ours,15,64,4.69,0.654122,9
79,image_to_text_R@1,5e-05,,RSICD,ours,10,128,4.923333,0.831054,9
86,image_to_text_R@1,5e-05,,RSICD,ours,30,64,5.035556,0.353097,9
81,image_to_text_R@1,5e-05,,RSICD,ours,15,128,5.087778,1.073263,9
82,image_to_text_R@1,5e-05,,RSICD,ours,20,64,5.096667,0.650481,9
84,image_to_text_R@1,5e-05,,RSICD,ours,25,64,5.361111,0.48599,9
83,image_to_text_R@1,5e-05,,RSICD,ours,20,128,5.362222,0.758614,9
