In [1]:
import numpy as np
import pandas as pd

In [2]:
with open('./eval.txt', 'r') as f:
    results = f.readlines()

# Remove any non-result lines from the eval file, and split the lines on the tab character
# (results have format: model_name\tdataset_name\tmetric_name\tmetric_value)
results = [r.replace('\n','').split('\t') for r in results if '\t' in r]

In [3]:
def remove_underscore_after(val): # Remove underscore, and keep the part after the underscore
    return val.split('_')[-1]
def remove_underscore_before(val): # Remove underscore, and keep the part before the underscore
    return val.split('_')[0]
def map2d(func, grid): # Mapping for 2d arrays, from: https://stackoverflow.com/questions/70742445/elegant-map-over-2d-list
    return [[func(value) for value in row] for row in grid]
def full_display(df):
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(df)

df = pd.DataFrame(results, columns = ['model', 'dataset', 'metric', 'value'])
df['value'] = pd.to_numeric(df['value'])
df['model'] = df['model'].map(lambda x: '-'.join(x.split('-')[2:]))
models = df['model'].str.split('-').tolist()

# Remove all underscores from our 2d list, keep one list of the param names and one with param vals
model_names_list = map2d(remove_underscore_before, models)
model_names = np.array(model_names_list)
model_val_list = map2d(remove_underscore_after, models)
model_vals = np.array(model_val_list)

In [4]:
model_vals.shape, model_names.shape

((36030, 14), (36030, 14))

In [5]:
# Splitting model name into more columns
for i in range(model_names.shape[-1]):
    name = model_names[0][i]
    val = model_vals[:,i]
    df[name] = val
    try:
       df[name] = pd.to_numeric(df[name]) 
    except:
        pass

# Remove columns that aren't relevant
df = df.drop(['fold', 'vit', 'model', 'data', 'ALL',  'kw'], axis = 1) #'method', 'AL.iter', 'ratio', 'PL',

# Replace 'None' with NaN, to allow conversion to numerical
df['AL.iter'] = df['AL.iter'].replace('None', np.nan)
df['AL.iter'] = pd.to_numeric(df['AL.iter'])
df['AL.epochs'] = df['AL.epochs'].replace('None', np.nan)
df['AL.epochs'] = pd.to_numeric(df['AL.epochs'])

cols = sorted(df.columns.tolist())
print(cols)
# The ratio have the format: 'ratio_xxx' where xxx is some float.
# We only want that float value, and need to convert it from string to float.
# df['ratio'] = pd.to_numeric(df['ratio'])
display(df)

# Group by all columns except the performance values, then compute mean, std and number of model runs for the performance
df_grouped = df.groupby(list(set(cols)-set(['value'])), dropna = False).agg({'value':['mean', 'std', 'count']})

['AL.epochs', 'AL.iter', 'PL', 'bs', 'dataset', 'epochs', 'lr', 'method', 'metric', 'ratio', 'value']


Unnamed: 0,dataset,metric,value,ratio,method,AL.iter,AL.epochs,PL,epochs,lr,bs
0,RSICD-CLS,zeroshot-val-top1,60.99,0.1,base,,,,5,0.000050,64
1,UCM-CLS,zeroshot-val-top1,56.84,0.1,base,,,,5,0.000050,64
2,RSICD,image_to_text_R@1,4.76,0.1,base,,,,5,0.000050,64
3,RSICD,image_to_text_R@5,20.05,0.1,base,,,,5,0.000050,64
4,RSICD,image_to_text_R@10,33.88,0.1,base,,,,5,0.000050,64
...,...,...,...,...,...,...,...,...,...,...,...
36025,Sydney,image_to_text_R@5,11.87,0.1,ours,,,hard.text,35,0.000005,128
36026,Sydney,image_to_text_R@10,20.12,0.1,ours,,,hard.text,35,0.000005,128
36027,Sydney,text_to_image_R@1,3.62,0.1,ours,,,hard.text,35,0.000005,128
36028,Sydney,text_to_image_R@5,12.88,0.1,ours,,,hard.text,35,0.000005,128


In [6]:
display(df_grouped)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,mean,std,count
lr,ratio,method,AL.epochs,bs,PL,epochs,metric,dataset,AL.iter,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
0.000005,0.1,base,,64,,5,image_to_text_R@1,RSICD,,3.287917,0.300637,24
0.000005,0.1,base,,64,,5,image_to_text_R@1,Sydney,,2.523333,0.216788,24
0.000005,0.1,base,,64,,5,image_to_text_R@1,UCM,,7.255833,1.424429,24
0.000005,0.1,base,,64,,5,image_to_text_R@10,RSICD,,24.362083,0.652633,24
0.000005,0.1,base,,64,,5,image_to_text_R@10,Sydney,,16.364583,0.744504,24
...,...,...,...,...,...,...,...,...,...,...,...,...
0.000500,0.1,ours,,128,soft.text,35,text_to_image_R@5,RSICD,,0.482500,0.045000,4
0.000500,0.1,ours,,128,soft.text,35,text_to_image_R@5,Sydney,,1.010000,0.000000,4
0.000500,0.1,ours,,128,soft.text,35,text_to_image_R@5,UCM,,2.142500,0.005000,4
0.000500,0.1,ours,,128,soft.text,35,zeroshot-val-top1,RSICD-CLS,,3.525000,0.226495,4


In [7]:
df_grouped.to_csv('eval.csv')

In [8]:
df_results = df_grouped.reset_index()
df_results['AL.epochs'] = df_results['AL.epochs'].fillna(df_results['epochs'])
df_results = df_results.sort_values(['metric', 'dataset', ('value', 'mean')])
# df_results = df_results.groupby(['metric', 'dataset'])
# df_results = df_grouped.reset_index()
# df_results = df_results.sort_values(['metric', 'dataset', ('value', 'mean')])

# df_2 = df_2[(df_2['method'] == 'ours') & (df_2['ratio'] == 0.2) & (df_2['PL'] == 'ot.image')] # (df_2['metric'] == 'image_to_text_R@5') &
# df_2 = df_2[(df_2['epochs'] > 20)  & (df_2['bs'] == 64) & (df_2['lr']==5e-5)] # &(df_2['lr']==5e-5)
# df_2 = df_2[(df_2['lr']==5e-5) & (df_2['bs'] == 64) & (df_2['method'] == 'base')] # &(df_2['lr']==5e-5)
display(df_results)

Unnamed: 0_level_0,lr,ratio,method,AL.epochs,bs,PL,epochs,metric,dataset,AL.iter,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,mean,std,count
5720,0.00050,0.1,ours,35.0,128,hard.text,35,image_to_text_R@1,RSICD,,0.0450,0.051962,4
5320,0.00050,0.1,ours,5.0,64,soft.text,5,image_to_text_R@1,RSICD,,0.0675,0.045000,4
5420,0.00050,0.1,ours,30.0,64,soft.text,30,image_to_text_R@1,RSICD,,0.0675,0.045000,4
6200,0.00050,0.1,ours,15.0,128,soft.text,15,image_to_text_R@1,RSICD,,0.0675,0.045000,4
6260,0.00050,0.1,ours,30.0,128,soft.text,30,image_to_text_R@1,RSICD,,0.0675,0.045000,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4299,0.00005,0.5,base,5.0,64,,5,zeroshot-val-top1,UCM-CLS,,77.7550,3.436680,4
4419,0.00005,0.7,base,15.0,64,,15,zeroshot-val-top1,UCM-CLS,,77.9725,3.209884,4
4379,0.00005,0.7,base,5.0,64,,5,zeroshot-val-top1,UCM-CLS,,78.1875,5.256433,4
4339,0.00005,0.5,base,15.0,64,,15,zeroshot-val-top1,UCM-CLS,,78.2875,3.939225,4


In [9]:
df_results.shape

(6300, 13)

In [10]:
# Results with active learning
full_display(df_results[(df_results['AL.iter']>0) & (df_results['epochs']==15)]) #.groupby(['metric', 'dataset']).tail(3)

Unnamed: 0_level_0,lr,ratio,method,AL.epochs,bs,PL,epochs,metric,dataset,AL.iter,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,mean,std,count
2140,5e-05,0.1,base,15.0,64,,15,image_to_text_R@1,RSICD,3.0,3.5475,0.539714,4
2280,5e-05,0.1,base,15.0,64,,15,image_to_text_R@1,RSICD,1.0,3.611111,0.673878,9
2282,5e-05,0.1,base,15.0,64,,15,image_to_text_R@1,RSICD,5.0,3.925556,0.536216,9
2281,5e-05,0.1,base,15.0,64,,15,image_to_text_R@1,RSICD,3.0,4.037778,0.666254,9
4180,5e-05,0.3,base,15.0,64,,15,image_to_text_R@1,RSICD,3.0,6.6375,0.968827,4
4260,5e-05,0.5,base,15.0,64,,15,image_to_text_R@1,RSICD,3.0,9.3625,0.823666,4
4340,5e-05,0.7,base,15.0,64,,15,image_to_text_R@1,RSICD,3.0,11.0825,0.775129,4
2285,5e-05,0.1,base,15.0,64,,15,image_to_text_R@1,Sydney,3.0,2.257778,0.985137,9
2141,5e-05,0.1,base,15.0,64,,15,image_to_text_R@1,Sydney,3.0,2.2625,1.047103,4
2286,5e-05,0.1,base,15.0,64,,15,image_to_text_R@1,Sydney,5.0,2.75,0.746341,9


In [11]:
# Results for base CLIP model (filtered a bit)
full_display(df_results[(df_results['AL.iter'].isna()) & (df_results['method'] == 'base') & (df_results['lr']==5e-5) & (df_results['ratio']==0.1)].groupby(['metric', 'dataset']).tail(3))

Unnamed: 0_level_0,lr,ratio,method,AL.epochs,bs,PL,epochs,metric,dataset,AL.iter,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,mean,std,count
2440,5e-05,0.1,base,25.0,64,,25,image_to_text_R@1,RSICD,,6.562222,0.843027,9
2480,5e-05,0.1,base,35.0,64,,35,image_to_text_R@1,RSICD,,6.614444,0.863411,9
2460,5e-05,0.1,base,30.0,64,,30,image_to_text_R@1,RSICD,,6.624444,0.898723,9
2441,5e-05,0.1,base,25.0,64,,25,image_to_text_R@1,Sydney,,4.113333,1.026779,9
2481,5e-05,0.1,base,35.0,64,,35,image_to_text_R@1,Sydney,,4.448889,0.837339,9
2461,5e-05,0.1,base,30.0,64,,30,image_to_text_R@1,Sydney,,4.494444,0.875416,9
2371,5e-05,0.1,base,20.0,64,,20,image_to_text_R@1,UCM,,14.047778,2.0628,9
2482,5e-05,0.1,base,35.0,64,,35,image_to_text_R@1,UCM,,14.714444,1.647727,9
2462,5e-05,0.1,base,30.0,64,,30,image_to_text_R@1,UCM,,15.191111,2.095772,9
2463,5e-05,0.1,base,30.0,64,,30,image_to_text_R@10,RSICD,,37.657778,1.341881,9


In [12]:
# Results for S-CLIP model (filtered a bit)
# & (df_results['lr']==5e-5)
df_S_CLIP = df_results[(df_results['AL.iter'].isna()) & (df_results['method'] == 'ours') & (df_results['PL'].str.contains('ot.'))]
df_PL_soft = df_results[(df_results['AL.iter'].isna()) & (df_results['method'] == 'ours') & (df_results['PL'].str.contains('soft.'))]
df_PL_hard = df_results[(df_results['AL.iter'].isna()) & (df_results['method'] == 'ours') & (df_results['PL'].str.contains('hard.'))]

In [13]:
full_display(df_S_CLIP.groupby(['metric', 'dataset']).tail(3))

Unnamed: 0_level_0,lr,ratio,method,AL.epochs,bs,PL,epochs,metric,dataset,AL.iter,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,mean,std,count
3700,5e-05,0.1,ours,25.0,128,ot.image,25,image_to_text_R@1,RSICD,,5.891111,0.63359,9
3720,5e-05,0.1,ours,30.0,128,ot.image,30,image_to_text_R@1,RSICD,,5.892222,0.510803,9
3740,5e-05,0.1,ours,35.0,128,ot.image,35,image_to_text_R@1,RSICD,,6.136667,0.590656,9
3661,5e-05,0.1,ours,15.0,128,ot.image,15,image_to_text_R@1,Sydney,,3.465556,0.565401,9
3721,5e-05,0.1,ours,30.0,128,ot.image,30,image_to_text_R@1,Sydney,,3.511111,0.860936,9
3741,5e-05,0.1,ours,35.0,128,ot.image,35,image_to_text_R@1,Sydney,,3.644444,0.951513,9
3702,5e-05,0.1,ours,25.0,128,ot.image,25,image_to_text_R@1,UCM,,12.382222,1.689333,9
3722,5e-05,0.1,ours,30.0,128,ot.image,30,image_to_text_R@1,UCM,,12.525556,1.375873,9
3742,5e-05,0.1,ours,35.0,128,ot.image,35,image_to_text_R@1,UCM,,12.715556,1.136421,9
3703,5e-05,0.1,ours,25.0,128,ot.image,25,image_to_text_R@10,RSICD,,37.594444,1.299741,9


In [14]:
full_display(df_PL_soft.groupby(['metric', 'dataset']).tail(3))

Unnamed: 0_level_0,lr,ratio,method,AL.epochs,bs,PL,epochs,metric,dataset,AL.iter,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,mean,std,count
3300,5e-05,0.1,ours,30.0,64,soft.text,30,image_to_text_R@1,RSICD,,6.41,0.863752,4
3320,5e-05,0.1,ours,35.0,64,soft.text,35,image_to_text_R@1,RSICD,,6.48,1.161981,4
4000,5e-05,0.1,ours,30.0,128,soft.image,30,image_to_text_R@1,RSICD,,6.5225,0.61114,4
3981,5e-05,0.1,ours,25.0,128,soft.image,25,image_to_text_R@1,Sydney,,3.9725,0.859278,4
3301,5e-05,0.1,ours,30.0,64,soft.text,30,image_to_text_R@1,Sydney,,3.975,0.30556,4
3321,5e-05,0.1,ours,35.0,64,soft.text,35,image_to_text_R@1,Sydney,,4.025,0.521568,4
3322,5e-05,0.1,ours,35.0,64,soft.text,35,image_to_text_R@1,UCM,,13.69,1.731146,4
4122,5e-05,0.1,ours,25.0,128,soft.text,25,image_to_text_R@1,UCM,,13.9025,2.312536,4
4162,5e-05,0.1,ours,35.0,128,soft.text,35,image_to_text_R@1,UCM,,14.01,2.254669,4
4163,5e-05,0.1,ours,35.0,128,soft.text,35,image_to_text_R@10,RSICD,,38.7825,2.804786,4


In [15]:
full_display(df_PL_hard.groupby(['metric', 'dataset']).tail(3))

Unnamed: 0_level_0,lr,ratio,method,AL.epochs,bs,PL,epochs,metric,dataset,AL.iter,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,mean,std,count
2720,5e-05,0.1,ours,25.0,64,hard.text,25,image_to_text_R@1,RSICD,,6.1125,0.802055,4
3600,5e-05,0.1,ours,35.0,128,hard.text,35,image_to_text_R@1,RSICD,,6.135,1.026629,4
2760,5e-05,0.1,ours,35.0,64,hard.text,35,image_to_text_R@1,RSICD,,6.27,0.37674,4
2721,5e-05,0.1,ours,25.0,64,hard.text,25,image_to_text_R@1,Sydney,,3.875,1.242591,4
3581,5e-05,0.1,ours,30.0,128,hard.text,30,image_to_text_R@1,Sydney,,4.025,0.957793,4
3601,5e-05,0.1,ours,35.0,128,hard.text,35,image_to_text_R@1,Sydney,,4.1275,0.891829,4
3442,5e-05,0.1,ours,30.0,128,hard.image,30,image_to_text_R@1,UCM,,13.2625,1.344281,4
3462,5e-05,0.1,ours,35.0,128,hard.image,35,image_to_text_R@1,UCM,,13.3675,1.51867,4
3582,5e-05,0.1,ours,30.0,128,hard.text,30,image_to_text_R@1,UCM,,13.37,1.238548,4
3603,5e-05,0.1,ours,35.0,128,hard.text,35,image_to_text_R@10,RSICD,,36.79,1.892529,4
