In [1]:

import pandas as pd
import numpy as np

In [2]:
def map_at_3(predictions, labels):
    """Mean average precision at 3 for rows that each have one correct label.

    For every row the (up to) three highest-scoring column indices are taken
    in descending score order. A row contributes 1, 1/2 or 1/3 when its label
    equals the index at rank 1, 2 or 3 respectively, and 0 otherwise. The
    contributions are summed and divided by ``len(predictions)``.

    Parameters
    ----------
    predictions : array-like, 2-D
        One row of scores per sample, one column per candidate.
    labels : iterable
        Correct column index for each row, in the same order as
        ``predictions``.

    Returns
    -------
    float
        The averaged score.
    """
    # Negating the scores turns argsort's ascending order into a descending one.
    top3 = np.argsort(-1 * np.array(predictions), axis=1)[:, :3]

    total = 0
    for ranked, truth in zip(top3, labels):
        gains = []
        for rank, candidate in enumerate(ranked, start=1):
            gains.append(1 / rank if truth == candidate else 0)
        total += np.sum(gains)
    return total / len(predictions)

In [3]:
from itertools import product

In [18]:
def ensemble(files):
    """Grid-search blending weights for several prediction CSVs.

    Every CSV must contain the score columns ``fold0_A`` .. ``fold0_E`` and an
    ``answer`` column holding the correct option letter. The MAP@3 of each
    file on its own is printed, then every combination of per-file weights
    drawn from ``np.arange(0, 1, 0.1)`` is applied as a weighted sum of the
    score matrices and scored with ``map_at_3``.

    Parameters
    ----------
    files : sequence of str
        Paths of the prediction CSVs. All files must list the same rows in
        the same order, because one label vector scores every blend.

    Returns
    -------
    pandas.DataFrame
        One row per weight combination, with columns ``weight_0`` ..
        ``weight_{n-1}`` and ``score``, in the order the combinations are
        generated.

    Raises
    ------
    ValueError
        If ``files`` is empty, or if the mapped ``answer`` column of a file
        differs from that of the first file.
    """
    files = list(files)
    if not files:
        raise ValueError("ensemble() needs at least one prediction file")

    pred_cols = ["fold0_A", "fold0_B", "fold0_C", "fold0_D", "fold0_E"]
    option_to_index = {option: idx for idx, option in enumerate('ABCDE')}

    pred_ary = []
    reference_answers = None
    for f in files:
        df = pd.read_csv(f)
        pred = df[pred_cols].values
        answers = df["answer"].map(option_to_index)
        if reference_answers is None:
            reference_answers = answers
        elif not answers.equals(reference_answers):
            # Blending misaligned rows would produce a meaningless score.
            raise ValueError(
                f"{f}: labels differ from those in {files[0]}; "
                "all files must cover the same rows in the same order"
            )
        labels = answers.values
        pred_ary.append(pred)
        print(f"{f}: {map_at_3(pred, labels)}")

    # Stack once to shape (n_files, n_rows, n_options) instead of converting
    # the list on every grid iteration.
    stacked = np.stack(pred_ary)

    rets = []
    for weights in product(*[np.arange(0, 1, 0.1)] * len(files)):
        # Reshape to (n_files, 1, 1) so each weight scales its whole matrix.
        pred = (stacked * np.array(weights).reshape(-1, 1, 1)).sum(axis=0)
        score = map_at_3(pred, labels)

        ret = {f"weight_{i}": weights[i] for i in range(len(files))}
        ret["score"] = score
        rets.append(ret)
    return pd.DataFrame(rets)

In [19]:
# Test-set predictions to blend; the third run is left out of the grid search.
test_prediction_files = [
    "../output/stage2/exp004.py/20230916022000_freeze0_maxlength384_context3_lr5e-6_100-stride75/test_predictions.csv",
    "../output/stage2/exp005.py/20230916152033_freeze0_maxlength512_context5_lr5e-6_100-stride75/test_predictions.csv",
    # "../output/stage2/exp005.py/20230917001210_freeze0_maxlength256_context3_lr5e-6_100-stride75_merge/test_predictions.csv",
]
df_ret_test = ensemble(test_prediction_files)

../output/stage2/exp004.py/20230916022000_freeze0_maxlength384_context3_lr5e-6_100-stride75/test_predictions.csv: 0.9358333333333333
../output/stage2/exp005.py/20230916152033_freeze0_maxlength512_context5_lr5e-6_100-stride75/test_predictions.csv: 0.935


In [20]:
# Rank the weight combinations by score, lowest first, so the best blends end up at the bottom.
df_ret_test.sort_values(by="score", ascending=True)

Unnamed: 0,weight_0,weight_1,score
0,0.0,0.0,0.378333
17,0.1,0.7,0.931667
18,0.1,0.8,0.931667
59,0.5,0.9,0.934167
19,0.1,0.9,0.934167
...,...,...,...
62,0.6,0.2,0.948333
41,0.4,0.1,0.950000
92,0.9,0.2,0.950000
82,0.8,0.2,0.950000


In [21]:
# Same two runs as the test-set search, scored on their validation predictions.
valid_prediction_files = [
    "../output/stage2/exp004.py/20230916022000_freeze0_maxlength384_context3_lr5e-6_100-stride75/valid_predictions.csv",
    "../output/stage2/exp005.py/20230916152033_freeze0_maxlength512_context5_lr5e-6_100-stride75/valid_predictions.csv",
]
df_ret_valid = ensemble(valid_prediction_files)

../output/stage2/exp004.py/20230916022000_freeze0_maxlength384_context3_lr5e-6_100-stride75/valid_predictions.csv: 0.8686534216335542
../output/stage2/exp005.py/20230916152033_freeze0_maxlength512_context5_lr5e-6_100-stride75/valid_predictions.csv: 0.866997792494481


In [24]:
# Show the validation grid-search results in the order the weight combinations were generated (not sorted by score).
df_ret_valid

Unnamed: 0,weight_0,weight_1,score
0,0.0,0.0,0.376656
1,0.0,0.1,0.866998
2,0.0,0.2,0.866998
3,0.0,0.3,0.866998
4,0.0,0.4,0.866998
...,...,...,...
95,0.9,0.5,0.879415
96,0.9,0.6,0.879691
97,0.9,0.7,0.879691
98,0.9,0.8,0.878863


In [3]:
# Load the exp004 test-set predictions for the manual two-model blend.
df1 = pd.read_csv("../output/stage2/exp004.py/20230916022000_freeze0_maxlength384_context3_lr5e-6_100-stride75/test_predictions.csv")

In [5]:
# Load the exp005 test-set predictions for the manual two-model blend.
df2 = pd.read_csv(
    "../output/stage2/exp005.py/20230916152033_freeze0_maxlength512_context5_lr5e-6_100-stride75/test_predictions.csv"
)

In [6]:
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    The maximum of each row is subtracted before exponentiating. This leaves
    the result mathematically unchanged, but it keeps ``np.exp`` from
    overflowing to ``inf`` (and the division from yielding ``NaN``) when the
    scores are large.

    Parameters
    ----------
    x : array-like, 2-D
        One row of scores per sample.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x`` whose rows each sum to 1.
    """
    x = np.asarray(x)
    shifted = x - x.max(axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=1, keepdims=True)

In [8]:
# Correct option index (A..E -> 0..4) for every test row. `labels` is read by
# the scoring cells that follow, so it must be defined at notebook level for a
# fresh-kernel run to work.
labels = df1["answer"].map({option: idx for idx, option in enumerate("ABCDE")}).values
# One label vector scores both models, so the two files must be row-aligned.
assert df1["answer"].equals(df2["answer"]), "df1 and df2 do not list the same answers in the same order"

# Standardise each model's scores with its own global mean and std, so the two
# models are on a comparable scale before they are blended.
pred_1 = df1[["fold0_A", "fold0_B", "fold0_C", "fold0_D", "fold0_E"]].values
pred_1 = (pred_1 - pred_1.mean()) / pred_1.std()
pred_2 = df2[["fold0_A", "fold0_B", "fold0_C", "fold0_D", "fold0_E"]].values
pred_2 = (pred_2 - pred_2.mean()) / pred_2.std()

# MAP@3 of the first model on its own.
map_at_3(pred_1, labels)

0.9358333333333333

In [11]:
# MAP@3 of the second model's standardised scores on their own.
map_at_3(pred_2, labels)

0.935

In [12]:
# Sweep the share given to the first model in steps of 0.05 (the second model
# gets the remainder) and report the MAP@3 of each blend.
for ratio in np.arange(0, 1.05, 0.05):
    blended = pred_1 * ratio + pred_2 * (1 - ratio)
    blend_score = map_at_3(blended, labels)
    print(f"ratio:{ratio} map3: {blend_score}")

ratio:0.0 map3: 0.935
ratio:0.05 map3: 0.9341666666666667
ratio:0.1 map3: 0.9316666666666668
ratio:0.15000000000000002 map3: 0.9341666666666667
ratio:0.2 map3: 0.9383333333333335
ratio:0.25 map3: 0.9358333333333334
ratio:0.30000000000000004 map3: 0.9366666666666668
ratio:0.35000000000000003 map3: 0.9383333333333332
ratio:0.4 map3: 0.9383333333333332
ratio:0.45 map3: 0.9408333333333333
ratio:0.5 map3: 0.9441666666666667
ratio:0.55 map3: 0.9441666666666667
ratio:0.6000000000000001 map3: 0.9441666666666667
ratio:0.65 map3: 0.9441666666666667
ratio:0.7000000000000001 map3: 0.945
ratio:0.75 map3: 0.9475
ratio:0.8 map3: 0.95
ratio:0.8500000000000001 map3: 0.9475
ratio:0.9 map3: 0.9408333333333333
ratio:0.9500000000000001 map3: 0.9383333333333332
ratio:1.0 map3: 0.9358333333333333
