In [1]:
import numpy as np
import pandas as pd
import warnings
import itertools
import multiprocessing
from tqdm import tqdm_notebook
from scipy.stats import spearmanr
warnings.filterwarnings("ignore")

In [2]:
train = pd.read_csv('data/train.csv')
TARGETS = train.columns[11:]

ix = np.sort(np.unique(train["question_body"], return_index=True)[1])
y = train[TARGETS].values

y_use = pd.read_csv('oofs/use_oof.csv').values
y_albert = pd.read_csv('oofs/double_albert_tuned_oofs.csv', index_col=0).values
y_bert = pd.read_csv('oofs/siamese_bert_tuned_oofs.csv', index_col=0).values
y_roberta = pd.read_csv('oofs/siamese_roberta_tuned_oofs.csv', index_col=0).values
y_xlnet = pd.read_csv('oofs/siamese_xlnet_tuned_oofs.csv', index_col=0).values

preds = [y_use, y_roberta, y_bert, y_xlnet, y_albert]
n_models = len(preds)

In [3]:
def evaluate(y, y_pred, verbose=False, exclude=[]):
    score = 0
    for i in range(y.shape[1]):
        if i not in exclude:
            if i < 21:
                col_score = spearmanr(y[ix, i], y_pred[ix, i])[0]
            else:
                col_score = spearmanr(y[:, i], y_pred[:, i])[0]
            if verbose:
                print(TARGETS[i], np.round(col_score, 3))
            score += col_score/(y.shape[1]-len(exclude))
    return np.round(score, 4)


ds = [4, 8, 16, 32, 64, None]
ws = [0, 1, 2, 4]


def scale(x, d):
    if d:
        return (x//(1/d))/d
    return x

dws = list(itertools.product(ds, ds, *(n_models*[ws])))


def transform(preds, params, c):
    (d_global, d_local), ws = params[:2], params[2:]
    y_temp = 0
    for pred, w in zip(preds, ws):
        y_temp += w * scale(pred[:, c], d_local)
    y_temp /= sum(ws)
    y_temp = scale(y_temp, d_global)
    return y_temp


def f(c):
    max_score = spearmanr(y[:, c], y_roberta[:, c])[0]
    best_index = -1
    for i, params in enumerate(dws):
        if sum(params[2:]) == 0:
            continue
        y_temp = transform(preds, params, c)
        if i < 21:
            score = spearmanr(y[ix, c], y_temp[ix])[0]
        else:
            score = spearmanr(y[:, c], y_temp)[0]
        if score > max_score:
            max_score = score
            best_index = i
            
    return best_index

In [4]:
pool = multiprocessing.Pool(15)
out = pool.map(f, range(30))

In [5]:
y_combined = np.zeros(y.shape)

for c in range(30):
    print(c, TARGETS[c], dws[out[c]])
    y_combined[:, c] = transform(preds, dws[out[c]], c)

0 question_asker_intent_understanding (64, 16, 4, 1, 1, 4, 4)
1 question_body_critical (None, None, 2, 0, 1, 0, 1)
2 question_conversational (4, 32, 1, 4, 0, 1, 1)
3 question_expect_short_answer (16, 64, 1, 4, 2, 0, 1)
4 question_fact_seeking (8, 32, 2, 2, 1, 4, 0)
5 question_has_commonly_accepted_answer (4, None, 1, 4, 2, 1, 4)
6 question_interestingness_others (32, 16, 4, 0, 1, 2, 4)
7 question_interestingness_self (16, 32, 2, 1, 2, 1, 2)
8 question_multi_intent (8, 16, 2, 1, 1, 2, 2)
9 question_not_really_a_question (16, 16, 1, 0, 2, 2, 0)
10 question_opinion_seeking (32, 16, 2, 4, 1, 2, 2)
11 question_type_choice (16, 4, 2, 2, 0, 1, 4)
12 question_type_compare (4, None, 0, 4, 1, 4, 0)
13 question_type_consequence (4, 8, 0, 1, 0, 4, 0)
14 question_type_definition (8, 8, 0, 4, 1, 0, 0)
15 question_type_entity (4, 8, 2, 1, 0, 4, 0)
16 question_type_instructions (4, None, 2, 4, 0, 1, 1)
17 question_type_procedure (64, 16, 1, 4, 0, 4, 2)
18 question_type_reason_explanation (32, 64, 2, 4

In [6]:
evaluate(y, y_combined, True)

question_asker_intent_understanding 0.39
question_body_critical 0.677
question_conversational 0.418
question_expect_short_answer 0.326
question_fact_seeking 0.342
question_has_commonly_accepted_answer 0.45
question_interestingness_others 0.353
question_interestingness_self 0.494
question_multi_intent 0.606
question_not_really_a_question 0.229
question_opinion_seeking 0.481
question_type_choice 0.781
question_type_compare 0.561
question_type_consequence 0.314
question_type_definition 0.653
question_type_entity 0.614
question_type_instructions 0.778
question_type_procedure 0.36
question_type_reason_explanation 0.684
question_type_spelling 0.833
question_well_written 0.522
answer_helpful 0.287
answer_level_of_information 0.48
answer_plausible 0.188
answer_relevance 0.196
answer_satisfaction 0.391
answer_type_instructions 0.775
answer_type_procedure 0.328
answer_type_reason_explanation 0.703
answer_well_written 0.252


0.4822

In [7]:
evaluate(y, y_combined, False, exclude=[9,19])

0.4787

In [8]:
[dws[out[c]] for c in range(30)]

[(64, 16, 4, 1, 1, 4, 4),
 (None, None, 2, 0, 1, 0, 1),
 (4, 32, 1, 4, 0, 1, 1),
 (16, 64, 1, 4, 2, 0, 1),
 (8, 32, 2, 2, 1, 4, 0),
 (4, None, 1, 4, 2, 1, 4),
 (32, 16, 4, 0, 1, 2, 4),
 (16, 32, 2, 1, 2, 1, 2),
 (8, 16, 2, 1, 1, 2, 2),
 (16, 16, 1, 0, 2, 2, 0),
 (32, 16, 2, 4, 1, 2, 2),
 (16, 4, 2, 2, 0, 1, 4),
 (4, None, 0, 4, 1, 4, 0),
 (4, 8, 0, 1, 0, 4, 0),
 (8, 8, 0, 4, 1, 0, 0),
 (4, 8, 2, 1, 0, 4, 0),
 (4, None, 2, 4, 0, 1, 1),
 (64, 16, 1, 4, 0, 4, 2),
 (32, 64, 2, 4, 1, 2, 4),
 (16, 64, 2, 1, 4, 0, 0),
 (32, None, 2, 1, 4, 4, 1),
 (32, 8, 0, 4, 1, 1, 4),
 (64, None, 4, 4, 1, 1, 2),
 (32, None, 1, 2, 0, 0, 2),
 (8, None, 1, 2, 2, 1, 2),
 (32, 16, 1, 4, 0, 4, 4),
 (8, 32, 1, 2, 0, 0, 1),
 (32, None, 1, 2, 1, 1, 0),
 (64, 64, 2, 2, 0, 1, 2),
 (16, None, 0, 2, 4, 1, 2)]

In [9]:
np.array([dws[out[c]] for c in range(30)])[:,2:].sum(axis=0)

array([46, 71, 34, 53, 52], dtype=object)