In [1]:
import numpy as np
import pandas as pd
import warnings
import itertools
from tqdm import tqdm_notebook
from scipy.stats import spearmanr
from sklearn.preprocessing import MinMaxScaler
warnings.filterwarnings("ignore")

In [2]:
# Training frame; every column from position 11 onwards is treated as a target.
train = pd.read_csv('data/train.csv')
TARGETS = train.columns[11:]

# Replace each target column by its average rank, then min-max scale the ranks.
train[TARGETS] = MinMaxScaler().fit_transform(train[TARGETS].rank(method="average"))
y = train[TARGETS].values

# Row positions whose "question_body" group has a "host" count of exactly 1.
body_counts = train.groupby("question_body")["host"].transform("count")
ix = np.where(body_counts == 1)[0]

# Out-of-fold predictions, one array per model.
# NOTE(review): use_oof.csv is read without index_col=0, unlike the other
# files -- confirm it really has no index column.
y_use = pd.read_csv('oofs/use_oof.csv').values
y_albert = pd.read_csv('oofs/siamese_albert_1_comb_oofs.csv', index_col=0).values
y_bert = pd.read_csv('oofs/siamese_bert_6_oofs.csv', index_col=0).values
y_roberta = pd.read_csv('oofs/siamese_roberta_1_comb_oofs.csv', index_col=0).values
y_xlnet = pd.read_csv('oofs/siamese_xlnet_1_comb_oofs.csv', index_col=0).values
# Alternative XLNet run (disabled):
# y_xlnet = pd.read_csv('oofs/siamese_xlnet_2_comb_oofs.csv', index_col=0).values

# Models that take part in the blend, in the order their weights are fitted.
preds = [y_use, y_roberta, y_bert, y_xlnet, y_albert]
n_models = len(preds)

In [3]:
def evaluate(y, y_pred, verbose=False, exclude=None):
    """Mean column-wise Spearman correlation between targets and predictions.

    Parameters
    ----------
    y : 2-D array
        Target values, one column per target.
    y_pred : 2-D array
        Predictions, indexed the same way as ``y``.
    verbose : bool
        When true, print the name of every evaluated column (looked up in the
        notebook-level ``TARGETS``) with its correlation rounded to 3 decimals.
    exclude : iterable of int, optional
        Column indices to leave out of the average.

    Returns
    -------
    The average correlation over the evaluated columns, rounded to 4 decimals
    (0 when every column is excluded).
    """
    # A set ignores duplicate or out-of-range indices, so the mean is taken
    # over the columns actually scored, not over ``n_cols - len(exclude)``.
    skipped = set(exclude) if exclude is not None else set()
    col_scores = []
    for i in range(y.shape[1]):
        if i in skipped:
            continue
        col_score = spearmanr(y[:, i], y_pred[:, i])[0]
        if verbose:
            print(TARGETS[i], np.round(col_score, 3))
        col_scores.append(col_score)
    if not col_scores:
        return np.round(0.0, 4)
    return np.round(np.mean(col_scores), 4)


# Candidate bucket counts for discretising predictions; None means "leave as is".
# NOTE(review): the recorded outputs of the later cells list a pair (None, 2),
# and 2 is not in this grid -- the saved outputs may come from an earlier `ds`.
ds = [4, 8, 16, 32, 64, None]


def scale(x, d):
    """Floor ``x`` down to a multiple of ``1/d``; return ``x`` as is when ``d`` is falsy."""
    if not d:
        return x
    bin_width = 1 / d
    return (x // bin_width) / d

# Every (d_global, d_local) pair, with the first element varying slowest.
ds2 = [(d_global, d_local) for d_global in ds for d_local in ds]


def transform(preds, params, c):
    """Blend the models' predictions for target column ``c``.

    Least-squares weights (rounded to 2 decimals) are fitted against the
    notebook-level target matrix ``y``; the weighted sum is normalised by the
    total weight and then bucketed with ``scale``.

    Parameters
    ----------
    preds : sequence of arrays, one per model
        Either 2-D prediction matrices (column ``c`` is used) or 1-D vectors
        that already hold the predictions for column ``c``.
    params : tuple ``(d_global, d_local)``
        ``d_global`` is the bucket count handed to ``scale`` for the blended
        prediction. ``d_local`` is unpacked but not used by this function.
    c : int
        Index of the target column in ``y``.

    Returns
    -------
    1-D array with one blended prediction per row.
    """
    d_global, d_local = params  # NOTE(review): d_local is currently unused

    # Design matrix: one row per sample, one column per model. Stacking whole
    # 2-D matrices would give a row count that cannot match y[:, c], so only
    # column c of each model is taken.
    columns = []
    for p in preds:
        p = np.asarray(p)
        columns.append(p[:, c] if p.ndim == 2 else p)
    X = np.vstack(columns).T

    # rcond=None selects the machine-precision cutoff explicitly.
    ws, _, _, _ = np.linalg.lstsq(X, y[:, c].reshape(-1, 1), rcond=None)
    ws = np.round(ws, 2)
    y_temp = (X @ ws).flatten()

    # Normalise by the total weight; skipped when the rounded weights sum to
    # zero so the blend does not turn into inf/nan.
    total = ws.sum()
    if total != 0:
        y_temp /= total
    y_temp = scale(y_temp, d_global)
    return y_temp


def f(c):
    """Grid-search ``ds2`` for the best blend of target column ``c``.

    Returns the index into ``ds2`` of the parameter pair whose blend (see
    ``transform``) has the highest Spearman correlation with ``y[:, c]``, or
    -1 when no pair beats the ``y_roberta`` column on its own. Used as
    ``ds2[index]``, -1 selects the last grid entry, ``(None, None)``.
    """
    max_score = spearmanr(y[:, c], y_roberta[:, c])[0]
    best_index = -1
    # Per-model prediction vectors for this target, built once for all params.
    model_columns = [p[:, c] for p in preds]
    # Iterate over ds2: the returned index is later used to look up ds2, and
    # the previously referenced `dws5` is not defined anywhere in the notebook.
    for i, params in enumerate(ds2):
        y_temp = transform(model_columns, params, c)
        score = spearmanr(y[:, c], y_temp)[0]
        if score > max_score:
            max_score = score
            best_index = i

    return best_index

In [4]:
import multiprocessing

# Run the per-target grid search in parallel, one task per target column.
# The `with` block shuts the worker processes down once the map has finished;
# the column count comes from `y` because `N_TARGETS` is never defined.
with multiprocessing.Pool(15) as pool:
    out = pool.map(f, range(y.shape[1]))

In [5]:
# Rebuild the blended prediction of every target with its selected parameters.
y_combined = np.zeros(y.shape)

for c in range(y.shape[1]):
    best_params = ds2[out[c]]
    print(c, TARGETS[c], best_params)
    # transform takes (preds, params, c); hand it each model's column c.
    y_combined[:, c] = transform([p[:, c] for p in preds], best_params, c)

0 question_asker_intent_understanding (None, 16)
1 question_body_critical (None, 64)
2 question_conversational (4, None)
3 question_expect_short_answer (32, 64)
4 question_fact_seeking (16, 32)
5 question_has_commonly_accepted_answer (None, 2)
6 question_interestingness_others (64, None)
7 question_interestingness_self (16, None)
8 question_multi_intent (8, 32)
9 question_not_really_a_question (32, None)
10 question_opinion_seeking (32, 16)
11 question_type_choice (16, 4)
12 question_type_compare (4, None)
13 question_type_consequence (16, 4)
14 question_type_definition (8, 8)
15 question_type_entity (8, 8)
16 question_type_instructions (16, 4)
17 question_type_procedure (32, 16)
18 question_type_reason_explanation (32, 64)
19 question_type_spelling (32, 16)
20 question_well_written (None, 64)
21 answer_helpful (None, 8)
22 answer_level_of_information (64, None)
23 answer_plausible (16, None)
24 answer_relevance (32, None)
25 answer_satisfaction (16, 64)
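26 answer_type_instructions (32, 64)
27 answer_type_procedure (None, None)
28 answer_type_reason_explanation (64, 32)
29 answer_well_written (None, None)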

In [6]:
# Per-target and mean Spearman correlation of the blended predictions.
evaluate(y, y_combined, verbose=True)

question_asker_intent_understanding 0.401
question_body_critical 0.67
question_conversational 0.504
question_expect_short_answer 0.323
question_fact_seeking 0.378
question_has_commonly_accepted_answer 0.479
question_interestingness_others 0.368
question_interestingness_self 0.522
question_multi_intent 0.608
question_not_really_a_question 0.152
question_opinion_seeking 0.497
question_type_choice 0.773
question_type_compare 0.565
question_type_consequence 0.264
question_type_definition 0.653
question_type_entity 0.612
question_type_instructions 0.792
question_type_procedure 0.384
question_type_reason_explanation 0.685
question_type_spelling 0.497
question_well_written 0.537
answer_helpful 0.275
answer_level_of_information 0.472
answer_plausible 0.167
answer_relevance 0.192
answer_satisfaction 0.378
answer_type_instructions 0.773
answer_type_procedure 0.327
answer_type_reason_explanation 0.7
answer_well_written 0.245


0.4731

In [7]:
# Same evaluation without columns 9 and 19 (question_not_really_a_question and
# question_type_spelling in the listing this cell prints).
evaluate(y, y_combined, verbose=True, exclude=[9, 19])

question_asker_intent_understanding 0.401
question_body_critical 0.67
question_conversational 0.504
question_expect_short_answer 0.323
question_fact_seeking 0.378
question_has_commonly_accepted_answer 0.479
question_interestingness_others 0.368
question_interestingness_self 0.522
question_multi_intent 0.608
question_opinion_seeking 0.497
question_type_choice 0.773
question_type_compare 0.565
question_type_consequence 0.264
question_type_definition 0.653
question_type_entity 0.612
question_type_instructions 0.792
question_type_procedure 0.384
question_type_reason_explanation 0.685
question_well_written 0.537
answer_helpful 0.275
answer_level_of_information 0.472
answer_plausible 0.167
answer_relevance 0.192
answer_satisfaction 0.378
answer_type_instructions 0.773
answer_type_procedure 0.327
answer_type_reason_explanation 0.7
answer_well_written 0.245


0.4837

In [8]:
# Selected (d_global, d_local) pair for every target, in target order.
[ds2[best_index] for best_index in out]

[(None, 16),
 (None, 64),
 (4, None),
 (32, 64),
 (16, 32),
 (None, 2),
 (64, None),
 (16, None),
 (8, 32),
 (32, None),
 (32, 16),
 (16, 4),
 (4, None),
 (16, 4),
 (8, 8),
 (8, 8),
 (16, 4),
 (32, 16),
 (32, 64),
 (32, 16),
 (None, 64),
 (None, 8),
 (64, None),
 (16, None),
 (32, None),
 (16, 64),
 (32, 64),
 (None, None),
 (64, 32),
 (None, None)]