### Import Libraries

In [1]:
import sys

import time
import pandas as pd
import numpy as np
from sklearn.model_selection import GroupKFold, GroupShuffleSplit
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import tqdm
import tensorflow as tf
import tensorflow.keras.backend as K
import os
from scipy.stats import spearmanr
from math import floor, ceil
from transformers import *
from sklearn.utils import shuffle
from collections import defaultdict
import html
import gc

np.set_printoptions(suppress=True)
print(tf.__version__)

2.1.0


### Read Data

In [2]:
PATH = '../input/google-quest-challenge/'
BERT_PATH = '../input/bert-base-uncased-huggingface-transformer/'
BERT_WEIGHTS_PATH = '../input/bertv3/'
tokenizer = BertTokenizer.from_pretrained(BERT_PATH+'bert-base-uncased-vocab.txt')

MAX_SEQUENCE_LENGTH = 512

df_train = pd.read_csv(PATH+'train.csv')
df_test = pd.read_csv(PATH+'test.csv')
df_sub = pd.read_csv(PATH+'sample_submission.csv')
print('train shape =', df_train.shape)
print('test shape =', df_test.shape)

df_train.question_body = df_train.question_body.apply(html.unescape)
df_train.answer        = df_train.answer.apply(html.unescape)
df_test.question_body = df_test.question_body.apply(html.unescape)
df_test.answer        = df_test.answer.apply(html.unescape)


output_categories = list(df_train.columns[11:])
input_categories = list(df_train.columns[[1, 2, 5, 9]])
print('\noutput categories:\n\t', output_categories)
print('\ninput categories:\n\t', input_categories)

df_train.question_body = df_train.question_body.apply(html.unescape)
df_train.answer        = df_train.answer.apply(html.unescape)
df_test.question_body = df_test.question_body.apply(html.unescape)
df_test.answer        = df_test.answer.apply(html.unescape)

train shape = (6079, 41)
test shape = (476, 11)

output categories:
	 ['question_asker_intent_understanding', 'question_body_critical', 'question_conversational', 'question_expect_short_answer', 'question_fact_seeking', 'question_has_commonly_accepted_answer', 'question_interestingness_others', 'question_interestingness_self', 'question_multi_intent', 'question_not_really_a_question', 'question_opinion_seeking', 'question_type_choice', 'question_type_compare', 'question_type_consequence', 'question_type_definition', 'question_type_entity', 'question_type_instructions', 'question_type_procedure', 'question_type_reason_explanation', 'question_type_spelling', 'question_well_written', 'answer_helpful', 'answer_level_of_information', 'answer_plausible', 'answer_relevance', 'answer_satisfaction', 'answer_type_instructions', 'answer_type_procedure', 'answer_type_reason_explanation', 'answer_well_written']

input categories:
	 ['question_title', 'question_body', 'answer', 'category']


### Preprocess Data

In [3]:
def _convert_to_transformer_inputs(title, question, answer, tokenizer, max_sequence_length):
    """Converts tokenized input to ids, masks and segments for transformer (including bert)"""
    
    def return_id(str1, str2, truncation_strategy, length):

        inputs = tokenizer.encode_plus(str1, str2,
            add_special_tokens=True,
            max_length=length,
            truncation_strategy=truncation_strategy)
        
        input_ids =  inputs["input_ids"]
        input_masks = [1] * len(input_ids)
        input_segments = inputs["token_type_ids"]
        padding_length = length - len(input_ids)
        padding_id = tokenizer.pad_token_id
        input_ids = input_ids + ([padding_id] * padding_length)
        input_masks = input_masks + ([0] * padding_length)
        input_segments = input_segments + ([0] * padding_length)
        
        return [input_ids, input_masks, input_segments]
    
    input_ids_q, input_masks_q, input_segments_q = return_id(
        title + ' ' + question, None, 'longest_first', max_sequence_length)
    
    input_ids_qa, input_masks_qa, input_segments_qa = return_id(
        title + ' ' + question, answer, 'longest_first', max_sequence_length)
    
    # input_ids_a, input_masks_a, input_segments_a = return_id(
    #     answer, None, 'longest_first', max_sequence_length)
    
    return [input_ids_q, input_masks_q, input_segments_q,
            input_ids_qa, input_masks_qa, input_segments_qa,]
            # input_ids_a, input_masks_a, input_segments_a]

def compute_input_arrays(df, columns, tokenizer, max_sequence_length):
    input_ids_q, input_masks_q, input_segments_q = [], [], []
    input_ids_qa, input_masks_qa, input_segments_qa = [], [], []
    input_categories = []
    
    for _, instance in df[columns].iterrows():
        t, q, a, c = instance.question_title, instance.question_body, instance.answer, instance.category

        ids_q, masks_q, segments_q, ids_qa, masks_qa, segments_qa = \
        _convert_to_transformer_inputs(t, q, a, tokenizer, max_sequence_length)
        
        input_ids_q.append(ids_q)
        input_masks_q.append(masks_q)
        input_segments_q.append(segments_q)

        input_ids_qa.append(ids_qa)
        input_masks_qa.append(masks_qa)
        input_segments_qa.append(segments_qa)
        
        input_categories.append([c])

        # input_ids_a.append(ids_a)
        # input_masks_a.append(masks_a)
        # input_segments_a.append(segments_a)
        
    return [np.asarray(input_ids_q, dtype=np.int32), 
            np.asarray(input_masks_q, dtype=np.int32), 
            np.asarray(input_segments_q, dtype=np.int32),
            np.asarray(input_ids_qa, dtype=np.int32), 
            np.asarray(input_masks_qa, dtype=np.int32), 
            np.asarray(input_segments_qa, dtype=np.int32),]
            #np.asarray(input_categories, dtype=np.int32),]
            # np.asarray(input_ids_a, dtype=np.int32), 
            # np.asarray(input_masks_a, dtype=np.int32), 
            # np.asarray(input_segments_a, dtype=np.int32)]

def compute_output_arrays(df, columns):
    return np.asarray(df[columns])

In [4]:
outputs = compute_output_arrays(df_train, output_categories)
inputs = compute_input_arrays(df_train, input_categories, tokenizer, MAX_SEQUENCE_LENGTH)
test_inputs = compute_input_arrays(df_test, input_categories, tokenizer, MAX_SEQUENCE_LENGTH)

### PostProcessing

In [5]:

def postprocess_nariman(preds):
    preds_temp = preds.copy()

    for i in range(preds_temp.shape[0]):
        for j in range(preds_temp.shape[1]):
            preds_temp[i][j] = min(unique_targets_n, key=lambda x: abs(x - preds_temp[i][j]))
    
    for i in range(preds.shape[1]):
      if len(np.unique(preds_temp[:,i])) < 8:
        preds_temp[:,i] = preds[:,i]



    return preds_temp

def remove_null(preds):
    for col in range(preds.shape[1]):
        if np.isnan(preds[:, col]).any():
            preds[:, col][np.argwhere(np.isnan(preds[:,col]))] = 0.001
            
    return preds

targets = np.zeros(0)
for col in range(11, 41):
    targets = np.append(targets, df_train.iloc[:, col].unique())

unique_targets_n = np.unique(targets)
unique_targets_n[unique_targets_n == 1] = 0.999999
unique_targets_n[unique_targets_n == 0] = 0.01
unique_targets_n

array([0.01      , 0.2       , 0.26666667, 0.3       , 0.33333333,
       0.33333333, 0.4       , 0.44444444, 0.46666667, 0.5       ,
       0.53333333, 0.55555556, 0.6       , 0.66666667, 0.66666667,
       0.7       , 0.73333333, 0.77777778, 0.8       , 0.83333333,
       0.86666667, 0.88888889, 0.9       , 0.93333333, 0.999999  ])

## PostProcessing Mark

In [6]:
unique_targets = np.unique(df_train.iloc[:, 11:])
ids = [2, 11, 12, 14, 15]

def discretize(preds):
    preds_temp = preds.copy()

    for j in range(preds_temp.shape[1]):
      for i in range(preds_temp.shape[0]):
          preds_temp[i, j] = min(unique_targets, key=lambda x: abs(x - preds_temp[i, j]))

    for i in range(preds_temp.shape[1]):
      if i not in ids:
        preds_temp[:, i] = preds[:, i]
    
    return preds_temp

def postprocess_mark(preds):
  preds = discretize(preds)
  return preds

def scale_outputs(outputs):
  values = sorted(np.unique(outputs))
  uniform_values = np.arange(len(values)) / (len(values) - 1)
  return np.array([uniform_values[values.index(output)] for output in outputs])

for col in range(30):
  outputs[:, col] = scale_outputs(outputs[:, col])


n = df_test['url'].apply(lambda x:(('ell.stackexchange.com' in x) or ('english.stackexchange.com' in x))).tolist()
spelling = []

for x in n:
    if x:
        spelling.append(1.)
    else:
        spelling.append(0.)
        
        
def get_spelling_preds(indices):
    preds = []
    for id in indices:
        if 'ell.stackexchange.com' in df_train.iloc[id, 8] or 'english.stackexchange.com' in df_train.iloc[id, 8]:
            preds.append(1)
        else:
            preds.append(0)
    return preds

def postprocess_quantiles_col(preds, val_idx, col_id):
    not_val_idx = [i for i in range(df_train.shape[0]) if i not in val_idx]
    train_vals = df_train.iloc[not_val_idx, 11 + col_id]
    train_vals_distr = train_vals.value_counts().sort_index() / len(train_vals)
    
    n_filled = 0
    new_preds = preds.copy()
    args_sorted = np.argsort(preds)
    for val, percentage in train_vals_distr.items():
        n_to_fill = int(len(preds) * percentage)
        n_to_fill = max(1, n_to_fill)
        new_preds[args_sorted[n_filled: n_filled + n_to_fill]] = val
        n_filled += n_to_fill
    if n_filled < len(preds):
        new_preds[n_filled:] = val
    return new_preds

def postprocess_quantiles(preds, val_idx, cols=[]):
    postprocessed = preds.copy()
    if cols:
        for col in cols:
            postprocessed[:, col] = postprocess_quantiles_col(preds[:, col], val_idx, col)
    else:
        for col in range(30):
            postprocessed[:, col] = postprocess_quantiles_col(preds[:, col], val_idx, col)
    return postprocessed




### Evaluation metric

In [7]:
def compute_spearmanr_ignore_nan(trues, preds):
    rhos = []
    for tcol, pcol in zip(np.transpose(trues), np.transpose(preds)):
        rhos.append(spearmanr(tcol, pcol).correlation)
    return np.nanmean(rhos), rhos

### Models

### bert mark

In [8]:
def create_model_mark_5():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    qa_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    qa_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    qa_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)

    config = BertConfig() # print(config) to see settings
    config.output_hidden_states = False # Set to True to obtain hidden states

    bert_model = TFBertModel.from_pretrained(
      BERT_PATH+'bert-base-uncased-tf_model.h5', config=config)
  
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    qa_embedding = bert_model(qa_id, attention_mask=qa_mask, token_type_ids=qa_atn)[0]

    q = q_embedding[:, 0, :]#tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    qa = qa_embedding[:, 0, :]

    q = tf.keras.layers.Dropout(0.2)(q)
    qa = tf.keras.layers.Dropout(0.2)(qa)

    #qqa = tf.keras.layers.Concatenate(axis=1)([q, qa])

    q = tf.keras.layers.Dense(21, activation='sigmoid')(q)
    #qqa = tf.keras.layers.Dense(8, activation='sigmoid')(qqa)
    qa = tf.keras.layers.Dense(9, activation='sigmoid')(qa)

    outputs = tf.keras.layers.Concatenate(axis=1)([q, qa])
  
    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn, qa_id, qa_mask, qa_atn], outputs=outputs)
    return model

In [9]:
def create_model_mark_4():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    qa_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    qa_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    qa_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)

    config = BertConfig() # print(config) to see settings
    config.output_hidden_states = False # Set to True to obtain hidden states

    bert_model = TFBertModel.from_pretrained(
      BERT_PATH+'bert-base-uncased-tf_model.h5', config=config)
  
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    qa_embedding = bert_model(qa_id, attention_mask=qa_mask, token_type_ids=qa_atn)[0]

    q = q_embedding[:, 0, :]#tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    qa = qa_embedding[:, 0, :]

    q = tf.keras.layers.Dropout(0.175)(q)
    qa = tf.keras.layers.Dropout(0.175)(qa)

    #qqa = tf.keras.layers.Concatenate(axis=1)([q, qa])

    q = tf.keras.layers.Dense(21, activation='sigmoid')(q)
    #qqa = tf.keras.layers.Dense(8, activation='sigmoid')(qqa)
    qa = tf.keras.layers.Dense(9, activation='sigmoid')(qa)

    outputs = tf.keras.layers.Concatenate(axis=1)([q, qa])
  
    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn, qa_id, qa_mask, qa_atn], outputs=outputs)
    return model

In [10]:
def create_model_mark_3():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    qa_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    qa_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    qa_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)

    config = BertConfig() # print(config) to see settings
    config.output_hidden_states = False # Set to True to obtain hidden states

    bert_model = TFBertModel.from_pretrained(
      BERT_PATH+'bert-base-uncased-tf_model.h5', config=config)
  
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    qa_embedding = bert_model(qa_id, attention_mask=qa_mask, token_type_ids=qa_atn)[0]

    q = q_embedding[:, 0, :]#tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    qa = qa_embedding[:, 0, :]

    q = tf.keras.layers.Dropout(0.25)(q)
    qa = tf.keras.layers.Dropout(0.25)(qa)

    #qqa = tf.keras.layers.Concatenate(axis=1)([q, qa])

    q = tf.keras.layers.Dense(21, activation='sigmoid')(q)
    #qqa = tf.keras.layers.Dense(8, activation='sigmoid')(qqa)
    qa = tf.keras.layers.Dense(9, activation='sigmoid')(qa)

    outputs = tf.keras.layers.Concatenate(axis=1)([q, qa])
  
    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn, qa_id, qa_mask, qa_atn], outputs=outputs)
    return model

In [11]:
def create_model_mark_2():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    qa_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    qa_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    qa_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)

    config = BertConfig() # print(config) to see settings
    config.output_hidden_states = False # Set to True to obtain hidden states

    bert_model = TFBertModel.from_pretrained(
      BERT_PATH+'bert-base-uncased-tf_model.h5', config=config)
  
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    qa_embedding = bert_model(qa_id, attention_mask=qa_mask, token_type_ids=qa_atn)[0]

    q = q_embedding[:, 0, :]#tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    qa = qa_embedding[:, 0, :]

    q = tf.keras.layers.Dropout(0.2)(q)
    qa = tf.keras.layers.Dropout(0.2)(qa)

    #qqa = tf.keras.layers.Concatenate(axis=1)([q, qa])

    q = tf.keras.layers.Dense(21, activation='sigmoid')(q)
    #qqa = tf.keras.layers.Dense(8, activation='sigmoid')(qqa)
    qa = tf.keras.layers.Dense(9, activation='sigmoid')(qa)

    outputs = tf.keras.layers.Concatenate(axis=1)([q, qa])
  
    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn, qa_id, qa_mask, qa_atn], outputs=outputs)
    return model


In [12]:
def create_model_mark():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    qa_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    qa_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    qa_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    #category = tf.keras.layers.Input((1,), dtype=tf.int32)

    config = BertConfig() # print(config) to see settings
    config.output_hidden_states = False # Set to True to obtain hidden states

    bert_model = TFBertModel.from_pretrained(
      BERT_PATH+'bert-base-uncased-tf_model.h5', config=config)
  
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    qa_embedding = bert_model(qa_id, attention_mask=qa_mask, token_type_ids=qa_atn)[0]
    #cat_embedding = tf.keras.layers.Embedding(len(le.classes_), 50, input_length=1)(category)
    #cat_embedding = tf.keras.layers.Flatten()(cat_embedding)

    q = q_embedding[:, 0, :]#tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    qa = qa_embedding[:, 0, :]

    q = tf.keras.layers.Dropout(0.2)(q)
    qa = tf.keras.layers.Dropout(0.2)(qa)
    
    #q = tf.keras.layers.Concatenate(axis=1)([q, cat_embedding])
    #qa = tf.keras.layers.Concatenate(axis=1)([qa, cat_embedding])

    #qqa = tf.keras.layers.Concatenate(axis=1)([q, qa])

    q = tf.keras.layers.Dense(21, activation='sigmoid')(q)
    #qqa = tf.keras.layers.Dense(8, activation='sigmoid')(qqa)
    qa = tf.keras.layers.Dense(9, activation='sigmoid')(qa)

    outputs = tf.keras.layers.Concatenate(axis=1)([q, qa])
  
    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn, qa_id, qa_mask, qa_atn], outputs=outputs)
    return model

 ### bert_ensemble 0-4 Folds

In [13]:
def create_model_0_4():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
   

    config = BertConfig() # print(config) to see settings
    config.output_hidden_states = False # Set to True to obtain hidden states
    bert_model = TFBertModel.from_pretrained(
        BERT_PATH+'bert-base-uncased-tf_model.h5', config=config)
    
    # if config.output_hidden_states = True, obtain hidden states via bert_model(...)[-1]
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    a_embedding = bert_model(a_id, attention_mask=a_mask, token_type_ids=a_atn)[0]
    
    q = q_embedding[:, 0, :]#tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    a = tf.keras.layers.GlobalAveragePooling1D()(a_embedding)
    
    q = tf.keras.layers.Dropout(0.2)(q)
    
    a = tf.keras.layers.Dropout(0.2)(a)
    
    qa = tf.keras.layers.Concatenate(axis=1)([q, a])
    
    q_outputs = tf.keras.layers.Dense(21, activation='sigmoid')(q)
    qa_outputs = tf.keras.layers.Dense(8, activation='sigmoid')(qa)
    a_outputs = tf.keras.layers.Dense(1, activation='sigmoid')(a)


    outputs = tf.keras.layers.Concatenate(axis=1)([q_outputs, qa_outputs, a_outputs])

    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn, a_id, a_mask, a_atn], outputs=outputs)
    
    return model

 ### bert_ensemble 5-9 Folds

In [14]:
def create_model_5_9():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
   

    
    config = BertConfig() # print(config) to see settings
    config.output_hidden_states = False # Set to True to obtain hidden states
    # caution: when using e.g. XLNet, XLNetConfig() will automatically use xlnet-large config
    
    # normally ".from_pretrained('bert-base-uncased')", but because of no internet, the 
    # pretrained model has been downloaded manually and uploaded to kaggle. 
    bert_model = TFBertModel.from_pretrained(
        BERT_PATH+'bert-base-uncased-tf_model.h5', config=config)
    
    # if config.output_hidden_states = True, obtain hidden states via bert_model(...)[-1]
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    a_embedding = bert_model(a_id, attention_mask=a_mask, token_type_ids=a_atn)[0]
  
    

    
    q = q_embedding[:, 0, :]#tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    a = tf.keras.layers.GlobalAveragePooling1D()(a_embedding)
    
    q = tf.keras.layers.Dropout(0.2)(q)
    
    a = tf.keras.layers.Dropout(0.2)(a)
    
    qa = tf.keras.layers.Concatenate(axis=1)([q, a])
    
    q_outputs = tf.keras.layers.Dense(21, activation='sigmoid')(q)
    qa_outputs = tf.keras.layers.Dense(8, activation='sigmoid')(qa)
    a_outputs = tf.keras.layers.Dense(1, activation='sigmoid')(a)
    

    outputs = tf.keras.layers.Concatenate(axis=1)([q_outputs, qa_outputs, a_outputs])
    #q_2_id, q_2_mask, q_2_atn, a_2_id, a_2_mask, a_2_atn
    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn, a_id, a_mask, a_atn], outputs=outputs)
    
    return model

 ### bert_ensemble 10-14 Folds

In [15]:
def create_model_10_14():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
   
    

    config = BertConfig() # print(config) to see settings
    config.output_hidden_states = False # Set to True to obtain hidden states
    # caution: when using e.g. XLNet, XLNetConfig() will automatically use xlnet-large config
    
    # normally ".from_pretrained('bert-base-uncased')", but because of no internet, the 
    # pretrained model has been downloaded manually and uploaded to kaggle. 
    bert_model = TFBertModel.from_pretrained(
        BERT_PATH+'bert-base-uncased-tf_model.h5', config=config)
    
    # if config.output_hidden_states = True, obtain hidden states via bert_model(...)[-1]
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    a_embedding = bert_model(a_id, attention_mask=a_mask, token_type_ids=a_atn)[0]
  

    q = tf.keras.layers.GlobalAveragePooling1D()(q_embedding)#q_embedding[:, 0, :]#
    a = tf.keras.layers.GlobalAveragePooling1D()(a_embedding)#a_embedding[:, 0, :]#
    
    q = tf.keras.layers.Dropout(0.2)(q)
    
    a = tf.keras.layers.Dropout(0.2)(a)
    
    qa = tf.keras.layers.Concatenate(axis=1)([q, a])
    
    q_outputs = tf.keras.layers.Dense(21, activation='sigmoid')(q)
    qa_outputs = tf.keras.layers.Dense(9, activation='sigmoid')(qa)
    #a_outputs = tf.keras.layers.Dense(1, activation='sigmoid')(a)

    outputs = tf.keras.layers.Concatenate(axis=1)([q_outputs, qa_outputs])

    #q_2_id, q_2_mask, q_2_atn, a_2_id, a_2_mask, a_2_atn
    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn, a_id, a_mask, a_atn], outputs=outputs)
    
    return model

 ### bert_ensemble 15-19 Folds

In [16]:
def create_model_15_19():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    a_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)
    
   

    
    config = BertConfig() # print(config) to see settings
    config.output_hidden_states = False # Set to True to obtain hidden states
    # caution: when using e.g. XLNet, XLNetConfig() will automatically use xlnet-large config
    
    # normally ".from_pretrained('bert-base-uncased')", but because of no internet, the 
    # pretrained model has been downloaded manually and uploaded to kaggle. 
    bert_model = TFBertModel.from_pretrained(
        BERT_PATH+'bert-base-uncased-tf_model.h5', config=config)
    
    # if config.output_hidden_states = True, obtain hidden states via bert_model(...)[-1]
    q_embedding = bert_model(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
    a_embedding = bert_model(a_id, attention_mask=a_mask, token_type_ids=a_atn)[0]
  
    
    q = q_embedding[:, 0, :]#tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    a = a_embedding[:, 0, :]#tf.keras.layers.GlobalAveragePooling1D()(a_embedding)
    
    q = tf.keras.layers.Dropout(0.2)(q)
    
    a = tf.keras.layers.Dropout(0.2)(a)
    
    qa = tf.keras.layers.Concatenate(axis=1)([q, a])
    
    q_outputs = tf.keras.layers.Dense(21, activation='sigmoid')(q)
    qa_outputs = tf.keras.layers.Dense(9, activation='sigmoid')(qa)
    #a_outputs = tf.keras.layers.Dense(1, activation='sigmoid')(a)

    outputs = tf.keras.layers.Concatenate(axis=1)([q_outputs, qa_outputs])

    #q_2_id, q_2_mask, q_2_atn, a_2_id, a_2_mask, a_2_atn
    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn, a_id, a_mask, a_atn], outputs=outputs)
    
    return model

## Inference

In [18]:
models = []
test_preds = []
start = time.time()
for i in range(20):
    if i > -1 and i < 5:
        model_path = f'../input/bert-ensemble/bert-fold{i}.h5'
        model = create_model_0_4()
        model.load_weights(model_path)
    elif i > 4 and i < 10:
        model_path = f'../input/bert-ensemble/bert-fold{i}.h5'
        model = create_model_5_9()
        model.load_weights(model_path)
    elif i > 9 and i < 14:
        model_path = f'../input/bert-ensemble/bert-fold{i}.h5'
        model = create_model_10_14()
        model.load_weights(model_path)
    else:
        model_path = f'../input/bert-ensemble/bert-fold{i}.h5'
        model = create_model_15_19()
        model.load_weights(model_path)
    
    
  
    print(str(i) + ' - model')
    print('Weights loaded: ', time.time() - start)
    test_predictions = model.predict(test_inputs)
    
    test_preds.append(test_predictions)

    del model
    tf.keras.backend.clear_session()
    gc.collect()
  
    print("Predictions calculated:", time.time() - start)
    print('----------------------------------')
  


#### Inference on Mark`s models

In [20]:

mark_test_preds = np.zeros((len(df_test), 30))
start = time.time()
for i in range(5):
    
    
    model = create_model_mark()
    model.load_weights( f'../input/bertv3/bert3e-5-{i}fv3.h5')
    print('Weights loaded: ', time.time() - start)
    mark_test_preds += model.predict(test_inputs) / 25
    #mark_test_preds.append(test_predictions)
    print("Predictions calculated:", time.time() - start)
    print('----------------------------------')
    

    model = create_model_mark_2()
    model.load_weights( f'../input/bert-mark/bert3e-5-{i}f.h5')
    print('Weights loaded: ', time.time() - start)
    mark_test_preds += model.predict(test_inputs) / 25
    #mark_test_preds.append(test_predictions)
    print("Predictions calculated:", time.time() - start)
    print('----------------------------------')
    

    
    model = create_model_mark_3()
    model.load_weights( f'../input/bert-mark2/bert3e-5-{i}f.h5')
    print('Weights loaded: ', time.time() - start)
    mark_test_preds += model.predict(test_inputs) / 25
    #mark_test_preds.append(test_predictions)
    print("Predictions calculated:", time.time() - start)
    print('----------------------------------')



    model = create_model_mark_4()
    model.load_weights( f'../input/bert-mark3/bert3e-5-{i}f.h5')
    print('Weights loaded: ', time.time() - start)
    mark_test_preds += model.predict(test_inputs) / 25
    #mark_test_preds.append(test_predictions)
    print("Predictions calculated:", time.time() - start)
    print('----------------------------------')
    
    model = create_model_mark_5()
    model.load_weights( f'../input/bert-mark4/bert3e-5-{i}f.h5')
    print('Weights loaded: ', time.time() - start)
    mark_test_preds += model.predict(test_inputs) / 25
    #mark_test_preds.append(test_predictions)
    print("Predictions calculated:", time.time() - start)
    print('----------------------------------')

    
    tf.keras.backend.clear_session()
    del model
    tf.keras.backend.clear_session()
    gc.collect()
    


Weights loaded:  19.997506141662598
Predictions calculated: 46.374995470047
----------------------------------
Weights loaded:  54.76851749420166
Predictions calculated: 80.27562355995178
----------------------------------
Weights loaded:  89.20894050598145
Predictions calculated: 114.17156982421875
----------------------------------
Weights loaded:  123.38343405723572
Predictions calculated: 147.95179748535156
----------------------------------
Weights loaded:  156.76055216789246
Predictions calculated: 181.5310938358307
----------------------------------
Weights loaded:  190.44049191474915
Predictions calculated: 215.29428839683533
----------------------------------
Weights loaded:  223.10839128494263
Predictions calculated: 247.9885528087616
----------------------------------
Weights loaded:  256.18144965171814
Predictions calculated: 281.23700523376465
----------------------------------
Weights loaded:  289.4767451286316
Predictions calculated: 314.2036015987396
-------------------

In [23]:
preds_nariman = np.average(test_preds, axis=0)

final_preds = ((preds_nariman*0.35) + (mark_test_preds*0.65))

In [24]:


final_preds = postprocess_quantiles(postprocess_mark(final_preds), [], cols=[5, 9])
final_preds[:, 19] = spelling


In [25]:
# #GAVE 0.423
# final_preds = preds_mark
# final_preds[:, 1] = preds_nariman[:, 1]
# final_preds[:, 3] = preds_nariman[:, 3]
# final_preds[:, 4] = preds_nariman[:, 4]
# final_preds[:, 7] = preds_nariman[:, 7]
# final_preds[:, 8] = preds_nariman[:, 8]
# final_preds[:, 10] = preds_nariman[:, 10]
# final_preds[:, 16] = preds_nariman[:, 16]
# final_preds[:, 18] = preds_nariman[:, 18]
# final_preds[:, 26] = preds_nariman[:, 26]
# final_preds[:, 28] = preds_nariman[:, 28]

In [27]:


# for i in range(preds.shape[1]):
#     preds[:, i] -= np.amin(preds[:, i])
#     preds[:, i] /= np.amax(preds[:, i])
df_sub.iloc[:, 1:] = final_preds # for weighted average set weights=[...]

df_sub.to_csv('submission.csv', index=False)

In [28]:
df_sub

Unnamed: 0,qa_id,question_asker_intent_understanding,question_body_critical,question_conversational,question_expect_short_answer,question_fact_seeking,question_has_commonly_accepted_answer,question_interestingness_others,question_interestingness_self,question_multi_intent,...,question_well_written,answer_helpful,answer_level_of_information,answer_plausible,answer_relevance,answer_satisfaction,answer_type_instructions,answer_type_procedure,answer_type_reason_explanation,answer_well_written
0,39,0.913424,0.474873,0.2,0.531127,0.634390,0.0,0.544819,0.474622,0.667949,...,0.891284,0.866451,0.405140,0.896921,0.905565,0.767462,0.052794,0.054397,0.879457,0.771410
1,46,0.821273,0.210151,0.0,0.786500,0.781689,1.0,0.315768,0.175063,0.106917,...,0.494101,0.928367,0.465002,0.922247,0.947569,0.849771,0.911952,0.116967,0.096710,0.719807
2,70,0.865052,0.516582,0.0,0.728293,0.901685,1.0,0.410020,0.238771,0.199325,...,0.840142,0.859721,0.398799,0.878725,0.878636,0.766212,0.099920,0.054283,0.872505,0.742907
3,132,0.825589,0.175334,0.0,0.717820,0.760658,1.0,0.300792,0.129486,0.091134,...,0.598941,0.924412,0.492892,0.919292,0.951111,0.857055,0.795927,0.139120,0.649683,0.735062
4,200,0.871580,0.150992,0.0,0.816128,0.780855,1.0,0.459873,0.363910,0.165606,...,0.498874,0.860891,0.486585,0.893704,0.893761,0.791560,0.221989,0.092568,0.632834,0.747930
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,9569,0.842239,0.358038,0.0,0.772706,0.804677,1.0,0.334363,0.163259,0.029743,...,0.586405,0.942469,0.520021,0.929353,0.965514,0.887291,0.935165,0.143282,0.079139,0.719842
472,9590,0.824169,0.213743,0.0,0.708283,0.791283,1.0,0.339415,0.174406,0.076199,...,0.525921,0.878598,0.446159,0.852091,0.878557,0.805854,0.764479,0.137364,0.155623,0.689073
473,9597,0.783036,0.102558,0.0,0.709988,0.797641,1.0,0.297251,0.146404,0.471745,...,0.579492,0.907477,0.437612,0.896407,0.921612,0.811333,0.531473,0.139557,0.549874,0.789518
474,9623,0.912953,0.714113,0.0,0.918641,0.918016,1.0,0.434037,0.285231,0.153474,...,0.871370,0.972082,0.634717,0.966755,0.977656,0.937250,0.131338,0.084272,0.861624,0.849877
