In [None]:
import gc
gc.enable()

import sys
sys.path.append("../input/tez-lib/")

import os
import numpy as np
import pandas as pd
import tez
import torch
import torch.nn as nn
from joblib import Parallel, delayed
from transformers import AutoConfig, AutoModel, AutoTokenizer
from torch.utils.data import Dataset,DataLoader
import glob
from timeit import default_timer as timer
from torch.utils.data.sampler import *
import torch.cuda.amp as amp
from torch.nn.parallel.data_parallel import data_parallel
from typing import Dict, Tuple

import os
os.environ["TOKENIZERS_PARALLELISM"] = "true"

In [None]:
def time_to_str(t, mode='min'):
    """Format an elapsed time given in seconds as a short fixed-width string.

    Args:
        t: elapsed time in seconds (anything ``int()`` accepts).
        mode: ``'min'`` -> ``'<hours> hr <minutes> min'``;
              ``'sec'`` -> ``'<minutes> min <seconds> sec'``.

    Raises:
        NotImplementedError: for any other ``mode``.
    """
    if mode == 'sec':
        minutes, seconds = divmod(int(t), 60)
        return '%2d min %02d sec' % (minutes, seconds)

    if mode == 'min':
        # True division on purpose: the fractional minutes are truncated by %d.
        hours, minutes = divmod(int(t) / 60, 60)
        return '%2d hr %02d min' % (hours, minutes)

    raise NotImplementedError

In [None]:
# Tag vocabulary: every discourse class has a "B-" (begin) and an "I-" (inside)
# tag; the span builders below rely on each I- id being its B- id plus one.
# "O" marks words outside any discourse element.
# NOTE(review): -100 for "PAD" presumably mirrors the default ignore_index of
# PyTorch's cross-entropy loss used at training time — confirm.
discourse_marker_to_label = {
    "B-Lead": 0,
    "I-Lead": 1,
    "B-Position": 2,
    "I-Position": 3,
    "B-Evidence": 4,
    "I-Evidence": 5,
    "B-Claim": 6,
    "I-Claim": 7,
    "B-Concluding Statement": 8,
    "I-Concluding Statement": 9,
    "B-Counterclaim": 10,
    "I-Counterclaim": 11,
    "B-Rebuttal": 12,
    "I-Rebuttal": 13,
    "O": 14,
    "PAD": -100,
}


# Inverse lookup: label id -> tag name (includes the -100 -> "PAD" entry).
label_to_discourse_marker = {v: k for k, v in discourse_marker_to_label.items()}

# Number of real tag classes (ids 0..14 above, "PAD" excluded); also the width of
# the per-character / per-word probability arrays built in run_submit.
num_discourse_marker = 15 

# do_threshold drops a predicted span of the given class when it has fewer words
# than the value listed here.
length_threshold = {
    'Lead'                : 9,
    'Position'            : 5,
    'Claim'               : 3,
    'Counterclaim'        : 6,
    'Rebuttal'            : 4,
    'Evidence'            : 14,
    'Concluding Statement': 11,
}
# do_threshold drops a predicted span of the given class when the mean of its
# per-word scores is below the value listed here.
probability_threshold = {
    'Lead'                : 0.70,
    'Position'            : 0.55,
    'Claim'               : 0.55,
    'Counterclaim'        : 0.50,
    'Rebuttal'            : 0.55,
    'Evidence'            : 0.65,
    'Concluding Statement': 0.70,
}
# Token budget per essay (including the CLS/SEP tokens added in tokenize_data);
# referenced by model_grps and by the ensembling loop in run_submit.
max_length = 1600
# Passed to torch.cuda.amp.autocast(enabled=...) during inference.
is_amp = True

In [None]:
# Ensemble definition consumed by run_submit. One entry per model group:
#   'arch'        : path given to AutoTokenizer.from_pretrained and FeedbackModel
#   'batch_size'  : DataLoader batch size used for this group
#   'max_length'  : token budget handed to tokenize_data
#   'checkpoints' : list of (state-dict path, display name); the display name is
#                   only printed when the checkpoint is loaded
# Checkpoints inside a group are averaged first; groups are then averaged with
# equal weight, regardless of how many checkpoints each one holds.
model_grps = {
'funnel-xlarge':
    {
        'arch':'../input/xlargefunnelsnap',
        'batch_size':4,
        'max_length':max_length,
        'checkpoints':
        [
            ('../input/funnel-xlarge-result/funnel-xlarge/model_0.bin','funnel-xlarge-fold0')
        ]
    },
    
    'deberta-xlarge':{
        'arch' : '../input/debertaxlarge/deberta-xlarge',
        'batch_size':4,
        # NOTE(review): literal 1600 instead of the shared `max_length` constant;
        # the two are equal today but will drift if the constant is changed.
        'max_length' : 1600,
        'checkpoints':
        [
            ('../input/deberta-xlarge-fold-1/model_1.bin','deberta-xlarge-fold-1'),
            ('../input/deberta-xlarge/model_3.bin','deberta-xlarge-fold-3'),
            ('../input/deberta-xlarge-fold-4/model_4.bin','deberta-xlarge-fold-4')
        ]
        
    },
    
    'deberta':{
        'arch' : '../input/debertalarge',
        'batch_size':8,
        'max_length' : max_length,
        'checkpoints':
        [
            ('../input/deberta-large-5-fold/model_0.bin','deberta_fold_0'),
            ('../input/deberta-fold-1/model_1.bin','deberta_fold_1'),
            ('../input/deberta-fold-234/fold 2/model_2.bin', 'deberta_fold_2'),
            ('../input/deberta-fold-234/fold 3/model_3.bin', 'deberta_fold_3'),
            ('../input/deberta-fold-234/fold 4/model_4.bin', 'deberta_fold_4'),
        ]    
    },
    
    'longformers' : {
        'arch' : '../input/longformerlarge4096/longformer-large-4096',
        'batch_size':8,
        'max_length' : max_length,
        'checkpoints' : [
#             ('../input/tez-fb-large/model_0.bin','longformer_fold_0'),
            ('../input/tez-fb-large/model_1.bin','longformer_fold_1'),
            ('../input/fblongformerlarge1536/model_2.bin','longformer_fold_2'),
            ('../input/tez-fb-large/model_3.bin','longformer_fold_3'),
            ('../input/fblongformerlarge1536/model_4.bin','longformer_fold_4'),
#             ('../input/fblongformerlarge1536/model_0.bin','longformer_fold_5'),
#             ('../input/fblongformerlarge1536/model_1.bin','longformer_fold_6'),
#             ('../input/tez-fb-large/model_2.bin','longformer_fold_7'),
#             ('../input/fblongformerlarge1536/model_3.bin','longformer_fold_8'),
#             ('../input/tez-fb-large/model_4.bin','longformer_fold_9')
        ]
    
    },
    'funnels':
    {
        'arch':'../input/funneltransformerlarge',
        'batch_size':8,
        'max_length':max_length,
        'checkpoints':
        [ 
#             ('../input/funnel-fold-0/fold 0/model_0.bin','funnel-fold-0'),
            ('../input/funnel-large/funnel large/fold 1 3/model/model_1.bin','funnel-large-fold-1'),
            # NOTE(review): the next two display names look swapped relative to
            # the file names (model_3.bin -> "fold-2", model_2.bin -> "fold-3").
            # Harmless for inference because the name is only printed.
            ('../input/funnel-large/funnel large/fold 1 3/model/model_3.bin','funnel-large-fold-2'),
            ('../input/funnel-large/funnel large/fold 2/model/model_2.bin','funnel-large-fold-3'),
        ]
    },
    
}

In [None]:
def tokenize_data(texts, tokenizer, max_length = 4096):
    """Tokenize ``(id, text)`` pairs and wrap each sequence in CLS ... SEP.

    Args:
        texts: iterable of 2-item rows ``(id, text)``.
        tokenizer: object exposing ``encode_plus``, ``cls_token_id`` and
            ``sep_token_id``.
        max_length: upper bound on the final id sequence, special tokens included.

    Returns:
        One tuple ``(id, text, token_offset, token_id, token_mask)`` per input row.
        ``token_offset`` is the tokenizer's offset mapping for the text tokens only
        (it has no entry for CLS/SEP and is not shortened when ``token_id`` is);
        ``token_mask`` is all ones, padding is left to the collate function.
    """

    def _encode_one(id_, text):
        encoded = tokenizer.encode_plus(
            text,
            add_special_tokens=False,
            return_offsets_mapping=True,
            max_length=max_length,
            truncation=True,
        )
        body_ids = encoded['input_ids']
        token_offset = encoded['offset_mapping']

        # CLS + text tokens, cut so that the closing SEP still fits in max_length.
        token_id = ([tokenizer.cls_token_id] + body_ids)[: max_length - 1]
        token_id = token_id + [tokenizer.sep_token_id]
        token_mask = [1] * len(token_id)

        return (id_, text, token_offset, token_id, token_mask)

    return [_encode_one(id_, text) for id_, text in texts]

In [None]:
class FeedbackDataset(Dataset):
    """Map-style dataset over the tuples produced by ``tokenize_data``."""

    def __init__(self, tokenized_data):
        self.tokenized_data = tokenized_data
        self.length = len(tokenized_data)

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        """Return one sample as a dict of plain Python objects (no tensors)."""
        essay_id, text, token_offset, token_id, token_mask = self.tokenized_data[index]
        return {
            'index': index,
            'id': essay_id,
            'text': text,
            # Kept as a string (the original note says this forces the batch
            # loader to store it as a list); run_submit parses it back.
            'token_offset': str(token_offset),
            'token_id': token_id,
            'token_mask': token_mask,
        }


class FeedbackDatasetValid:
    """Dataset over pre-tokenized samples that adds CLS/SEP at access time.

    Each sample is a mapping with an ``"input_ids"`` list; the returned sequence
    never exceeds ``max_len`` ids, special tokens included.
    """

    def __init__(self, samples, max_len, tokenizer):
        self.samples = samples
        self.max_len = max_len
        self.tokenizer = tokenizer
        self.length = len(samples)

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        tok = self.tokenizer
        # Prepend CLS, leave room for exactly one SEP, then append it.
        ids = [tok.cls_token_id] + self.samples[idx]["input_ids"]
        ids = ids[: self.max_len - 1] + [tok.sep_token_id]
        return {
            "ids": ids,
            "mask": [1] * len(ids),
        }


class Collate:
    """Collate function that pads every sample to the longest sequence in the batch.

    Padding goes on the side given by ``tokenizer.padding_side`` ("right" pads at
    the end, anything else pads at the front). ``token_id`` and ``token_mask`` come
    back as ``torch.long`` tensors; all other fields stay plain Python lists.
    """

    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def _pad(self, seq, fill, width):
        """Return ``seq`` extended with ``fill`` up to ``width`` items."""
        filler = [fill] * (width - len(seq))
        if self.tokenizer.padding_side == "right":
            return seq + filler
        return filler + seq

    def __call__(self, batch):
        fields = ("token_id", "token_mask", "index", "id", "text", "token_offset")
        output = {name: [sample[name] for sample in batch] for name in fields}

        # Longest sequence of this batch decides the padded width.
        batch_max = max(len(ids) for ids in output["token_id"])

        padded_ids = [self._pad(s, self.tokenizer.pad_token_id, batch_max) for s in output["token_id"]]
        padded_mask = [self._pad(s, 0, batch_max) for s in output["token_mask"]]

        output["token_id"] = torch.tensor(padded_ids, dtype=torch.long)
        output["token_mask"] = torch.tensor(padded_mask, dtype=torch.long)
        return output

In [None]:
# Essay ids are the test file names without their ".txt" suffix, in sorted order.
# (For local validation the ids can instead be taken from train.csv:
#  pd.read_csv('../input/feedback-prize-2021/train.csv').id.unique().tolist())
test_txt_paths = glob.glob('../input/feedback-prize-2021/test'+'/*.txt')
valid_id = sorted(
    path.replace("\\", "/").rsplit('/', 1)[-1][:-4]  # normalise Windows separators first
    for path in test_txt_paths
)
num_valid = len(valid_id)
print('len(valid_id)',num_valid)

In [None]:
# Read every test essay, normalise it, and order the essays longest-first.
essay_rows = []
for essay_id in valid_id:
    with open('../input/feedback-prize-2021/test' +'/%s.txt'%essay_id, 'r') as fh:
        raw_text = fh.read()

    # Non-breaking spaces become plain spaces; outer whitespace is removed.
    essay_rows.append((essay_id, raw_text.replace(u'\xa0', u' ').strip()))

df_text = pd.DataFrame(essay_rows, columns=['id','text'])
df_text = (
    df_text
    .assign(text_len=df_text['text'].apply(len))
    .sort_values('text_len',ascending=False)
    .reset_index(drop=True)
    .drop(columns='text_len')
)

print('df_text.shape',df_text.shape)
print(df_text) # sort txt by its length

In [None]:
# Preview the first rows only instead of dumping the entire frame into the output.
df_text.head()

In [None]:
def text_to_word(text):
    """Split ``text`` on whitespace and locate each word in the original string.

    Args:
        text: the essay text.

    Returns:
        ``(word, word_offset)`` where ``word`` is ``text.split()`` and
        ``word_offset[k]`` is the ``(start, end)`` character span of ``word[k]``
        in ``text`` (``end`` exclusive). Both lists are empty for blank text.

    Raises:
        NotImplementedError: if a word cannot be found after the previous one
            (not expected, since the words come from ``text`` itself).
    """
    word = text.split()
    word_offset = []

    cursor = 0
    for w in word:
        # Search from the end of the previous word. Passing the start position to
        # find() avoids copying the remaining text for every word.
        start = text.find(w, cursor)
        if start == -1:
            raise NotImplementedError
        end = start + len(w)
        word_offset.append((start, end))
        cursor = end

    return word, word_offset

In [None]:
def create_ner_conditional_masks(id2label: Dict[int, str]) -> torch.Tensor:
    """Build the matrix of tag transitions that are legal under BIO naming.

    Under the BIO scheme an ``I-X`` tag may only follow ``B-X`` or ``I-X``, while
    ``O`` and any ``B-*`` tag may follow every tag. Entry ``[i, j]`` of the result
    is 1.0 when tag ``j`` is allowed directly after tag ``i`` and 0.0 otherwise.

    Args:
        id2label: A dictionary which maps class indices to their label names. The
            indices are used directly as row/column positions.
    Returns:
        A ``len(id2label) x len(id2label)`` float tensor of zeros and ones.
    """
    size = len(id2label)
    conditional_masks = torch.zeros(size, size)

    for prev_idx, prev_tag in id2label.items():
        # The only I- tag that may continue `prev_tag` ("I-" alone when prev is "O").
        continuation = "I-" + prev_tag[2:]
        for next_idx, next_tag in id2label.items():
            opens_or_outside = next_tag == "O" or next_tag[:2] == "B-"
            if opens_or_outside or next_tag == continuation:
                conditional_masks[prev_idx, next_idx] = 1.0

    return conditional_masks


In [None]:
def ner_beam_search_decode(
    log_probs: torch.Tensor, id2label: Dict[int, str], beam_size: int = 2
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Decode NER tags from predicted log-probabilities using beam search.

    All tags are predicted independently, yet a tag is only valid after certain
    previous tags (see `create_ner_conditional_masks`), so picking the locally best
    tag at every position can produce an implausible sequence. Example with two
    consecutive positions:

        position t    : I-Dog 0.54   B-Cat 0.44   ...
        position t + 1: I-Cat 0.99   I-Dog 0.01   ...

    Greedy decoding takes `I-Dog` at `t`; `I-Cat` is then not allowed at `t + 1`,
    so the result is `I-Dog, I-Dog` although the second `I-Dog` is far less likely
    than `I-Cat`. Beam search keeps several candidates alive and can return the
    more natural `B-Cat, I-Cat`.

    NOTE: the inputs must be log-probabilities. The returned scores are `exp()` of
    the selected inputs, so passing raw probabilities yields scores >= 1.

    Args:
        log_probs: The log-probabilities of the token predictions, indexed as
            (batch, position, label).
        id2label: A dictionary which maps class indices to their label names.
        beam_size: The number of candidates for each search step. Default is `2`.
    Returns:
        A tuple `(tags, probs)`, both indexed as (batch, position): the label
        indices of the best beam and `exp()` of their input log-probabilities.
    """
    # Create the log-probability mask for the invalid predictions:
    # 0 for allowed transitions, -10000 for forbidden ones.
    log_prob_masks = -10000.0 * (1 - create_ner_conditional_masks(id2label))
    log_prob_masks = log_prob_masks.to(log_probs.device)

    # Per-beam history buffers, indexed as (batch, beam, position).
    beam_search_shape = (log_probs.size(0), beam_size, log_probs.size(1))
    searched_tokens = log_probs.new_zeros(beam_search_shape, dtype=torch.long)
    searched_log_probs = log_probs.new_zeros(beam_search_shape)

    # Accumulated score per beam. Every beam except the first starts at -10000 so
    # that the first step's top-k is effectively drawn from a single live beam.
    searched_scores = log_probs.new_zeros(log_probs.size(0), beam_size)
    searched_scores[:, 1:] = -10000.0

    for i in range(log_probs.size(1)):
        # Calculate the accumulated score (log-probabilities) with excluding invalid
        # next-tag predictions. `scores` is indexed as (batch, beam, label).
        scores = searched_scores.unsqueeze(2)
        scores = scores + log_probs[:, i, :].unsqueeze(1)
        scores = scores + (log_prob_masks[searched_tokens[:, :, i - 1]] if i > 0 else 0)

        # Select the top-k (beam-search size) predictions over the flattened
        # (beam, label) axis; the flat index encodes beam * num_labels + label.
        best_scores, best_indices = scores.flatten(1).topk(beam_size)
        best_tokens = best_indices % scores.size(2)
        best_log_probs = log_probs[:, i, :].gather(dim=1, index=best_tokens)

        # Index of the beam each winner extends, broadcast over all positions.
        best_buckets = best_indices.div(scores.size(2), rounding_mode="floor")
        best_buckets = best_buckets.unsqueeze(2).expand(-1, -1, log_probs.size(1))

        # Gather the best buckets and their log-probabilities.
        searched_tokens = searched_tokens.gather(dim=1, index=best_buckets)
        searched_log_probs = searched_log_probs.gather(dim=1, index=best_buckets)

        # Update the predictions by inserting to the corresponding timestep.
        searched_scores = best_scores
        searched_tokens[:, :, i] = best_tokens
        searched_log_probs[:, :, i] = best_log_probs

    # Return the best beam-searched sequence and its probabilities.
    return searched_tokens[:, 0, :], searched_log_probs[:, 0, :].exp()

In [None]:
def word_probability_to_predict_df2(text_to_word_probability, id):
    """Turn per-word class probabilities into discourse spans via beam search.

    Fixes over the previous version:
      * the decoder expects log-probabilities, so the probabilities are converted
        with ``log`` first; the returned scores are then real probabilities
        (previously ``exp(p) >= 1``, which no probability threshold could reject);
      * the final word of the essay can now belong to a span, and a span that
        starts on the second-to-last word is no longer dropped;
      * essays with zero or one word no longer raise.

    Args:
        text_to_word_probability: array-like of shape (num_words, num_classes)
            holding class probabilities for every word.
        id: essay id copied into every output row.

    Returns:
        DataFrame with columns ``id``, ``class``, ``predictionstring`` (space
        separated word indices) and ``score`` (``str`` of the per-word score list).
    """
    columns = ['id', 'class', 'predictionstring', 'score']
    len_word = len(text_to_word_probability)
    if len_word == 0:
        return pd.DataFrame([], columns=columns)

    id_2_label = label_to_discourse_marker.copy()
    id_2_label.pop(-100)  # "PAD" is not a decodable tag

    probs = torch.as_tensor(np.asarray(text_to_word_probability), dtype=torch.float32)
    # Clamp before log so zero probabilities become a large negative number
    # instead of -inf.
    log_probs = probs.clamp_min(1e-8).log().unsqueeze(0)

    word_predict, word_score = ner_beam_search_decode(log_probs, id_2_label, 4)

    # squeeze(0) removes only the batch axis, keeping 1-word essays 1-D.
    word_predict = word_predict.squeeze(0).cpu().numpy()
    word_score = word_score.squeeze(0).cpu().numpy()

    skip_labels = (
        discourse_marker_to_label['O'],
        discourse_marker_to_label['PAD'],
    )

    predict_df = []
    t = 0
    while t < len_word:
        b_marker_label = int(word_predict[t])
        if b_marker_label in skip_labels:
            t = t+1
            continue

        # A span may open with a B- tag or with a stray I- tag; either way it is
        # continued only by the I- tag of the same class (B id + 1).
        marker = label_to_discourse_marker[b_marker_label]
        if   marker[0]=='B':
            i_marker_label = b_marker_label+1
        elif marker[0]=='I':
            i_marker_label = b_marker_label
        else:
            raise NotImplementedError

        start = t
        t = t+1
        while t < len_word and word_predict[t] == i_marker_label:
            t = t+1
        end = t  # exclusive

        prediction_string = ' '.join([str(i) for i in range(start,end)])
        discourse_type = marker[2:]
        discourse_score = word_score[start:end].tolist()
        predict_df.append((id, discourse_type, prediction_string, str(discourse_score)))

    predict_df = pd.DataFrame(predict_df, columns=columns)
    return predict_df

In [None]:
def word_probability_to_predict_df(text_to_word_probability, id):
    """Turn per-word class probabilities into discourse spans via greedy argmax.

    Each word gets its most probable tag; consecutive words form a span when the
    first carries a B- or I- tag and the following ones carry the matching I- tag.

    Fixes over the previous version: the final word of the essay can now belong to
    a span, a span that starts on the second-to-last word is no longer dropped,
    and essays with zero or one word no longer raise.

    Args:
        text_to_word_probability: numpy array of shape (num_words, num_classes).
        id: essay id copied into every output row.

    Returns:
        DataFrame with columns ``id``, ``class``, ``predictionstring`` (space
        separated word indices) and ``score`` (``str`` of the per-word maximum
        probabilities).
    """
    columns = ['id', 'class', 'predictionstring', 'score']
    len_word = len(text_to_word_probability)
    if len_word == 0:
        return pd.DataFrame([], columns=columns)

    word_predict = text_to_word_probability.argmax(-1)
    word_score   = text_to_word_probability.max(-1)

    skip_labels = (
        discourse_marker_to_label['O'],
        discourse_marker_to_label['PAD'],
    )

    predict_df = []
    t = 0
    while t < len_word:
        b_marker_label = int(word_predict[t])
        if b_marker_label in skip_labels:
            t = t+1
            continue

        # A span may open with a B- tag or with a stray I- tag; either way it is
        # continued only by the I- tag of the same class (B id + 1).
        marker = label_to_discourse_marker[b_marker_label]
        if   marker[0]=='B':
            i_marker_label = b_marker_label+1
        elif marker[0]=='I':
            i_marker_label = b_marker_label
        else:
            raise NotImplementedError

        start = t
        t = t+1
        while t < len_word and word_predict[t] == i_marker_label:
            t = t+1
        end = t  # exclusive

        prediction_string = ' '.join([str(i) for i in range(start,end)])
        discourse_type = marker[2:]
        discourse_score = word_score[start:end].tolist()
        predict_df.append((id, discourse_type, prediction_string, str(discourse_score)))

    predict_df = pd.DataFrame(predict_df, columns=columns)
    return predict_df

In [None]:
def do_threshold(submit_df, use=['length','probability']):
    """Drop predicted spans that are too short or not confident enough.

    Args:
        submit_df: DataFrame with columns ``id``, ``class``, ``predictionstring``
            and ``score`` (the ``str()`` of a list of per-word scores). It is not
            modified.
        use: which filters to apply; any of ``'length'`` (uses the module-level
            ``length_threshold``) and ``'probability'`` (uses
            ``probability_threshold``). Classes missing from a threshold table are
            never filtered by it.

    Returns:
        The surviving rows, restricted to ``id``, ``class``, ``predictionstring``,
        with their original index labels.

    Raises:
        ValueError / SyntaxError: if a ``score`` entry of a surviving row is not a
            plain Python literal (scores are parsed with ``ast.literal_eval``, so
            arbitrary expressions are rejected instead of executed).
    """
    import ast  # local import keeps this block self-contained

    df = submit_df.fillna('')
    keep = np.ones(len(df), dtype=bool)  # positional mask, independent of index labels

    if 'length' in use:
        n_words = np.array([len(x.split()) for x in df['predictionstring']], dtype=np.int64)
        for key, value in length_threshold.items():
            too_short = df['class'].eq(key).to_numpy() & (n_words < value)
            keep &= ~too_short

    if 'probability' in use:
        # Scores are parsed only for rows that survived the length filter.
        score_text = df['score'].tolist()
        mean_score = np.full(len(df), np.nan, dtype=np.float64)
        for pos in np.flatnonzero(keep):
            mean_score[pos] = np.mean(ast.literal_eval(score_text[pos]))
        for key, value in probability_threshold.items():
            too_weak = df['class'].eq(key).to_numpy() & (mean_score < value)
            keep &= ~too_weak

    return df.loc[keep, ['id', 'class', 'predictionstring']]

In [None]:
class FeedbackModel(tez.Model):
    """Transformer backbone with a linear token-classification head (inference only).

    The backbone is built from the configuration alone (``AutoModel.from_config``),
    so its weights are uninitialised until a checkpoint is loaded with
    ``load_state_dict``.
    """

    def __init__(self, model_name, num_labels):
        super().__init__()
        self.model_name = model_name
        self.num_labels = num_labels

        backbone_config = AutoConfig.from_pretrained(model_name)
        backbone_config.update(
            dict(
                output_hidden_states=True,
                hidden_dropout_prob=0.1,
                layer_norm_eps=1e-7,
                add_pooling_layer=False,
            )
        )

        self.transformer = AutoModel.from_config(backbone_config)
        self.output = nn.Linear(backbone_config.hidden_size, self.num_labels)

    def forward(self, ids, mask):
        """Return per-token class probabilities (softmax over the last axis)."""
        hidden_states = self.transformer(ids, mask).last_hidden_state
        return torch.softmax(self.output(hidden_states), dim=-1)

In [None]:
def jn(pst, start, end):
    """Join ``pst[start:end]`` into a space separated prediction string.

    ``link_evidence`` marks segment boundaries inside ``pst`` with ``-1``; those
    markers are skipped so they never leak into the prediction string when two
    segments are merged.
    """
    return " ".join([str(x) for x in pst[start:end] if x != -1])

def link_evidence(oof):
    """Merge neighbouring Evidence predictions of the same essay into longer spans.

    Two consecutive Evidence segments are linked when the second one starts at
    most ``thresh`` word(s) after the first one ends AND the start of the second
    lies no more than ``thresh2`` words after the start of the span built so far.
    Predictions of every other class are passed through unchanged.

    Compared with the previous version, the unused ``idc = idu[1]`` lookup (which
    raised IndexError when fewer than two essays were present) is gone, together
    with the class loop and the one-iteration ``thresh2`` loop that could only
    ever process "Evidence" with ``thresh2 == 26``.

    Args:
        oof: DataFrame with at least ``id``, ``class`` and ``predictionstring``.

    Returns:
        DataFrame of merged Evidence rows outer-merged with the non-Evidence rows.
    """
    thresh = 1    # max jump between the end of one segment and the start of the next
    thresh2 = 26  # max distance from the span start to the start of the next segment

    eoof = oof[oof['class'] == "Evidence"]
    neoof = oof[oof['class'] != "Evidence"]

    retval = []
    for idv in oof['id'].unique():
        q = eoof[eoof['id'] == idv]
        if len(q) == 0:
            continue

        # Flatten all segments into one list; -1 marks the start of each segment.
        pst = []
        for _, r in q.iterrows():
            pst = pst + [-1] + [int(x) for x in r['predictionstring'].split()]

        start = 1
        end = 1
        for i in range(2, len(pst)):
            end = i
            if pst[i] != -1:
                continue
            # At a boundary: keep extending the current span only if the next
            # segment is adjacent and the span stays within the distance limit;
            # otherwise emit the span and start a new one.
            gap_too_big = pst[i + 1] > pst[i - 1] + thresh
            span_too_long = pst[i + 1] - pst[start] > thresh2
            if gap_too_big or span_too_long:
                retval.append((idv, 'Evidence', jn(pst, start, end)))
                start = i + 1
        retval.append((idv, 'Evidence', jn(pst, start, end + 1)))

    roof = pd.DataFrame(retval, columns=['id', 'class', 'predictionstring'])
    roof = roof.merge(neoof, how='outer')
    return roof

In [None]:
def run_submit():
    """Run every checkpoint in ``model_grps`` over the test essays, ensemble the
    predictions and write ``submission.csv``.

    Pipeline:
      1. Per model group: tokenize ``df_text`` with the group's tokenizer, run each
         checkpoint on the GPU and accumulate the group's mean class probabilities
         per token (stored as uint8, i.e. quantised to 1/255).
      2. Per essay: spread token probabilities over the characters each token
         covers, average over groups, then average the characters of each word.
      3. Decode spans with ``word_probability_to_predict_df2``, filter them with
         ``do_threshold`` and save the result.

    Changes from the previous version:
      * only real text tokens are mapped back to characters, using the group's own
        ``max_length`` (before, the global ``max_length`` was used and the SEP
        probability was applied to one dropped token of every truncated essay);
      * checkpoints are loaded with ``map_location='cpu'``;
      * token offsets are parsed with ``ast.literal_eval`` instead of ``eval``;
      * the unused ``token_mask`` / ``id`` accumulators are gone;
      * cleanup no longer fails when the loader yields no batch, and the final
        preview no longer fails when fewer than three rows were predicted.

    Requires a CUDA device; reads the module-level ``model_grps``, ``df_text``,
    ``num_valid``, ``num_discourse_marker`` and ``is_amp``.
    """
    import ast  # local import: parses the stringified token offsets

    results = []
    all_model_idx = 0

    for model_type, model_grp in model_grps.items():

        # NOTE(review): printed before anything of this group has been loaded.
        print(f'{model_type} loaded ok.\n')

        arch = model_grp['arch']
        num_models = len(model_grp['checkpoints'])

        tokenizer = AutoTokenizer.from_pretrained(arch)

        # Tokenize in two worker processes; the order of df_text is preserved.
        split_texts = np.array_split(df_text[['id', 'text']].values, 2)

        tokenized_text_ = Parallel(n_jobs=2, backend="multiprocessing")(
            delayed(tokenize_data)(text, tokenizer, model_grp['max_length']) for text in split_texts
        )

        tokenized_text = []

        for t in tokenized_text_:
            tokenized_text.extend(t)

        del tokenized_text_

        collate_func = Collate(tokenizer)
        valid_dataset = FeedbackDataset(tokenized_text)
        valid_loader = DataLoader(
            valid_dataset,
            sampler=SequentialSampler(valid_dataset),
            batch_size=model_grp['batch_size'],
            drop_last=False,
            num_workers=2,
            pin_memory=False,
            collate_fn = collate_func,
        )

        # Per-batch accumulators for this group. Batches are visited in the same
        # order for every checkpoint, so batch index t lines up across checkpoints.
        results_n = {
            'token_offset': [],
            'probability': [],
        }

        for model_idx, (checkpoint, name) in enumerate(model_grp['checkpoints']):
            net_template = FeedbackModel(arch, 15)
            # Load onto the CPU first so the checkpoint tensors do not occupy GPU
            # memory next to the model copy created by .cuda().
            net_template.load_state_dict(torch.load(checkpoint, map_location='cpu'))
            net_template.cuda()
            net_template.eval()

            print('load ok : [%d] %s' % (all_model_idx, name))
            all_model_idx += 1

            T = 0
            start_timer = timer()
            for t, batch in enumerate(valid_loader):
                batch_size = len(batch['index'])
                token_mask = batch['token_mask'].cuda()
                token_id = batch['token_id'].cuda()

                with torch.no_grad():
                    with amp.autocast(enabled=is_amp):
                        probability = data_parallel(net_template, (token_id, token_mask))

                        # Each checkpoint contributes prob / num_models, quantised
                        # to uint8; the sum over checkpoints stays within 0..255.
                        quantised = ((probability / num_models) * 255).byte().cpu().numpy()

                        if model_idx == 0:
                            results_n['probability'].append(quantised)
                            results_n['token_offset'] += [ast.literal_eval(x) for x in batch['token_offset']]
                        else:
                            results_n['probability'][t] = results_n['probability'][t] + quantised

                        T += batch_size

                del token_id, token_mask, probability

                print('\r\t%d/%d  %s' % (T, len(valid_dataset), time_to_str(timer() - start_timer, 'sec')), end='',
                      flush=True)

            del net_template

            gc.collect()

            torch.cuda.empty_cache()
            print('')

        # ----------------------------
        del valid_dataset, valid_loader, tokenized_text
        gc.collect()

        print('')

        # Split every (batch, seq, class) array into one (1, seq, class) array per essay.
        prob_list = []
        for b_idx in range(len(results_n["probability"])):
            prob_list.extend(np.split(results_n["probability"][b_idx], len(results_n["probability"][b_idx])))

        results.append({
            'probability': prob_list,
            'token_offset': results_n['token_offset'],
            'max_length': model_grp['max_length'],
        })
    # ------------------------------------------------------------------------
    # results: one entry per model group, each holding one probability array per essay
    num_net = len(model_grps.keys())

    submit_df = []
    for i in range(num_valid):
        d = df_text.iloc[i]
        id = d.id
        text = d.text
        word, word_offset = text_to_word(text)

        # ensemble -----
        token_to_text_probability = np.full((len(text), num_discourse_marker), 0, np.float32)
        for j in range(num_net):
            # [0] removes the axis added by np.split, [1:] skips the CLS position.
            p = results[j]['probability'][i][0][1:] / 255
            offsets = results[j]['token_offset'][i]
            # tokenize_data keeps at most max_length - 2 text tokens (CLS and SEP
            # take the other two slots), so later offsets have no prediction.
            n_tok = min(len(offsets), results[j]['max_length'] - 2)
            for t, (start, end) in enumerate(offsets[:n_tok]):
                token_to_text_probability[start:end] += (p[t])
        token_to_text_probability = token_to_text_probability / num_net
        # ensemble -----

        text_to_word_probability = np.full((len(word), num_discourse_marker), 0, np.float32)
        for t, (start, end) in enumerate(word_offset):
            text_to_word_probability[t] = token_to_text_probability[start:end].mean(0)

        predict_df = word_probability_to_predict_df2(text_to_word_probability, id)
        submit_df.append(predict_df)
    print('')

    # ----------------------------------------
    submit_df = pd.concat(submit_df).reset_index(drop=True)
    submit_df = do_threshold(submit_df, use=['length', 'probability'])
#     submit_df = link_evidence(submit_df)
    submit_df.to_csv('submission.csv', index=False)

    print('----')
    # Preview up to three rows; there may be fewer predictions than that.
    for t in range(min(3, len(submit_df))): print(submit_df.iloc[t], '\n')
    print('submission ok!----')

In [None]:
import warnings
# Suppress every Python warning from here on so the inference log stays readable.
# NOTE(review): this also hides deprecation warnings raised by the libraries.
warnings.filterwarnings('ignore')

In [None]:
# Run the full inference + ensembling pipeline; writes submission.csv to the
# current working directory.
run_submit()