- 1st stage
    - longformer 2048
    - roberta 512
    - bart 512
    - deberta 1024
    - funnel 512
    - distilbart_cnn 512
    

- Optimization
    - ensemble weight (model)
    - label weight (normalization)


- 2nd stage
    - XGB 
    - MLP
    - LSTM

In [None]:
# =================================
# Library
# =================================
import gc
import os
import pickle

import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder

import xgboost as xgb

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from torch.cuda.amp import autocast, GradScaler

import transformers
from transformers import LongformerTokenizer, LongformerModel,AutoTokenizer,RobertaModel,BartModel,DebertaModel,FunnelModel
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# =================================
# Constant
# =================================
# Competition inputs: the submission template and the directory of test essays
# (one "<id>.txt" file per essay).
SUB_PATH = "../input/feedback-prize-2021/sample_submission.csv"
DATA_PATH = "../input/feedback-prize-2021/test/"

# All second-stage artifacts share one directory:
#   XGB  -> /xgb_fold{fold}.pkl
#   MLP  -> /mean_std_df.csv, /mlp_{fold}.pth
#   LSTM -> lstm_{fold}.pth
XGB_MODEL = MLP_MODEL = LSTM_MODEL = "../input/expv2-en-038-xgb-mlp-lstm-fe-fix/model"

# =================================
# Config
# =================================
class CFG_ex019:
    """Settings for experiment ex019 (Longformer tokenizer, 2048-token window)."""

    # Where the fold checkpoints live and the prefix of their file names.
    model_path = "../input/feed-ex019"
    model_prefix = "ex019_"

    # Directory holding the tokenizer files.
    tokenizer_path = "../input/longformer-large-4096-tokenizer/longformer-large-4096"

    # Sequence length, batch size and number of folds.
    max_len = 2048
    batch_size = 8
    n_fold = 5
    
class CFG_ex046:
    """Settings for experiment ex046 (RoBERTa tokenizer, 512-token window)."""

    # Where the fold checkpoints live and the prefix of their file names.
    model_path = "../input/feed-ex046"
    model_prefix = "ex046_"

    # Directory holding the tokenizer files.
    tokenizer_path = "../input/roberta-large-tokenizer/roberta-large"

    # Sequence length, batch size and number of folds.
    max_len = 512
    batch_size = 16
    n_fold = 5
    
class CFG_ex048:
    """Settings for experiment ex048 (BART tokenizer, 512-token window)."""

    # Where the fold checkpoints live and the prefix of their file names.
    model_path = "../input/feed-ex048"
    model_prefix = "ex048_"

    # Directory holding the tokenizer files.
    tokenizer_path = "../input/bart-large-tokenizer/bart-large"

    # Sequence length, batch size and number of folds.
    max_len = 512
    batch_size = 16
    n_fold = 5
    
class CFG_ex067:
    """Settings for experiment ex067 (DeBERTa tokenizer, 1024-token window)."""

    # Where the fold checkpoints live and the prefix of their file names.
    model_path = "../input/feed-ex067"
    model_prefix = "ex067_"

    # Directory holding the tokenizer files.
    tokenizer_path = "../input/deberta-large-tokenizer/deberta-large"

    # Sequence length, batch size and number of folds.
    max_len = 1024
    batch_size = 8
    n_fold = 5
    
class CFG_ex051:
    """Settings for experiment ex051 (Funnel tokenizer, 512-token window)."""

    # Where the fold checkpoints live and the prefix of their file names.
    model_path = "../input/feed-ex051-model"
    model_prefix = "ex051_"

    # Directory holding the tokenizer files.
    tokenizer_path = "../input/funnel-large-tokenizer/funnel-large"

    # Sequence length, batch size and number of folds.
    max_len = 512
    batch_size = 16
    n_fold = 5
    
class CFG_ex064:
    """Settings for experiment ex064 (DistilBART tokenizer, 512-token window)."""

    # Where the fold checkpoints live and the prefix of their file names.
    model_path = "../input/feed-ex064-model"
    model_prefix = "ex064_"

    # Directory holding the tokenizer files.
    tokenizer_path = "../input/distilbart-tokenizer/distilbart"

    # Sequence length, batch size and number of folds.
    max_len = 512
    batch_size = 16
    n_fold = 5

# Full list of second-stage feature columns, in a fixed order:
#   span-level basics, per-class mean/std of the token probabilities,
#   essay-level and essay/class-level aggregates, per-class max/min, and
#   finally the "second" flag. The 15 numbered columns of each statistic
#   are generated rather than spelled out.
features = (
    ['class', 'pred_len', 'proba', 'start']
    + [f'pred_mean_{c}' for c in range(15)]
    + [f'pred_std_{c}' for c in range(15)]
    + [
        'id_len_mean', 'id_len_mean_diff', 'id_proba_mean', 'id_proba_mean_diff',
        'id_class_count',
        'id_class_len_mean', 'id_class_len_mean_diff',
        'id_class_proba_mean', 'id_class_proba_mean_diff',
    ]
    + [f'pred_max_{c}' for c in range(15)]
    + [f'pred_min_{c}' for c in range(15)]
    + ["second"]
)

# Columns treated as categorical.
cat_cols = ["class", "start", "second"]

# Columns treated as numeric: span length/probability, per-class mean and
# std of the token probabilities, then the essay-level aggregates.
num_cols = (
    ['pred_len', 'proba']
    + [f'pred_mean_{c}' for c in range(15)]
    + [f'pred_std_{c}' for c in range(15)]
    + [
        'id_len_mean', 'id_len_mean_diff', 'id_proba_mean', 'id_proba_mean_diff',
        'id_class_count', 'id_class_len_mean', 'id_class_len_mean_diff',
        'id_class_proba_mean', 'id_class_proba_mean_diff',
    ]
)


# Per-class thresholds (first set).
pred_dict_first = {
    'Claim': 0.325,
    'Concluding Statement': 0.4,
    'Counterclaim': 0.25,
    'Evidence': 0.375,
    'Lead': 0.375,
    'Position': 0.275,
    'Rebuttal': 0.225,
}

# Per-class thresholds (second set). The long float literals are kept
# exactly as they were so the values stay bit-identical.
pred_dict_second = {
    'Claim': 0.35000000000000003,
    'Concluding Statement': 0.42500000000000004,
    'Counterclaim': 0.325,
    'Evidence': 0.375,
    'Lead': 0.375,
    'Position': 0.325,
    'Rebuttal': 0.225,
}

# Discourse classes.
classes = [
    "Lead", "Claim", "Position", "Evidence",
    "Counterclaim", "Concluding Statement", "Rebuttal",
]

# Class index -> class name; index 7 is the "no class" label.
target_map_rev = {
    0: 'Lead',
    1: 'Position',
    2: 'Evidence',
    3: 'Claim',
    4: 'Concluding Statement',
    5: 'Counterclaim',
    6: 'Rebuttal',
    7: 'blank',
}

# Class name -> class index: the inverse of target_map_rev without 'blank'.
target_map = {label: idx for idx, label in target_map_rev.items() if label != 'blank'}


In [None]:
# =================================
# Function
# =================================
def get_preds_collate(text_ids, preds, preds_len):
    """Turn per-token label predictions into word-level span predictions.

    For each essay the text is re-tokenized to obtain character offsets, every
    token is mapped to the index of the whitespace-delimited word it falls in,
    and each "B" label followed by its matching "I" labels is collected into
    one span of word indices.

    NOTE: this function uses a module-level ``tokenizer`` (it is not a
    parameter, unlike ``get_preds_collate_xgboost``) and reads essays from a
    hard-coded test directory.

    Args:
        text_ids: essay ids, indexed by position; used as the file stem.
        preds: per-essay token label ids (see the KEY comment in the body),
            indexed as ``preds[id_num,]`` and divisible by a float, so a
            numpy-like array is expected.
        preds_len: per-essay token count; used as the tokenizer ``max_length``
            and as the number of token positions scanned.

    Returns:
        DataFrame with columns ``id``, ``class``, ``predictionstring``. Only
        spans covering more than 4 words are kept. When no span qualifies an
        empty frame with those three columns is returned.
    """
    all_predictions = []

    for id_num in tqdm(range(len(preds))):

        # GET ID
        n = text_ids[id_num]
        max_len = int(preds_len[id_num])
        # GET TOKEN POSITIONS IN CHARS
        name = f'../input/feedback-prize-2021/test/{n}.txt'
        # Context manager so the file handle is closed right away.
        with open(name, 'r') as f:
            txt = f.read()
        tokens = tokenizer.encode_plus(txt, max_length=max_len, padding='max_length',
                                   truncation=True, return_offsets_mapping=True)
        off = tokens['offset_mapping']

        # GET WORD POSITIONS IN CHARS
        # w[k] is the character index where word k starts; a word starts at
        # the first non-separator character after a separator (or at 0).
        w = []
        blank = True
        for i, ch in enumerate(txt):
            if ch in (' ', '\n', '\xa0', '\x85'):
                blank = True
            elif blank:
                w.append(i)
                blank = False
        # Sentinel so that w[w_i+1] below is always defined.
        w.append(1e6)

        # MAPPING FROM TOKENS TO WORDS
        # -1 marks tokens with no word (end offset 0: special/padding tokens).
        word_map = -1 * np.ones(max_len,dtype='int32')
        w_i = 0
        for i, span in enumerate(off):
            if span[1]==0: continue
            while span[0]>=w[w_i+1]: w_i += 1
            word_map[i] = int(w_i)

        # CONVERT TOKEN PREDICTIONS INTO WORD LABELS
        ### KEY: ###
        # 0: LEAD_B, 1: LEAD_I
        # 2: POSITION_B, 3: POSITION_I
        # 4: EVIDENCE_B, 5: EVIDENCE_I
        # 6: CLAIM_B, 7: CLAIM_I
        # 8: CONCLUSION_B, 9: CONCLUSION_I
        # 10: COUNTERCLAIM_B, 11: COUNTERCLAIM_I
        # 12: REBUTTAL_B, 13: REBUTTAL_I
        # 14: NOTHING i.e. O
        ### NOTE THESE VALUES ARE DIVIDED BY 2 IN NEXT CODE LINE
        # After halving, a B label is an integer c and its I label is c + 0.5.
        pred = preds[id_num,]/2.0

        i = 0
        while i<max_len:
            prediction = []
            start = pred[i]
            if start in [0,1,2,3,4,5,6,7]:
                prediction.append(word_map[i])
                i += 1
                # A B label on the very last position ends the scan; the
                # one-token span could never pass the length filter anyway.
                if i>=max_len: break
                while pred[i]==start+0.5:
                    if not word_map[i] in prediction:
                        prediction.append(word_map[i])
                    i += 1
                    if i>=max_len: break
            else:
                i += 1
            prediction = [x for x in prediction if x!=-1]
            if len(prediction)>4:
                all_predictions.append( (n, target_map_rev[int(start)],
                                ' '.join([str(x) for x in prediction]) ) )

    # MAKE DATAFRAME
    # Passing columns= here (instead of assigning df.columns afterwards) keeps
    # this working when all_predictions is empty.
    df = pd.DataFrame(all_predictions, columns=['id','class','predictionstring'])

    return df

def get_preds_collate_xgboost(text_ids, preds, preds_len, preds_raw_max, preds_raw,th_len,tokenizer):
    """Build candidate spans plus per-span probability statistics.

    Token labels are grouped into spans of word indices. Unlike
    ``get_preds_collate``, a run of identical "I" labels with no leading "B"
    label also forms a span (flagged with ``start == 0``).

    Args:
        text_ids: essay ids, indexed by position; used as the file stem.
        preds: per-essay token label ids (see the KEY comment in the body),
            indexed as ``preds[id_num,]``; a numpy-like array is expected.
        preds_len: per-essay token count; used as the tokenizer ``max_length``
            and as the number of token positions scanned.
        preds_raw_max: per-essay, per-token scalar; its mean over the span's
            tokens becomes the ``proba`` column.
        preds_raw: per-essay, per-token values aggregated over the span's
            tokens with ``axis=0`` (presumably the per-class probability
            vector - TODO confirm against the caller).
        th_len: a span is kept only if it covers more than ``th_len`` words.
        tokenizer: object exposing ``encode_plus`` with
            ``return_offsets_mapping`` support.

    Returns:
        Tuple ``(df, means, stds, maxs, mins)`` where ``df`` has columns
        ``id``, ``class``, ``predictionstring``, ``pred_len``, ``proba``,
        ``start`` and the four lists hold one aggregate of ``preds_raw`` per
        row of ``df``, in the same order. With no qualifying span, ``df`` is
        an empty frame that still carries those columns.
    """
    all_predictions = []
    all_predictions_mean = []
    all_predictions_std = []
    all_predictions_max = []
    all_predictions_min = []
    for id_num in tqdm(range(len(preds))):
        # GET ID
        n = text_ids[id_num]
        max_len = int(preds_len[id_num])
        # GET TOKEN POSITIONS IN CHARS
        name = f'../input/feedback-prize-2021/test/{n}.txt'
        # Context manager so the file handle is closed right away.
        with open(name, 'r') as f:
            txt = f.read()
        tokens = tokenizer.encode_plus(txt, max_length=max_len, padding='max_length',
                                   truncation=True, return_offsets_mapping=True)
        off = tokens['offset_mapping']

        # GET WORD POSITIONS IN CHARS
        # w[k] is the character index where word k starts.
        w = []
        blank = True
        for i, ch in enumerate(txt):
            if ch in (' ', '\n', '\xa0', '\x85'):
                blank = True
            elif blank:
                w.append(i)
                blank = False
        # Sentinel so that w[w_i+1] below is always defined.
        w.append(1e6)

        # MAPPING FROM TOKENS TO WORDS
        # -1 marks tokens with no word (end offset 0: special/padding tokens).
        word_map = -1 * np.ones(max_len,dtype='int32')
        w_i = 0
        for i, span in enumerate(off):
            if span[1]==0: continue
            while span[0]>=w[w_i+1]: w_i += 1
            word_map[i] = int(w_i)

        # CONVERT TOKEN PREDICTIONS INTO WORD LABELS
        ### KEY: ###
        # 0: LEAD_B, 1: LEAD_I
        # 2: POSITION_B, 3: POSITION_I
        # 4: EVIDENCE_B, 5: EVIDENCE_I
        # 6: CLAIM_B, 7: CLAIM_I
        # 8: CONCLUSION_B, 9: CONCLUSION_I
        # 10: COUNTERCLAIM_B, 11: COUNTERCLAIM_I
        # 12: REBUTTAL_B, 13: REBUTTAL_I
        # 14: NOTHING i.e. O
        ### NOTE THESE VALUES ARE DIVIDED BY 2 IN NEXT CODE LINE
        # After halving, a B label is an integer c and its I label is c + 0.5.
        pred = preds[id_num,]/2.0
        pred_raw_max = preds_raw_max[id_num]
        pred_raw = preds_raw[id_num]
        i = 0

        while i<max_len:
            prediction = []
            prediction_max_proba = []
            prediction_proba = []
            start = pred[i]
            if start in [0,1,2,3,4,5,6,7]:
                # Span opened by a B label: extend over the matching I labels.
                prediction.append(word_map[i])
                prediction_max_proba.append(pred_raw_max[i])
                prediction_proba.append(pred_raw[i])
                i += 1
                # Reaching the end right after the opening token ends the
                # scan without emitting that one-token span.
                if i>=max_len: break
                while pred[i]==start+0.5:
                    if not word_map[i] in prediction:
                        prediction.append(word_map[i])
                    prediction_max_proba.append(pred_raw_max[i])
                    prediction_proba.append(pred_raw[i])

                    i += 1
                    if i>=max_len: break
            elif start in [0.5,1.5,2.5,3.5,4.5,5.5,6.5]:
                # Span opened by an I label (no B seen): extend over the run
                # of the same I label.
                prediction.append(word_map[i])
                prediction_max_proba.append(pred_raw_max[i])
                prediction_proba.append(pred_raw[i])
                i += 1
                if i>=max_len: break
                while pred[i]==start:
                    if not word_map[i] in prediction:
                        prediction.append(word_map[i])
                    prediction_max_proba.append(pred_raw_max[i])
                    prediction_proba.append(pred_raw[i])
                    i += 1
                    if i>=max_len: break
            else:
                i += 1

            prediction = [x for x in prediction if x!=-1]
            # Resolve the class name and the "opened by B" flag; the "O"
            # label (7) and anything else produces no row.
            if start in [0,1,2,3,4,5,6]:
                label, began_with_b = target_map_rev[int(start)], 1
            elif start in [0.5,1.5,2.5,3.5,4.5,5.5,6.5]:
                label, began_with_b = target_map_rev[int(start - 0.5)], 0
            else:
                continue
            if len(prediction)>th_len:
                all_predictions.append( (n, label,
                                ' '.join([str(x) for x in prediction]),len(prediction),np.mean(prediction_max_proba),began_with_b))
                all_predictions_mean.append(np.mean(prediction_proba,axis=0))
                all_predictions_std.append(np.std(prediction_proba,axis=0))
                all_predictions_max.append(np.max(prediction_proba,axis=0))
                all_predictions_min.append(np.min(prediction_proba,axis=0))
    # MAKE DATAFRAME
    # Passing columns= here (instead of assigning df.columns afterwards) keeps
    # this working when all_predictions is empty.
    df = pd.DataFrame(all_predictions,
                      columns=['id','class','predictionstring',"pred_len","proba","start"])

    return df,all_predictions_mean,all_predictions_std,all_predictions_max,all_predictions_min


def get_preds_collate_xgboost_second(text_ids, preds, preds_len, preds_max, preds_raw,preds_second_max,tokenizer):
    """Build candidate spans from the second-ranked label where the top label is "O".

    A span is opened only at tokens whose top label (``preds``) is the
    "nothing" label (14, i.e. 7 after halving) and whose second-ranked label
    (``preds_second_max``) is a B or I label. It is extended while the top
    label stays "nothing" and the second-ranked label continues the span.

    Args:
        text_ids: essay ids, indexed by position; used as the file stem.
        preds: per-essay top token label ids (see the KEY comment in the
            body), indexed as ``preds[id_num,]``.
        preds_len: per-essay token count; used as the tokenizer ``max_length``
            and as the number of token positions scanned.
        preds_max: per-essay, per-token scalar; its mean over the span's
            tokens becomes the ``proba`` column.
        preds_raw: per-essay, per-token values aggregated over the span's
            tokens with ``axis=0`` (presumably the per-class probability
            vector - TODO confirm against the caller).
        preds_second_max: per-essay second-ranked token label ids, same
            encoding as ``preds``.
        tokenizer: object exposing ``encode_plus`` with
            ``return_offsets_mapping`` support.

    Returns:
        Tuple ``(df, means, stds, maxs, mins)`` where ``df`` has columns
        ``id``, ``class``, ``predictionstring``, ``pred_len``, ``proba``,
        ``start`` and the four lists hold one aggregate of ``preds_raw`` per
        row of ``df``. Only spans covering more than 4 words are kept. With
        no qualifying span, ``df`` is an empty frame that still carries those
        columns.
    """
    all_predictions = []
    all_predictions_mean = []
    all_predictions_std = []
    all_predictions_max = []
    all_predictions_min = []
    for id_num in tqdm(range(len(preds))):
        # GET ID
        n = text_ids[id_num]
        max_len = int(preds_len[id_num])
        # GET TOKEN POSITIONS IN CHARS
        name = f'../input/feedback-prize-2021/test/{n}.txt'
        # Context manager so the file handle is closed right away.
        with open(name, 'r') as f:
            txt = f.read()
        tokens = tokenizer.encode_plus(txt, max_length=max_len, padding='max_length',
                                   truncation=True, return_offsets_mapping=True)
        off = tokens['offset_mapping']

        # GET WORD POSITIONS IN CHARS
        # w[k] is the character index where word k starts.
        w = []
        blank = True
        for i, ch in enumerate(txt):
            if ch in (' ', '\n', '\xa0', '\x85'):
                blank = True
            elif blank:
                w.append(i)
                blank = False
        # Sentinel so that w[w_i+1] below is always defined.
        w.append(1e6)

        # MAPPING FROM TOKENS TO WORDS
        # -1 marks tokens with no word (end offset 0: special/padding tokens).
        word_map = -1 * np.ones(max_len,dtype='int32')
        w_i = 0
        for i, span in enumerate(off):
            if span[1]==0: continue
            while span[0]>=w[w_i+1]: w_i += 1
            word_map[i] = int(w_i)

        # CONVERT TOKEN PREDICTIONS INTO WORD LABELS
        ### KEY: ###
        # 0: LEAD_B, 1: LEAD_I
        # 2: POSITION_B, 3: POSITION_I
        # 4: EVIDENCE_B, 5: EVIDENCE_I
        # 6: CLAIM_B, 7: CLAIM_I
        # 8: CONCLUSION_B, 9: CONCLUSION_I
        # 10: COUNTERCLAIM_B, 11: COUNTERCLAIM_I
        # 12: REBUTTAL_B, 13: REBUTTAL_I
        # 14: NOTHING i.e. O
        ### NOTE THESE VALUES ARE DIVIDED BY 2 IN NEXT CODE LINE
        # After halving, a B label is an integer c and its I label is c + 0.5.
        pred = preds[id_num,]/2
        pred_max = preds_max[id_num]
        pred_second_max = preds_second_max[id_num,]/2
        pred_raw = preds_raw[id_num]
        i = 0
        while i<max_len:
            prediction_second = []
            prediction_max_proba = []
            prediction_proba = []
            start = pred[i]
            start_second = pred_second_max[i]
            if (start in [7]) & (start_second in [0,1,2,3,4,5,6]):
                # Top label is "O", second-ranked label is a B label.
                prediction_second.append(word_map[i])
                prediction_max_proba.append(pred_max[i])
                prediction_proba.append(pred_raw[i])
                i += 1
                # Reaching the end right after the opening token ends the
                # scan without emitting that one-token span.
                if i>=max_len: break
                while (pred[i]==start) & (pred_second_max[i] == start_second + 0.5):
                    if not word_map[i] in prediction_second:
                        prediction_second.append(word_map[i])
                    prediction_max_proba.append(pred_max[i])
                    prediction_proba.append(pred_raw[i])
                    i += 1
                    if i>=max_len: break
            elif (start in [7]) &  (start_second in [0.5,1.5,2.5,3.5,4.5,5.5,6.5]):
                # Top label is "O", second-ranked label is an I label.
                prediction_second.append(word_map[i])
                prediction_max_proba.append(pred_max[i])
                prediction_proba.append(pred_raw[i])
                i += 1
                if i>=max_len: break
                while (pred[i]==start) & (pred_second_max[i] == start_second):
                    if not word_map[i] in prediction_second:
                        prediction_second.append(word_map[i])
                    prediction_max_proba.append(pred_max[i])
                    prediction_proba.append(pred_raw[i])
                    i += 1
                    if i>=max_len: break
            else:
                i += 1

            prediction_second = [x for x in prediction_second if x!=-1]
            # Resolve the class name and the "opened by B" flag from the
            # second-ranked label; anything else produces no row.
            if start_second in [0,1,2,3,4,5,6]:
                label, began_with_b = target_map_rev[int(start_second)], 1
            elif start_second in [0.5,1.5,2.5,3.5,4.5,5.5,6.5]:
                label, began_with_b = target_map_rev[int(start_second - 0.5)], 0
            else:
                continue
            if len(prediction_second)>4:
                all_predictions.append( (n, label,
                                    ' '.join([str(x) for x in prediction_second]) ,len(prediction_second),
                                         np.mean(prediction_max_proba),began_with_b))
                all_predictions_mean.append(np.mean(prediction_proba,axis=0))
                all_predictions_std.append(np.std(prediction_proba,axis=0))
                all_predictions_max.append(np.max(prediction_proba,axis=0))
                all_predictions_min.append(np.min(prediction_proba,axis=0))
    # MAKE DATAFRAME
    # Passing columns= here (instead of assigning df.columns afterwards) keeps
    # this working when all_predictions is empty.
    df = pd.DataFrame(all_predictions,
                      columns=['id','class','predictionstring',"pred_len","proba","start"])

    return df,all_predictions_mean,all_predictions_std,all_predictions_max,all_predictions_min

In [None]:
# =================================
# Dataset & Model
# =================================
class TestDataset(Dataset):
    """Dataset that reads and tokenizes one test essay per item.

    Args:
        ids: sequence of essay ids; item ``i`` reads ``{DATA_PATH}{ids[i]}.txt``.
        max_len: length every item is padded/truncated to by the tokenizer.
        tokenizer: object exposing ``encode_plus``.
    """

    def __init__(self, ids, max_len, tokenizer):
        self.ids = ids
        self.max_len = max_len
        self.tokenizer = tokenizer

    def __getitem__(self, index):
        """Return ``{'token': input ids, 'mask': attention mask}`` as long tensors."""
        # GET TEXT AND WORD LABELS
        name = f'{DATA_PATH}{self.ids[index]}.txt'
        # Context manager so the handle is closed as soon as the text is read.
        with open(name, 'r') as f:
            txt = f.read()
        tokens = self.tokenizer.encode_plus(txt, max_length=self.max_len, padding='max_length',
                                   truncation=True, return_offsets_mapping=True)
        return {
          'token': torch.tensor(tokens['input_ids'], dtype=torch.long),
          'mask': torch.tensor(tokens['attention_mask'], dtype=torch.long),
           }

    def __len__(self):
        """Number of essays."""
        return len(self.ids)
    
    
def collatte(d,train=True):
    """Trim a padded batch to the longest real sequence it contains.

    The longest row is measured from ``d["mask"]`` (sum along axis 1, then
    max) and every selected tensor is sliced to that many columns.

    Args:
        d: batch dict with ``"token"`` and ``"mask"`` tensors, plus ``"y"``
            when ``train`` is true.
        train: include the ``"y"`` tensor in the output.

    Returns:
        Dict with the trimmed tensors and ``"max_len"``, the trimmed length.
    """
    longest = int(d["mask"].sum(axis=1).max())
    wanted = ("token", "mask", "y") if train else ("token", "mask")
    trimmed = {key: d[key][:, :longest] for key in wanted}
    trimmed["max_len"] = longest
    return trimmed

    
class custom_model_ex019(nn.Module):
    """Longformer backbone with four parallel Conv1d branches and eight heads.

    The backbone's last hidden state is layer-normalised, passed through four
    convolutions (kernel sizes 3, 9, 15, 31; 1024 -> 512 channels each), and
    the four branch outputs are concatenated to 2048 features per token.
    Seven heads emit 2 values each and the eighth emits 1, giving 15 values
    per token.

    Submodules are created in a loop but keep the attribute names
    ``conv1..conv4``, ``ln1..ln4`` and ``linear1..linear8``.
    """

    def __init__(self):
        super(custom_model_ex019, self).__init__()
        self.backbone = LongformerModel.from_pretrained(
            CFG_ex019.tokenizer_path,
        )

        self.ln = nn.LayerNorm(1024)

        # Convolution branches; padding = kernel // 2 keeps the sequence length.
        for idx, kernel in enumerate((3, 9, 15, 31), start=1):
            setattr(self, f"conv{idx}",
                    nn.Conv1d(1024, 512, kernel_size=kernel, padding=kernel // 2))

        # Post-convolution normalisation / activation / dropout, one per branch.
        for idx in range(1, 5):
            setattr(self, f"ln{idx}",
                    nn.Sequential(nn.LayerNorm(512),
                                  nn.ReLU(),
                                  nn.Dropout(0.2)))

        # Output heads: linear1..linear7 emit 2 values, linear8 emits 1.
        for idx, n_out in enumerate((2, 2, 2, 2, 2, 2, 2, 1), start=1):
            setattr(self, f"linear{idx}",
                    nn.Sequential(
                        nn.Linear(2048,1024),
                        nn.LayerNorm(1024),
                        nn.ReLU(),
                        nn.Dropout(0.2),
                        nn.Linear(1024,n_out),
                    ))

    def forward(self, ids, mask):
        """Return the concatenated head outputs for every token position."""
        hidden = self.backbone(ids, attention_mask=mask)["last_hidden_state"]
        # Conv1d convolves over the last axis, so move the feature axis to dim 1.
        channels_first = self.ln(hidden).permute((0, 2, 1)).contiguous()

        branches = []
        for idx in range(1, 5):
            conv = getattr(self, f"conv{idx}")
            post = getattr(self, f"ln{idx}")
            # Back to (batch, seq, channels) before the LayerNorm block.
            branches.append(post(conv(channels_first).permute((0, 2, 1)).contiguous()))
        merged = torch.cat(branches, axis=-1)

        head_outputs = [getattr(self, f"linear{idx}")(merged) for idx in range(1, 9)]
        return torch.cat(head_outputs, axis=2)
    
    
class custom_model_ex025(nn.Module):
    """Longformer backbone with four Conv1d branches and two groups of heads.

    Same layout as ``custom_model_ex019`` plus a second group of heads
    (``linear2_1..linear2_8``). ``forward`` returns only the first group's
    output (15 values per token).

    The second group is still constructed so the module keeps the same
    parameter names, but it is no longer evaluated in ``forward``: its output
    was previously computed and then discarded.

    NOTE(review): ``CFG_ex025`` is not defined in the part of the file
    reviewed here; instantiating this class requires it to exist elsewhere.
    """

    @staticmethod
    def _make_head(n_out):
        """Build one 2048 -> 1024 -> ``n_out`` head."""
        return nn.Sequential(
            nn.Linear(2048,1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024,n_out),
        )

    def __init__(self):
        super(custom_model_ex025, self).__init__()
        self.backbone = LongformerModel.from_pretrained(
            CFG_ex025.tokenizer_path,
        )

        self.ln = nn.LayerNorm(1024)

        # Convolution branches; padding = kernel // 2 keeps the sequence length.
        for idx, kernel in enumerate((3, 9, 15, 31), start=1):
            setattr(self, f"conv{idx}",
                    nn.Conv1d(1024, 512, kernel_size=kernel, padding=kernel // 2))

        # Post-convolution normalisation / activation / dropout, one per branch.
        for idx in range(1, 5):
            setattr(self, f"ln{idx}",
                    nn.Sequential(nn.LayerNorm(512),
                                  nn.ReLU(),
                                  nn.Dropout(0.2)))

        head_widths = (2, 2, 2, 2, 2, 2, 2, 1)
        # Primary heads (used by forward): linear1..linear8.
        for idx, n_out in enumerate(head_widths, start=1):
            setattr(self, f"linear{idx}", self._make_head(n_out))
        # Secondary heads (not used by forward): linear2_1..linear2_8.
        for idx, n_out in enumerate(head_widths, start=1):
            setattr(self, f"linear2_{idx}", self._make_head(n_out))

    def forward(self, ids, mask):
        """Return the concatenated primary-head outputs for every token position."""
        hidden = self.backbone(ids, attention_mask=mask)["last_hidden_state"]
        # Conv1d convolves over the last axis, so move the feature axis to dim 1.
        channels_first = self.ln(hidden).permute((0, 2, 1)).contiguous()

        branches = []
        for idx in range(1, 5):
            conv = getattr(self, f"conv{idx}")
            post = getattr(self, f"ln{idx}")
            # Back to (batch, seq, channels) before the LayerNorm block.
            branches.append(post(conv(channels_first).permute((0, 2, 1)).contiguous()))
        merged = torch.cat(branches, axis=-1)

        # Only the primary heads contribute to the result; the secondary
        # heads are skipped because their output was never returned.
        head_outputs = [getattr(self, f"linear{idx}")(merged) for idx in range(1, 9)]
        return torch.cat(head_outputs, axis=2)
    
    
class custom_model_ex046(nn.Module):
    """RoBERTa backbone followed by four parallel Conv1d branches and eight heads.

    The submodule attribute names (``backbone``, ``ln``, ``conv1``-``conv4``,
    ``ln1``-``ln4``, ``linear1``-``linear8``) define the ``state_dict`` keys, so
    they must stay as they are for previously saved weights to load.
    """

    def __init__(self):
        super().__init__()

        def branch_post():
            # LayerNorm -> ReLU -> Dropout applied to one 512-channel conv branch.
            return nn.Sequential(nn.LayerNorm(512), nn.ReLU(), nn.Dropout(0.2))

        def head(n_out):
            # Two-layer projection from the 2048-wide concatenated branch features.
            return nn.Sequential(
                nn.Linear(2048, 1024),
                nn.LayerNorm(1024),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(1024, n_out),
            )

        self.backbone = RobertaModel.from_pretrained(CFG_ex046.tokenizer_path)
        self.ln = nn.LayerNorm(1024)

        # Same input, four receptive fields; padding keeps the sequence length.
        self.conv1 = nn.Conv1d(1024, 512, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(1024, 512, kernel_size=9, padding=4)
        self.conv3 = nn.Conv1d(1024, 512, kernel_size=15, padding=7)
        self.conv4 = nn.Conv1d(1024, 512, kernel_size=31, padding=15)
        self.ln1 = branch_post()
        self.ln2 = branch_post()
        self.ln3 = branch_post()
        self.ln4 = branch_post()

        # Seven 2-channel heads and one 1-channel head: 15 output channels total.
        self.linear1 = head(2)
        self.linear2 = head(2)
        self.linear3 = head(2)
        self.linear4 = head(2)
        self.linear5 = head(2)
        self.linear6 = head(2)
        self.linear7 = head(2)
        self.linear8 = head(1)

    def forward(self, ids, mask):
        """Return the eight head outputs concatenated along axis 2.

        ``ids`` and ``mask`` are handed to the backbone as the input ids and
        ``attention_mask``; its ``last_hidden_state`` feeds the conv branches.
        """
        hidden = self.backbone(ids, attention_mask=mask)["last_hidden_state"]
        # Conv1d convolves over the last axis, so move the sequence axis there.
        channels_first = self.ln(hidden).permute((0, 2, 1)).contiguous()

        branches = []
        for conv, post in (
            (self.conv1, self.ln1),
            (self.conv2, self.ln2),
            (self.conv3, self.ln3),
            (self.conv4, self.ln4),
        ):
            branches.append(post(conv(channels_first).permute((0, 2, 1)).contiguous()))
        features = torch.cat(branches, axis=-1)

        heads = (
            self.linear1, self.linear2, self.linear3, self.linear4,
            self.linear5, self.linear6, self.linear7, self.linear8,
        )
        return torch.cat([h(features) for h in heads], axis=2)
    
class custom_model_ex048(nn.Module):
    """BART backbone followed by four parallel Conv1d branches and eight heads.

    The submodule attribute names (``backbone``, ``ln``, ``conv1``-``conv4``,
    ``ln1``-``ln4``, ``linear1``-``linear8``) define the ``state_dict`` keys, so
    they must stay as they are for previously saved weights to load.
    """

    def __init__(self):
        super().__init__()

        def branch_post():
            # LayerNorm -> ReLU -> Dropout applied to one 512-channel conv branch.
            return nn.Sequential(nn.LayerNorm(512), nn.ReLU(), nn.Dropout(0.2))

        def head(n_out):
            # Two-layer projection from the 2048-wide concatenated branch features.
            return nn.Sequential(
                nn.Linear(2048, 1024),
                nn.LayerNorm(1024),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(1024, n_out),
            )

        self.backbone = BartModel.from_pretrained(CFG_ex048.tokenizer_path)
        self.ln = nn.LayerNorm(1024)

        # Same input, four receptive fields; padding keeps the sequence length.
        self.conv1 = nn.Conv1d(1024, 512, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(1024, 512, kernel_size=9, padding=4)
        self.conv3 = nn.Conv1d(1024, 512, kernel_size=15, padding=7)
        self.conv4 = nn.Conv1d(1024, 512, kernel_size=31, padding=15)
        self.ln1 = branch_post()
        self.ln2 = branch_post()
        self.ln3 = branch_post()
        self.ln4 = branch_post()

        # Seven 2-channel heads and one 1-channel head: 15 output channels total.
        self.linear1 = head(2)
        self.linear2 = head(2)
        self.linear3 = head(2)
        self.linear4 = head(2)
        self.linear5 = head(2)
        self.linear6 = head(2)
        self.linear7 = head(2)
        self.linear8 = head(1)

    def forward(self, ids, mask):
        """Return the eight head outputs concatenated along axis 2.

        ``ids`` and ``mask`` are handed to the backbone as the input ids and
        ``attention_mask``; its ``last_hidden_state`` feeds the conv branches.
        """
        hidden = self.backbone(ids, attention_mask=mask)["last_hidden_state"]
        # Conv1d convolves over the last axis, so move the sequence axis there.
        channels_first = self.ln(hidden).permute((0, 2, 1)).contiguous()

        branches = []
        for conv, post in (
            (self.conv1, self.ln1),
            (self.conv2, self.ln2),
            (self.conv3, self.ln3),
            (self.conv4, self.ln4),
        ):
            branches.append(post(conv(channels_first).permute((0, 2, 1)).contiguous()))
        features = torch.cat(branches, axis=-1)

        heads = (
            self.linear1, self.linear2, self.linear3, self.linear4,
            self.linear5, self.linear6, self.linear7, self.linear8,
        )
        return torch.cat([h(features) for h in heads], axis=2)
    
class custom_model_ex067(nn.Module):
    """DeBERTa backbone followed by four parallel Conv1d branches and eight heads.

    The submodule attribute names (``backbone``, ``ln``, ``conv1``-``conv4``,
    ``ln1``-``ln4``, ``linear1``-``linear8``) define the ``state_dict`` keys, so
    they must stay as they are for previously saved weights to load.
    """

    def __init__(self):
        super().__init__()

        def branch_post():
            # LayerNorm -> ReLU -> Dropout applied to one 512-channel conv branch.
            return nn.Sequential(nn.LayerNorm(512), nn.ReLU(), nn.Dropout(0.2))

        def head(n_out):
            # Two-layer projection from the 2048-wide concatenated branch features.
            return nn.Sequential(
                nn.Linear(2048, 1024),
                nn.LayerNorm(1024),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(1024, n_out),
            )

        self.backbone = DebertaModel.from_pretrained(CFG_ex067.tokenizer_path)
        self.ln = nn.LayerNorm(1024)

        # Same input, four receptive fields; padding keeps the sequence length.
        self.conv1 = nn.Conv1d(1024, 512, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(1024, 512, kernel_size=9, padding=4)
        self.conv3 = nn.Conv1d(1024, 512, kernel_size=15, padding=7)
        self.conv4 = nn.Conv1d(1024, 512, kernel_size=31, padding=15)
        self.ln1 = branch_post()
        self.ln2 = branch_post()
        self.ln3 = branch_post()
        self.ln4 = branch_post()

        # Seven 2-channel heads and one 1-channel head: 15 output channels total.
        self.linear1 = head(2)
        self.linear2 = head(2)
        self.linear3 = head(2)
        self.linear4 = head(2)
        self.linear5 = head(2)
        self.linear6 = head(2)
        self.linear7 = head(2)
        self.linear8 = head(1)

    def forward(self, ids, mask):
        """Return the eight head outputs concatenated along axis 2.

        ``ids`` and ``mask`` are handed to the backbone as the input ids and
        ``attention_mask``; its ``last_hidden_state`` feeds the conv branches.
        """
        hidden = self.backbone(ids, attention_mask=mask)["last_hidden_state"]
        # Conv1d convolves over the last axis, so move the sequence axis there.
        channels_first = self.ln(hidden).permute((0, 2, 1)).contiguous()

        branches = []
        for conv, post in (
            (self.conv1, self.ln1),
            (self.conv2, self.ln2),
            (self.conv3, self.ln3),
            (self.conv4, self.ln4),
        ):
            branches.append(post(conv(channels_first).permute((0, 2, 1)).contiguous()))
        features = torch.cat(branches, axis=-1)

        heads = (
            self.linear1, self.linear2, self.linear3, self.linear4,
            self.linear5, self.linear6, self.linear7, self.linear8,
        )
        return torch.cat([h(features) for h in heads], axis=2)
    
class custom_model_ex051(nn.Module):
    """Funnel backbone followed by four parallel Conv1d branches and eight heads.

    The submodule attribute names (``backbone``, ``ln``, ``conv1``-``conv4``,
    ``ln1``-``ln4``, ``linear1``-``linear8``) define the ``state_dict`` keys, so
    they must stay as they are for previously saved weights to load.
    """

    def __init__(self):
        super().__init__()

        def branch_post():
            # LayerNorm -> ReLU -> Dropout applied to one 512-channel conv branch.
            return nn.Sequential(nn.LayerNorm(512), nn.ReLU(), nn.Dropout(0.2))

        def head(n_out):
            # Two-layer projection from the 2048-wide concatenated branch features.
            return nn.Sequential(
                nn.Linear(2048, 1024),
                nn.LayerNorm(1024),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(1024, n_out),
            )

        self.backbone = FunnelModel.from_pretrained(CFG_ex051.tokenizer_path)
        self.ln = nn.LayerNorm(1024)

        # Same input, four receptive fields; padding keeps the sequence length.
        self.conv1 = nn.Conv1d(1024, 512, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(1024, 512, kernel_size=9, padding=4)
        self.conv3 = nn.Conv1d(1024, 512, kernel_size=15, padding=7)
        self.conv4 = nn.Conv1d(1024, 512, kernel_size=31, padding=15)
        self.ln1 = branch_post()
        self.ln2 = branch_post()
        self.ln3 = branch_post()
        self.ln4 = branch_post()

        # Seven 2-channel heads and one 1-channel head: 15 output channels total.
        self.linear1 = head(2)
        self.linear2 = head(2)
        self.linear3 = head(2)
        self.linear4 = head(2)
        self.linear5 = head(2)
        self.linear6 = head(2)
        self.linear7 = head(2)
        self.linear8 = head(1)

    def forward(self, ids, mask):
        """Return the eight head outputs concatenated along axis 2.

        ``ids`` and ``mask`` are handed to the backbone as the input ids and
        ``attention_mask``; its ``last_hidden_state`` feeds the conv branches.
        """
        hidden = self.backbone(ids, attention_mask=mask)["last_hidden_state"]
        # Conv1d convolves over the last axis, so move the sequence axis there.
        channels_first = self.ln(hidden).permute((0, 2, 1)).contiguous()

        branches = []
        for conv, post in (
            (self.conv1, self.ln1),
            (self.conv2, self.ln2),
            (self.conv3, self.ln3),
            (self.conv4, self.ln4),
        ):
            branches.append(post(conv(channels_first).permute((0, 2, 1)).contiguous()))
        features = torch.cat(branches, axis=-1)

        heads = (
            self.linear1, self.linear2, self.linear3, self.linear4,
            self.linear5, self.linear6, self.linear7, self.linear8,
        )
        return torch.cat([h(features) for h in heads], axis=2)

    
class custom_model_ex064(nn.Module):
    """BART-family backbone (loaded from ``CFG_ex064``) with conv branches and eight heads.

    The submodule attribute names (``backbone``, ``ln``, ``conv1``-``conv4``,
    ``ln1``-``ln4``, ``linear1``-``linear8``) define the ``state_dict`` keys, so
    they must stay as they are for previously saved weights to load.
    """

    def __init__(self):
        super().__init__()

        def branch_post():
            # LayerNorm -> ReLU -> Dropout applied to one 512-channel conv branch.
            return nn.Sequential(nn.LayerNorm(512), nn.ReLU(), nn.Dropout(0.2))

        def head(n_out):
            # Two-layer projection from the 2048-wide concatenated branch features.
            return nn.Sequential(
                nn.Linear(2048, 1024),
                nn.LayerNorm(1024),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(1024, n_out),
            )

        self.backbone = BartModel.from_pretrained(CFG_ex064.tokenizer_path)
        self.ln = nn.LayerNorm(1024)

        # Same input, four receptive fields; padding keeps the sequence length.
        self.conv1 = nn.Conv1d(1024, 512, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(1024, 512, kernel_size=9, padding=4)
        self.conv3 = nn.Conv1d(1024, 512, kernel_size=15, padding=7)
        self.conv4 = nn.Conv1d(1024, 512, kernel_size=31, padding=15)
        self.ln1 = branch_post()
        self.ln2 = branch_post()
        self.ln3 = branch_post()
        self.ln4 = branch_post()

        # Seven 2-channel heads and one 1-channel head: 15 output channels total.
        self.linear1 = head(2)
        self.linear2 = head(2)
        self.linear3 = head(2)
        self.linear4 = head(2)
        self.linear5 = head(2)
        self.linear6 = head(2)
        self.linear7 = head(2)
        self.linear8 = head(1)

    def forward(self, ids, mask):
        """Return the eight head outputs concatenated along axis 2.

        ``ids`` and ``mask`` are handed to the backbone as the input ids and
        ``attention_mask``; its ``last_hidden_state`` feeds the conv branches.
        """
        hidden = self.backbone(ids, attention_mask=mask)["last_hidden_state"]
        # Conv1d convolves over the last axis, so move the sequence axis there.
        channels_first = self.ln(hidden).permute((0, 2, 1)).contiguous()

        branches = []
        for conv, post in (
            (self.conv1, self.ln1),
            (self.conv2, self.ln2),
            (self.conv3, self.ln3),
            (self.conv4, self.ln4),
        ):
            branches.append(post(conv(channels_first).permute((0, 2, 1)).contiguous()))
        features = torch.cat(branches, axis=-1)

        heads = (
            self.linear1, self.linear2, self.linear3, self.linear4,
            self.linear5, self.linear6, self.linear7, self.linear8,
        )
        return torch.cat([h(features) for h in heads], axis=2)
    

In [None]:
class mlp_Dataset(Dataset):
    """Index-aligned categorical and numeric features for ``mlp_model``.

    Each constructor argument is an indexable collection; item ``i`` of the
    dataset is built from element ``i`` of every collection. When ``train`` is
    true the target ``y[i]`` is included as well.
    """

    def __init__(self, class_val, start_val, second_val, num_features,
                 shift1class_val, shiftm1class_val, y=None, train=True):
        self.class_val = class_val
        self.start_val = start_val
        self.second_val = second_val
        self.num_features = num_features
        self.shift1class_val = shift1class_val
        self.shiftm1class_val = shiftm1class_val
        self.y = y
        self.train = train

    def __len__(self):
        return len(self.class_val)

    def __getitem__(self, item):
        """Return one sample as a dict of tensors (``"y"`` only in train mode)."""
        # Categorical inputs become int64 tensors for embedding lookups.
        categorical = (
            ('input_data_class', self.class_val),
            ('input_data_start', self.start_val),
            ('input_data_second', self.second_val),
            ('input_data_shift1_class', self.shift1class_val),
            ('input_data_shiftm1_class', self.shiftm1class_val),
        )
        sample = {
            key: torch.tensor(values[item], dtype=torch.long)
            for key, values in categorical
        }
        sample['input_data_num_features'] = torch.tensor(
            self.num_features[item], dtype=torch.float32)
        if self.train:
            sample["y"] = torch.tensor(self.y[item], dtype=torch.float32)
        return sample

class mlp_model(nn.Module):
    """Embedding + 1D-conv network that maps one row of features to one logit.

    Categorical inputs are embedded, concatenated with the numeric features,
    projected to ``emb`` values, viewed as a 128-channel signal of length 16 and
    passed through two Conv1d stages and a two-layer dense head.

    Notes:
        * The ``dropout`` argument is accepted but not used; every Dropout layer
          is created with a fixed rate of 0.2.
        * ``forward`` reshapes to ``(-1, 128, 16)``, so ``emb`` has to be 2048.
    """

    def __init__(
            self, dropout=0.2, class_unique=7, start_unique=2, second_unique=2,
            class_emb=15, start_emb=5, num_emb=107, emb=2048,
            shift1_class=8, shiftm1_class=8):
        super().__init__()

        # --- categorical embeddings -------------------------------------
        self.embedding_class = nn.Embedding(
            num_embeddings=class_unique, embedding_dim=class_emb)
        self.embedding_start = nn.Embedding(
            num_embeddings=start_unique, embedding_dim=start_emb)
        self.embedding_second = nn.Embedding(
            num_embeddings=second_unique, embedding_dim=start_emb)
        self.embedding_shift1_class = nn.Embedding(
            num_embeddings=shift1_class, embedding_dim=class_emb)
        self.embedding_shiftm1_class = nn.Embedding(
            num_embeddings=shiftm1_class, embedding_dim=class_emb)

        # Three class-sized and two start-sized embeddings plus numeric features.
        concat_width = class_emb * 3 + start_emb * 2 + num_emb
        self.concat_embedding = nn.Sequential(
            nn.Linear(concat_width, emb),
            nn.BatchNorm1d(emb)
        )

        # --- first conv stage -------------------------------------------
        self.batch_norm_c1 = nn.BatchNorm1d(256)
        self.dropout_c1 = nn.Dropout(0.2)
        self.conv1 = nn.Conv1d(128, 256, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()

        self.ave_po_c1 = nn.AdaptiveAvgPool1d(output_size=8)

        # --- second conv stage ------------------------------------------
        self.batch_norm_c2 = nn.BatchNorm1d(256)
        self.dropout_c2 = nn.Dropout(0.2)
        self.conv2 = nn.Conv1d(256, 256, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.flt = nn.Flatten()

        # --- dense head (256 channels * 8 positions = 2048 inputs) -------
        self.linear3 = nn.Linear(2048, 512)
        self.batch_norm_c3 = nn.BatchNorm1d(512)
        self.dropout_c3 = nn.Dropout(0.2)
        self.relu3 = nn.ReLU()
        self.linear4 = nn.Linear(512, 1)

    def forward(self, class_val, start_val, second_val, num_val,
                shift1class_val, shiftm1class_val):
        """Return a ``(batch, 1)`` tensor of raw scores for the given features."""
        # NOTE(review): ``second_val`` is looked up in ``embedding_start``, not
        # ``embedding_second``. This looks unintended, but saved weights were
        # presumably trained with exactly this wiring, so it must not be
        # changed without retraining - confirm against the training code.
        pieces = [
            self.embedding_class(class_val),
            self.embedding_start(start_val),
            self.embedding_start(second_val),
            num_val,
            self.embedding_shift1_class(shift1class_val),
            self.embedding_shiftm1_class(shiftm1class_val),
        ]
        x = self.concat_embedding(torch.cat(pieces, axis=1))
        # View the flat projection as 128 channels of length 16 for Conv1d.
        x = x.reshape(-1, 128, 16)

        x = self.dropout_c1(self.relu1(self.batch_norm_c1(self.conv1(x))))
        x = self.ave_po_c1(x)

        x = self.dropout_c2(self.relu2(self.batch_norm_c2(self.conv2(x))))
        x = self.flt(x)

        x = self.dropout_c3(self.relu3(self.batch_norm_c3(self.linear3(x))))
        return self.linear4(x)
    
    
class lstm_Dataset(Dataset):
    """Index-aligned sequence features (plus mask) for ``lstm_model``.

    Item ``i`` is built from element ``i`` of ``num_seq``, ``class_seq``,
    ``start_seq`` and ``mask``; in train mode ``y[i]`` is added as well.
    """

    def __init__(self, num_seq, class_seq, start_seq, mask, y=None, train=True):
        self.num_seq = num_seq
        self.class_seq = class_seq
        self.start_seq = start_seq
        self.mask = mask
        self.y = y
        self.train = train

    def __len__(self):
        return len(self.num_seq)

    def __getitem__(self, item):
        """Return one sample as a dict of tensors (``"y"`` only in train mode)."""
        sample = {
            'input_data_num_seq': torch.tensor(self.num_seq[item], dtype=torch.float32),
        }
        # Categorical sequences and the mask are stored as int64.
        for key, values in (
            ('input_data_class_seq', self.class_seq),
            ('input_data_start_seq', self.start_seq),
            ('input_data_mask', self.mask),
        ):
            sample[key] = torch.tensor(values[item], dtype=torch.long)
        if self.train:
            sample["y"] = torch.tensor(self.y[item], dtype=torch.float32)
        return sample
        
        
class lstm_model(nn.Module):
    """Bidirectional LSTM that scores every position of a feature sequence.

    Class and start ids are embedded, concatenated with the numeric features
    along the last axis, projected to ``d_model`` and run through a
    batch-first bidirectional LSTM followed by a two-layer dense head.

    The input projection is sized ``class_emb + start_emb + 41``, i.e. the
    numeric input is expected to carry 41 values per position.
    """

    def __init__(
            self, dropout=0.2, class_unique=7 + 1, start_unique=2 + 1,
            class_emb=15, start_emb=5, d_model=128, hidden_size1=128,
            output_dim=1):
        super().__init__()
        self.embedding_class = nn.Embedding(
            num_embeddings=class_unique, embedding_dim=class_emb)
        self.embedding_start = nn.Embedding(
            num_embeddings=start_unique, embedding_dim=start_emb)

        projection_in = class_emb + start_emb + 41
        self.concat_emb = nn.Sequential(
            nn.Linear(projection_in, d_model),
            nn.LayerNorm(d_model),
        )
        self.lstm = nn.LSTM(d_model, d_model, bidirectional=True, batch_first=True)

        # Dense head; the LSTM emits both directions, hence d_model * 2 inputs.
        self.linear1 = nn.Linear(d_model * 2, hidden_size1)
        self.layernorm = nn.LayerNorm(hidden_size1)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size1, output_dim)

    def forward(self, num_seq, class_seq, start_seq):
        """Return per-position scores with ``output_dim`` values on the last axis."""
        features = torch.cat(
            [
                self.embedding_class(class_seq),
                self.embedding_start(start_seq),
                num_seq,
            ],
            axis=-1,
        )
        # Only the per-step outputs are used; the final (h, c) state is dropped.
        sequence, _ = self.lstm(self.concat_emb(features))
        hidden = self.relu(self.layernorm(self.linear1(sequence)))
        return self.linear2(self.dropout(hidden))

In [None]:
def char2longformertoken(preds, longformer_tokenizer, tokenizer, pred_id, max_len=2048, token_len=512, dataset="test"):
    """Re-align per-token predictions from ``tokenizer`` onto Longformer tokens.

    For every document the predictions are first spread over the characters
    each source token covers, then averaged over the characters each
    Longformer token covers.

    Args:
        preds: array indexed as ``preds[doc, token]`` giving 15 values per token.
        longformer_tokenizer: tokenizer defining the target token grid; must
            support ``encode_plus(..., return_offsets_mapping=True)``.
        tokenizer: tokenizer that produced ``preds``; same requirement.
        pred_id: document ids, aligned with the first axis of ``preds``; each is
            read from ``../input/feedback-prize-2021/test/{id}.txt``.
        max_len: padded/truncated length used for the Longformer encoding.
        token_len: padded/truncated length used for the source encoding.
        dataset: unused; kept so existing callers keep working.

    Returns:
        float32 array of shape ``(len(preds), L, 15)`` where ``L`` is one past
        the last token position holding a positive total over all documents
        (``0`` when no position does).
    """
    output_preds = np.zeros((len(preds), max_len, 15), dtype=np.float32)

    for id_num in tqdm(range(len(preds))):
        n = pred_id[id_num]
        name = f'../input/feedback-prize-2021/test/{n}.txt'
        # Context manager so the handle is closed even if reading fails.
        with open(name, 'r') as f:
            txt = f.read()

        char_pred = np.zeros([len(txt), 15], dtype=np.float32)
        longformer_tokens = longformer_tokenizer.encode_plus(
            txt,
            max_length=max_len,
            padding='max_length',
            truncation=True,
            return_offsets_mapping=True
        )
        tokens = tokenizer.encode_plus(
            txt,
            max_length=token_len,
            padding='max_length',
            truncation=True,
            return_offsets_mapping=True
        )
        # Entries with an empty span or an end offset of 0 are skipped
        # (presumably special/padding tokens - confirm for each tokenizer).
        for m, o in enumerate(tokens['offset_mapping']):
            if o[1] != 0 and o[0] != o[1]:
                char_pred[o[0]:o[1]] = preds[id_num, m]

        for m, o in enumerate(longformer_tokens['offset_mapping']):
            if o[1] != 0 and o[0] != o[1]:
                output_preds[id_num, m, :] = np.mean(char_pred[o[0]:o[1]], axis=0)

    # Trim trailing positions that are zero for every document. Guard the empty
    # case: calling .max() on an empty index array would raise ValueError.
    used_positions = np.where(output_preds.sum(axis=-1).sum(axis=0) > 0)[0]
    max_index = used_positions.max() + 1 if used_positions.size else 0
    output_preds = output_preds[:, :max_index, :]
    print('shape:', output_preds.shape)
    return output_preds


def get_preds(CFG, custom_model):
    """Run every fold checkpoint of one first-stage model and average the sigmoid outputs.

    Args:
        CFG: config class providing ``max_len``, ``batch_size``,
            ``tokenizer_path``, ``model_path`` and ``model_prefix``; ``n_fold``
            is used when present (defaults to 5).
        custom_model: zero-argument callable building the network into which
            each ``{model_prefix}{fold}.pth`` state dict is loaded.

    Returns:
        ``(sub_preds0, test_len)``: the fold-averaged probabilities as a
        float32 array ``(len(IDS), CFG.max_len, 15)`` (positions past a batch's
        output length stay zero), and a float64 array with one entry per
        document holding the ``d["max_len"]`` of the batch it was in
        (collected during fold 0).
    """
    n_fold = getattr(CFG, "n_fold", 5)
    sub_preds0 = np.zeros((len(IDS), CFG.max_len, 15), dtype=np.float32)
    tokenizer = AutoTokenizer.from_pretrained(CFG.tokenizer_path)

    test_dataset = TestDataset(IDS, CFG.max_len, tokenizer)
    test_loader = DataLoader(test_dataset,
                             batch_size=CFG.batch_size,
                             shuffle=False,
                             pin_memory=True, drop_last=False)

    batch_lens = []
    for fold in tqdm(range(n_fold)):
        model = custom_model()
        # Load on CPU first so the checkpoint opens regardless of the device it
        # was saved from; the model is moved to ``device`` right after.
        state = torch.load(CFG.model_path + f"/{CFG.model_prefix}{fold}.pth", map_location="cpu")
        model.load_state_dict(state)
        del state
        model.to(device)
        model.eval()

        row = 0
        with torch.no_grad():
            for d in test_loader:
                d = collatte(d, train=False)
                ids = d['token'].to(device)
                mask = d['mask'].to(device)
                with autocast():
                    outputs = model(ids, mask)
                probs = outputs.sigmoid().detach().cpu().numpy().astype(np.float32)

                # Accumulate straight into the result instead of zero-padding
                # and re-concatenating a full-size array on every batch.
                n_rows, n_tokens = probs.shape[0], probs.shape[1]
                sub_preds0[row:row + n_rows, :n_tokens, :] += probs / n_fold
                row += n_rows

                if fold == 0:
                    batch_lens.extend([d["max_len"]] * len(ids))

        torch.cuda.empty_cache()
        del model
        gc.collect()

    test_len = np.asarray(batch_lens, dtype=np.float64)
    print(sub_preds0.shape)
    del test_dataset, test_loader
    gc.collect()
    return sub_preds0, test_len

In [None]:
# ================================================
# Prepare dynamic padding
# ================================================
# Keep only the essay files, and build ids and lengths from the SAME list so
# that the argsort below always lines up with IDS.
files = [f for f in os.listdir(DATA_PATH) if f.endswith('.txt')]
IDS = np.array([f[:-len('.txt')] for f in files])
txt_len = []
for i in files:
    with open(f'{DATA_PATH}{i}', 'r') as fh:
        txt = fh.read()
    txt_len.append(len(txt))
# Order documents from shortest to longest text.
IDS = IDS[np.argsort(txt_len)]

In [None]:
# ================================================
# 1st stage: fold-averaged token probabilities, one array per model
# ================================================
# exp019 longformer large + 1dcnn (the only run whose test_len is kept)
sub_preds0, test_len = get_preds(CFG_ex019, custom_model_ex019)

# exp046 roberta-large
sub_preds1, _ = get_preds(CFG_ex046, custom_model_ex046)

# exp048 bart large
sub_preds2, _ = get_preds(CFG_ex048, custom_model_ex048)

# exp067 deberta-large
sub_preds3, _ = get_preds(CFG_ex067, custom_model_ex067)

# exp051 funnel-large
sub_preds4, _ = get_preds(CFG_ex051, custom_model_ex051)

# exp064 distilbart
sub_preds5, _ = get_preds(CFG_ex064, custom_model_ex064)

In [None]:
# ================================================
# Token => Char => Token
# ================================================
# Re-project the deberta and funnel predictions onto Longformer tokens
# (via character offsets) before they are blended with the other models.
longformer_tokenizer = AutoTokenizer.from_pretrained(CFG_ex019.tokenizer_path)


def _to_longformer_tokens(preds, cfg):
    """Call char2longformertoken with the tokenizer and length taken from ``cfg``."""
    return char2longformertoken(
        preds=preds,
        longformer_tokenizer=longformer_tokenizer,
        tokenizer=AutoTokenizer.from_pretrained(cfg.tokenizer_path),
        pred_id=IDS,
        max_len=2048,
        token_len=cfg.max_len,
        dataset="test")


sub_preds3 = _to_longformer_tokens(sub_preds3, CFG_ex067)  # deberta large
sub_preds4 = _to_longformer_tokens(sub_preds4, CFG_ex051)  # funnel large

In [None]:
# ================================================
# Ensemble
# ================================================
model_names = ['longformer', 'roberta', 'bart', 'deberta', 'funnel', "distilbart"]

# One "_B" and one "_I" name per target_map key (in key order), then 'O'.
bio_names = [k + suffix for k in target_map.keys() for suffix in ('_B', '_I')]
bio_names.append('O')

# "<model>_<label>" for every model / label combination.
variable_names = [name + '_' + bio for name in model_names for bio in bio_names]

# Token-axis length of each model's prediction array.
(longformer_len, roberta_len, bart_len,
 deberta_len, fuunel_len, distilbart_len) = (
    preds.shape[1]
    for preds in (sub_preds0, sub_preds1, sub_preds2, sub_preds3, sub_preds4, sub_preds5)
)

len_lst = [longformer_len, roberta_len, bart_len, deberta_len, fuunel_len, distilbart_len]
# Distinct lengths in ascending order; they act as segment boundaries below.
len_unq_lst = sorted(set(len_lst))
print(len_unq_lst)

def get_ensemble_preds(
    len_unq_lst, 
    w, 
    longformer, 
    roberta, 
    bart, 
    deberta, 
    funnel, 
    distilbart
    ):
    """Blend the per-model predictions segment by segment along the token axis.

    ``len_unq_lst`` supplies four ascending boundaries ``b0 < b1 < b2 < b3``:

    * ``[0, b0)``   all six models, weights ``w[0:6]`` in the order
      longformer, deberta, funnel, roberta, bart, distilbart
    * ``[b0, b1)``  longformer, deberta, funnel with ``w[6:9]``
    * ``[b1, b2)``  longformer, deberta with ``w[9:11]``
    * ``[b2, b3)``  longformer alone, unweighted

    Returns:
        float32 array shaped like ``longformer``; positions from ``b3`` on
        stay zero.
    """
    b0 = len_unq_lst[0]
    b1 = len_unq_lst[1]
    b2 = len_unq_lst[2]
    b3 = len_unq_lst[3]

    def _mix(window, members):
        # Left-to-right weighted sum of every member restricted to ``window``.
        first_arr, first_weight = members[0]
        total = first_arr[:, window, :] * first_weight
        for arr, weight in members[1:]:
            total = total + arr[:, window, :] * weight
        return total

    blended = np.zeros_like(longformer, dtype=np.float32)

    blended[:, 0:b0, :] += _mix(slice(0, b0), [
        (longformer, w[0]),
        (deberta, w[1]),
        (funnel, w[2]),
        (roberta, w[3]),
        (bart, w[4]),
        (distilbart, w[5]),
    ])

    blended[:, b0:b1, :] += _mix(slice(b0, b1), [
        (longformer, w[6]),
        (deberta, w[7]),
        (funnel, w[8]),
    ])

    blended[:, b1:b2, :] += _mix(slice(b1, b2), [
        (longformer, w[9]),
        (deberta, w[10]),
    ])

    blended[:, b2:b3, :] += longformer[:, b2:b3, :]
    return blended

# Blend weights, consumed by get_ensemble_preds as w[0:6], w[6:9] and w[9:11].
w = [
    # first segment: longformer, deberta, funnel, roberta, bart, distilbart
    0.24184607824700105,
    0.18400482284411232,
    0.24723517454937421,
    0.04129454852214315,
    0.17143565406629763,
    0.11418372177107165,
    # second segment: longformer, deberta, funnel
    0.2966133594221963,
    0.2911699110109591,
    0.4122167295668446,
    # third segment: longformer, deberta
    0.8888865848806372,
    0.11111341511936275,
]

sub_preds = get_ensemble_preds(
    len_unq_lst=len_unq_lst,
    w=w,
    longformer=sub_preds0,
    roberta=sub_preds1,
    bart=sub_preds2,
    deberta=sub_preds3,
    funnel=sub_preds4,
    distilbart=sub_preds5,
)

# The per-model arrays are no longer needed once blended.
del w, sub_preds0, sub_preds1, sub_preds2, sub_preds3, sub_preds4, sub_preds5
gc.collect()

# normalization [0~1]: divide every token's label scores by their sum
# (the epsilon avoids a division by zero on all-zero positions).
sum_sub_preds = sub_preds.sum(axis=-1, keepdims=True) + 1e-15
sub_preds = sub_preds / sum_sub_preds
print(sub_preds)

In [None]:
# ================================================
# Weighting
# ================================================
def weight_labels_score(target_map, oof_pred, weights):
    """Scale the label channels of ``oof_pred`` in place and return it.

    Args:
        target_map: maps a class name to its integer id; the "B" channel of a
            class is ``2 * id`` and its "I" channel ``2 * id + 1``.
        oof_pred: array whose last axis holds the label channels; modified in
            place.
        weights: either a mapping keyed by ``"<class>_B"``, ``"<class>_I"`` or
            ``"O"`` (channel 14), or a sequence with one factor per channel
            starting at channel 0.

    Returns:
        The same ``oof_pred`` object, after scaling.
    """
    # isinstance (rather than an exact type comparison) so that dict
    # subclasses such as OrderedDict take the keyed branch as well.
    if isinstance(weights, dict):
        for key, weight in weights.items():
            parts = key.split('_')
            label = parts[0]
            if label == "O":
                idx = 14
            else:
                idx = target_map[label] * 2 + (0 if parts[1] == "B" else 1)
            oof_pred[:, :, idx] *= weight
    else:
        for i, weight in enumerate(weights):
            oof_pred[:, :, i] *= weight

    return oof_pred


# Per-label multipliers applied to the normalised ensemble scores.
weights = {
    'Claim_B': 0.9149562809773297, 'Claim_I': 0.8620142880910798,
    'Concluding Statement_B': 0.984674716755495, 'Concluding Statement_I': 1.000519439831772,
    'Counterclaim_B': 0.9147544869816964, 'Counterclaim_I': 1.1494319296540767,
    'Evidence_B': 1.0867709452911303, 'Evidence_I': 1.0216596474248285,
    'Lead_B': 1.0434384111736767, 'Lead_I': 1.0226444903418863,
    'O': 0.8189547714458012,
    'Position_B': 1.0658158817759569, 'Position_I': 0.875215904989662,
    'Rebuttal_B': 1.0007876209721709, 'Rebuttal_I': 1.1801748766472047,
}
sub_preds = weight_labels_score(target_map, sub_preds, weights)

# Smooth along the token axis: every interior token becomes
# 0.85 * itself + 0.075 * previous + 0.075 * next.  The right-hand side is
# fully evaluated before the write-back, so only unsmoothed values are read.
smoothed_interior = (
    sub_preds[:, 1:-1, :] * 0.85
    + sub_preds[:, 0:-2, :] * 0.075
    + sub_preds[:, 2:, :] * 0.075
)
sub_preds[:, 1:-1, :] = smoothed_interior
del smoothed_interior
print(sub_preds.shape)

del weights
gc.collect()

In [None]:
# ================================================
# Prepare for PP
# ================================================
def _with_stat_columns(candidates, stats, second_flag):
    """Append the mean/std/max/min tables to ``candidates`` and set its "second" flag.

    ``stats`` holds the four arrays returned by get_preds_collate_xgboost after
    the candidate frame, in the order (mean, std, max, min).
    """
    frames = [candidates]
    for stat_name, values in zip(("mean", "std", "max", "min"), stats):
        frames.append(
            pd.DataFrame(values, columns=[f"pred_{stat_name}_{i}" for i in range(15)]))
    merged = pd.concat(frames, axis=1)
    merged["second"] = second_flag
    return merged


# ---- candidates from the highest-scoring label of every token ----
sub_preds_max = np.argmax(sub_preds, axis=-1)
sub_preds_max_proba = np.max(sub_preds, axis=-1)
sub, sub_mean, sub_std, sub_max, sub_min = get_preds_collate_xgboost(
    IDS,
    sub_preds_max,
    test_len,
    sub_preds_max_proba,
    sub_preds,
    0,
    longformer_tokenizer)
del sub_preds_max_proba
gc.collect()

sub = _with_stat_columns(sub, (sub_mean, sub_std, sub_max, sub_min), 0)
del sub_mean, sub_std, sub_max, sub_min
gc.collect()

# ---- candidates from the second highest-scoring label of every token ----
sub_preds_max_second = np.argsort(sub_preds, axis=-1)[:, :, -2]
sub_preds_max_proba_second = np.zeros(sub_preds_max_second.shape)
token_positions = np.arange(sub_preds.shape[1])
for doc_idx in tqdm(range(len(sub_preds_max_proba_second))):
    # Score of the runner-up label at each token position of this document.
    sub_preds_max_proba_second[doc_idx, :] = sub_preds[
        doc_idx, token_positions, sub_preds_max_second[doc_idx]]

sub_second, sub_mean_second, sub_std_second, sub_max_second, sub_min_second = get_preds_collate_xgboost(
    IDS,
    sub_preds_max_second,
    test_len,
    sub_preds_max_proba_second,
    sub_preds,
    4,
    longformer_tokenizer)
del sub_preds_max, sub_preds_max_proba_second, sub_preds_max_second, sub_preds
gc.collect()

sub_second = _with_stat_columns(
    sub_second,
    (sub_mean_second, sub_std_second, sub_max_second, sub_min_second),
    1)
del sub_mean_second, sub_std_second, sub_max_second, sub_min_second
gc.collect()

print("sub",len(sub))
print("sub_second",len(sub_second))

# Stack both candidate sets; the "second" column tells them apart.
sub = pd.concat([sub, sub_second]).reset_index(drop=True)
del sub_second
gc.collect()

In [None]:
# ================================================
# FE
# ================================================
# Per-document context: mean of pred_len / proba over all rows sharing an id,
# plus each row's deviation from that mean.
id_len_mean = sub.groupby(by="id")["pred_len"].mean().to_dict()
id_proba_mean = sub.groupby(by="id")["proba"].mean().to_dict()
for stem, source_col, lookup in (
        ("len", "pred_len", id_len_mean),
        ("proba", "proba", id_proba_mean)):
    sub[f"id_{stem}_mean"] = sub["id"].map(lookup)
    sub[f"id_{stem}_mean_diff"] = sub[source_col] - sub[f"id_{stem}_mean"]

# Per-(document, class) key and the number of rows sharing it.
sub["id_class"] = sub["id"].astype(str) + "-" + sub["class"].astype(str)
id_class_count_dict = sub["id_class"].value_counts().to_dict()
sub["id_class_count"] = sub["id_class"].map(id_class_count_dict)

# Same mean / deviation features, now within each (document, class) group.
id_class_len_mean = sub.groupby(by="id_class")["pred_len"].mean().to_dict()
id_class_proba_mean = sub.groupby(by="id_class")["proba"].mean().to_dict()
for stem, source_col, lookup in (
        ("len", "pred_len", id_class_len_mean),
        ("proba", "proba", id_class_proba_mean)):
    sub[f"id_class_{stem}_mean"] = sub["id_class"].map(lookup)
    sub[f"id_class_{stem}_mean_diff"] = sub[source_col] - sub[f"id_class_{stem}_mean"]

In [None]:
def sort_by_id_second_predstr(oof):
    """Return a copy of ``oof`` ordered by id, "second" and first word index.

    The first whitespace-separated item of ``predictionstring`` is compared as
    an integer, so "2 3" sorts before "10 11".  The input frame is left
    untouched and the result gets a fresh 0..n-1 index.
    """
    first_word_idx = [int(ps.split()[0]) for ps in oof["predictionstring"]]
    ordered = (
        oof.assign(_1st_pred_str=first_word_idx)
        .sort_values(["id", "second", "_1st_pred_str"])
        .reset_index(drop=True)
    )
    return ordered.drop(columns="_1st_pred_str")

sub = sort_by_id_second_predstr(sub)  # order rows by id, "second" flag and first word index

In [None]:
# XGBoost
sub["class"] = sub["class"].map(target_map)
xgb_features = features.copy()

_group_keys = ['id', 'second']
_pred_mean_cols = [f'pred_mean_{i}' for i in range(15)]
_neighbour_cols = ['class', 'pred_len', 'proba', 'start'] + _pred_mean_cols

# Neighbour features: values of the row shifted by `lag` inside each
# (id, second) group; rows without such a neighbour get NaN.
for lag in [-1, 1]:
    tmp_df = (
        sub.groupby(_group_keys)[_neighbour_cols]
        .shift(lag)
        .add_prefix(f'shift{lag}_')
    )
    sub = pd.concat([sub, tmp_df], axis=1)
    xgb_features += list(tmp_df.columns)

# Difference features: shifted pred_mean_* minus the row's own pred_mean_*.
for lag in [-1, 1]:
    tmp_df = (
        sub.groupby(_group_keys)[_pred_mean_cols]
        .shift(lag)
        .add_prefix(f'diff{lag}_')
    )
    tmp_df[tmp_df.columns] = tmp_df.values - sub[_pred_mean_cols].values
    sub = pd.concat([sub, tmp_df], axis=1)
    xgb_features += list(tmp_df.columns)

In [None]:
# ================================================
# XGB
# ================================================
test_preds_xgb = np.zeros(len(sub))
# The feature matrix is the same for every fold, so build it once.
xgb_test_matrix = xgb.DMatrix(sub[xgb_features].values)
for i in range(5):
    # NOTE: pickle.load executes code from the file; only load model
    # artifacts from a trusted source.
    with open(f"{XGB_MODEL}/xgb_fold{i}.pkl", "rb") as model_file:
        clf = pickle.load(model_file)
    clf.set_param({'predictor': 'gpu_predictor'})
    # NOTE(review): `ntree_limit` / `best_ntree_limit` are deprecated in newer
    # xgboost releases - confirm against the version the models were saved with.
    test_preds_xgb += clf.predict(xgb_test_matrix, ntree_limit=clf.best_ntree_limit) / 5
del xgb_test_matrix

In [None]:
# ================================================
# MLP
# ================================================

# Fill the missing shifted classes (rows without a previous/next neighbour
# in their group) with 7 and store them as small unsigned integers.
sub[['shift1_class', 'shift-1_class']] = sub[['shift1_class', 'shift-1_class']].fillna(7).astype(np.uint8)

# Add the new columns to the categorical and numerical feature lists.
cat_cols += ['shift1_class', 'shift-1_class']
num_cols += sorted(list(set(xgb_features) - set(features) - set(cat_cols)))
print(num_cols)

# Explicit numerical feature list (and order) fed to the MLP.
num_cols = [
    'pred_len', 'proba', 
    'pred_mean_0', 'pred_mean_1', 'pred_mean_2', 'pred_mean_3', 'pred_mean_4', 'pred_mean_5',
    'pred_mean_6', 'pred_mean_7', 'pred_mean_8', 'pred_mean_9', 'pred_mean_10', 'pred_mean_11',
    'pred_mean_12', 'pred_mean_13', 'pred_mean_14', 
    'pred_std_0', 'pred_std_1', 'pred_std_2', 'pred_std_3', 'pred_std_4', 'pred_std_5',
    'pred_std_6', 'pred_std_7', 'pred_std_8', 'pred_std_9', 'pred_std_10', 'pred_std_11',
    'pred_std_12', 'pred_std_13', 'pred_std_14', 
    'id_len_mean', 'id_len_mean_diff', 'id_proba_mean', 'id_proba_mean_diff',
    'id_class_count', 'id_class_len_mean', 'id_class_len_mean_diff', 
    'id_class_proba_mean', 'id_class_proba_mean_diff', 
    'diff-1_pred_mean_0', 'diff-1_pred_mean_1', 'diff-1_pred_mean_10', 'diff-1_pred_mean_11',
    'diff-1_pred_mean_12', 'diff-1_pred_mean_13', 'diff-1_pred_mean_14', 'diff-1_pred_mean_2',
    'diff-1_pred_mean_3', 'diff-1_pred_mean_4', 'diff-1_pred_mean_5', 'diff-1_pred_mean_6', 
    'diff-1_pred_mean_7', 'diff-1_pred_mean_8', 'diff-1_pred_mean_9', 'diff1_pred_mean_0',
    'diff1_pred_mean_1', 'diff1_pred_mean_10', 'diff1_pred_mean_11', 'diff1_pred_mean_12', 
    'diff1_pred_mean_13', 'diff1_pred_mean_14', 'diff1_pred_mean_2', 'diff1_pred_mean_3', 
    'diff1_pred_mean_4', 'diff1_pred_mean_5', 'diff1_pred_mean_6', 'diff1_pred_mean_7', 
    'diff1_pred_mean_8', 'diff1_pred_mean_9', 'shift-1_pred_len', 'shift-1_pred_mean_0',
    'shift-1_pred_mean_1', 'shift-1_pred_mean_10', 'shift-1_pred_mean_11', 'shift-1_pred_mean_12',
    'shift-1_pred_mean_13', 'shift-1_pred_mean_14', 'shift-1_pred_mean_2', 'shift-1_pred_mean_3',
    'shift-1_pred_mean_4', 'shift-1_pred_mean_5', 'shift-1_pred_mean_6', 'shift-1_pred_mean_7',
    'shift-1_pred_mean_8', 'shift-1_pred_mean_9', 'shift-1_proba', 'shift-1_start', 'shift1_pred_len',
    'shift1_pred_mean_0', 'shift1_pred_mean_1', 'shift1_pred_mean_10', 'shift1_pred_mean_11', 
    'shift1_pred_mean_12', 'shift1_pred_mean_13', 'shift1_pred_mean_14', 'shift1_pred_mean_2',
    'shift1_pred_mean_3', 'shift1_pred_mean_4', 'shift1_pred_mean_5', 'shift1_pred_mean_6',
    'shift1_pred_mean_7', 'shift1_pred_mean_8', 'shift1_pred_mean_9', 'shift1_proba', 'shift1_start']


# Standardise every numerical column with the mean/std stored next to the
# models, then replace remaining NaNs by 0.  The first CSV row of a feature
# is the one used.  This overwrites the columns of `sub` in place.
df_mean_std = pd.read_csv(f"{MLP_MODEL}/mean_std_df.csv")
feature_stats = df_mean_std.drop_duplicates(subset="feature").set_index("feature")
for c in num_cols:
    mean_v = feature_stats.at[c, "mean_val"]
    std_v = feature_stats.at[c, "std_val"]
    sub[c] = ((sub[c] - mean_v) / std_v).fillna(0)

# The inputs are identical for every fold, so the dataset and loader are
# built once instead of once per fold.
test_ = mlp_Dataset(class_val = sub["class"].values,
                    start_val = sub["start"].values,
                    second_val = sub["second"].values,
                    num_features = sub[num_cols].values,
                    shift1class_val = sub["shift1_class"].values,
                    shiftm1class_val = sub["shift-1_class"].values,
                    train = False)
test_loader = DataLoader(dataset=test_, batch_size=64, shuffle = False , pin_memory=True)

test_preds_mlp = np.zeros(len(sub))
for fold in range(5):
    print(f"fold{fold}:start")

    # model: load on CPU first so the checkpoint opens regardless of the
    # device it was saved from, then move to `device`.
    model = mlp_model()
    model.load_state_dict(torch.load(f"{MLP_MODEL}/seed_0_mlp_{fold}.pth", map_location="cpu"))
    model.to(device)
    model.eval()  # switch model to the evaluation mode

    # Collect the batch outputs and concatenate once per fold; the empty
    # float64 seed keeps the result float64 and covers an empty loader.
    fold_chunks = [np.ndarray((0,1))]
    with torch.no_grad():
        for d in tqdm(test_loader,total=len(test_loader)):
            # =========================
            # data loader
            # =========================
            class_val = d['input_data_class'].to(device)
            start_val = d['input_data_start'].to(device)
            second_val = d['input_data_second'].to(device)
            num_val = d['input_data_num_features'].to(device)
            shift1class_val = d['input_data_shift1_class'].to(device)
            shiftm1class_val = d['input_data_shiftm1_class'].to(device)
            output = model(class_val, start_val, second_val, num_val, shift1class_val, shiftm1class_val)
            fold_chunks.append(output.sigmoid().detach().cpu().numpy())

    test_preds_ = np.concatenate(fold_chunks, axis=0)
    # Average over the 5 folds.
    test_preds_mlp += test_preds_.reshape(-1)/5

In [None]:
# ================================================
# LSTM
# ================================================
# Every (id, second) group becomes one sequence of at most max_seq_len rows.
max_seq_len = 70

# raw num_cols (the columns were already standardised in place by the MLP cell)
num_cols = [
    'pred_len', 'proba',
    'pred_mean_0', 'pred_mean_1', 'pred_mean_2', 'pred_mean_3', 'pred_mean_4', 
    'pred_mean_5', 'pred_mean_6', 'pred_mean_7', 'pred_mean_8', 'pred_mean_9', 
    'pred_mean_10', 'pred_mean_11', 'pred_mean_12', 'pred_mean_13', 'pred_mean_14',
    
    'pred_std_0', 'pred_std_1', 'pred_std_2', 'pred_std_3', 'pred_std_4', 
    'pred_std_5', 'pred_std_6', 'pred_std_7', 'pred_std_8', 'pred_std_9',
    'pred_std_10', 'pred_std_11', 'pred_std_12', 'pred_std_13', 'pred_std_14',
    'id_len_mean', 'id_len_mean_diff', 'id_proba_mean', 'id_proba_mean_diff', 
    
    'id_class_count',
    'id_class_len_mean', 'id_class_len_mean_diff', 
    'id_class_proba_mean', 'id_class_proba_mean_diff']

# Build the sequence key: one integer per (id, second) pair.
sub["id_second"] = sub["id"].astype(str) + "-" +  sub["second"].astype(str)
le = LabelEncoder()
sub["id_second"] = le.fit_transform(sub["id_second"])

# Shift the ids by one so that 0 is left for padding.
sub["class_"] = sub["class"] + 1
sub["start_"] = sub["start"] + 1

# Build the zero-padded sequences.
n_sequences = sub["id_second"].nunique()
num_seq = np.zeros([n_sequences,max_seq_len,len(num_cols)])
class_seq = np.zeros([n_sequences,max_seq_len])
start_seq = np.zeros([n_sequences,max_seq_len])
mask = np.zeros([n_sequences,max_seq_len])

num_values = sub[num_cols].values
class_ = sub["class_"].values
start_ = sub["start_"].values
id_second_unique = sub["id_second"].unique()
id_second = sub["id_second"].values

# Row positions of every sequence, kept so the predictions can be scattered
# back without recomputing the membership mask.
sequence_rows = []
for n,i in tqdm(enumerate(id_second_unique),total=len(id_second_unique)):
    rows = np.flatnonzero(id_second == i)
    seq_len = len(rows)
    if seq_len > max_seq_len:
        # Fail with a clear message instead of a broadcasting error below.
        raise ValueError(
            f"sequence {i} has {seq_len} rows, more than max_seq_len={max_seq_len}")
    num_seq[n,:seq_len,:] = num_values[rows,:]
    class_seq[n,:seq_len] = class_[rows]
    start_seq[n,:seq_len] = start_[rows]
    mask[n,:seq_len] = 1
    sequence_rows.append(rows)

test_preds_lstm = np.zeros(len(sub))
test_preds_lstm_seq = np.zeros_like(mask)

# The inputs are identical for every fold, so the dataset and loader are
# built once instead of once per fold.
test_ = lstm_Dataset( num_seq = num_seq,
                      class_seq = class_seq,
                      start_seq = start_seq,
                      mask = mask,
                      train=False)
test_loader = DataLoader(dataset=test_, batch_size=32, shuffle = False , pin_memory=True)

for fold in range(5):
    print(f"fold{fold}:start")

    # model: load on CPU first so the checkpoint opens regardless of the
    # device it was saved from, then move to `device`.
    model = lstm_model()
    model.load_state_dict(torch.load(f"{LSTM_MODEL}/seed_0_lstm_{fold}.pth", map_location="cpu"))
    model.to(device)
    model.eval()  # switch model to the evaluation mode

    # Collect the batch outputs and concatenate once per fold; the empty
    # float64 seed keeps the result float64 and covers an empty loader.
    fold_chunks = [np.ndarray((0,max_seq_len,1))]
    with torch.no_grad():
        for d in tqdm(test_loader,total=len(test_loader)):
            # =========================
            # data loader
            # =========================
            input_num = d['input_data_num_seq'].to(device)
            input_class = d['input_data_class_seq'].to(device)
            input_start = d['input_data_start_seq'].to(device)
            output = model(input_num,input_class,input_start)
            fold_chunks.append(output.sigmoid().detach().cpu().numpy())

    test_preds_ = np.concatenate(fold_chunks, axis=0)
    # Average over the 5 folds.
    test_preds_lstm_seq += test_preds_.reshape([-1,max_seq_len])/5

# Scatter the per-sequence predictions back to the rows of `sub`,
# dropping the padded tail of each sequence.
for n, rows in tqdm(enumerate(sequence_rows),total=len(sequence_rows)):
    test_preds_lstm[rows] = test_preds_lstm_seq[n,:len(rows)]

In [None]:
# ================================================
# XGB + MLP + LSTM
# ================================================
# Equal-weight blend of the three second-stage models.
w = [0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
blended_score = test_preds_xgb * w[0] + test_preds_mlp * w[1] + test_preds_lstm * w[2]

sub["pred"] = blended_score
# Turn the integer class ids back into class names.
sub["class"] = sub["class"].map(target_map_rev)

# Keep the individual model scores next to the blend for inspection.
for column_name, scores in (
        ("test_preds_xgb", test_preds_xgb),
        ("test_preds_mlp", test_preds_mlp),
        ("test_preds_lstm", test_preds_lstm)):
    sub[column_name] = scores

sub = sort_by_id_second_predstr(sub)
preview_columns = ["id", "predictionstring", "test_preds_xgb", "test_preds_mlp", "test_preds_lstm", "pred"]
display(sub[preview_columns].head(60))

In [None]:
# ================================================
# Last PP -> Submission
# ================================================
pred_df_first = sub[sub.second == 0].reset_index(drop=True)
pred_df_second = sub[sub.second == 1].reset_index(drop=True)

# Classes for which only the best-scoring candidate per document is kept.
_one_per_document = ["Lead", "Position", "Concluding Statement"]

# Seeded with an empty frame so the final concat also works without classes.
kept_frames = [pd.DataFrame()]
for c in classes:
    # Keep candidates of class `c` whose score exceeds the class threshold of
    # their candidate set (first / second).
    first_mask = (pred_df_first['class'] == c) & (pred_df_first['pred'] > pred_dict_first[c])
    second_mask = (pred_df_second['class'] == c) & (pred_df_second['pred'] > pred_dict_second[c])
    pred_df_first_ = pred_df_first.loc[first_mask].reset_index(drop=True)
    pred_df_second_ = pred_df_second.loc[second_mask].reset_index(drop=True)

    pred_df = pd.concat([pred_df_first_, pred_df_second_]).reset_index(drop=True)
    # Descending by id, then by score: the first row of each id is its best.
    pred_df = pred_df.sort_values(by=["id", "pred"], ascending=False).reset_index(drop=True)
    if c in _one_per_document:
        pred_df = pred_df.drop_duplicates(subset="id").reset_index(drop=True)
    kept_frames.append(pred_df[['id', 'class', 'predictionstring']])

sub_ensemble = pd.concat(kept_frames, axis=0).reset_index(drop=True)

In [None]:
# Discourse classes processed by the trimming loop at the end of this cell.
classes = [
    "Lead",
    "Claim",
    "Position",
    "Evidence",
    "Counterclaim",
    "Concluding Statement",
    "Rebuttal"
]

# Per-class parameters passed to add_pred:
#   weight_<class>    - fraction of a span's words removed from its end
#   threshold_<class> - a span is trimmed only if it has more words than this
weightclass_dict = {
    'weight_Lead': 0.14570664728598393,
    'weight_Claim': 0.27054193883215294,
    'weight_Position': 0.2160497395777658,
    'weight_Evidence': 0.22809965269846363,
    'weight_Counterclaim': 0.3465951774864235,
    'weight_Concluding Statement': 0.28841514563843695,
    'weight_Rebuttal': 0.28076095423970426,
    'threshold_Lead': 59,
    'threshold_Claim': 33,
    'threshold_Position': 31,
    'threshold_Evidence': 155,
    'threshold_Counterclaim': 34,
    'threshold_Concluding Statement': 174,
    'threshold_Rebuttal': 40,   
}


def add_pred(predictstring, weight, threshold):
    """Cut a fraction of the trailing words off a long prediction string.

    Args:
        predictstring: whitespace-separated word indices.
        weight: fraction of the words to drop from the end; the number dropped
            is ``int(word_count * weight)``.
        threshold: strings with at most this many words are left untrimmed.

    Returns:
        The (possibly shortened) words joined by single spaces.
    """
    words = predictstring.split()
    if len(words) > threshold:
        n_drop = int(len(words) * weight)
        # ``words[:-0]`` is ``words[:0]``, i.e. an empty list: without this
        # guard a drop count of zero would erase the whole prediction.
        if n_drop != 0:
            words = words[:-n_drop]
    return " ".join(words)

def _trimmer_for(label):
    """Return a function that trims one prediction string with ``label``'s parameters."""
    def _trim(predictionstring):
        return add_pred(
            predictionstring,
            weightclass_dict[f'weight_{label}'],
            weightclass_dict[f'threshold_{label}'])
    return _trim


# Trim the spans of every class with that class's weight and threshold.
for c in classes:
    index = (sub_ensemble['class']==c)
    trimmed = sub_ensemble.loc[index, 'predictionstring'].apply(_trimmer_for(c))
    sub_ensemble.loc[index, 'predictionstring'] = trimmed

In [None]:
# Write the final spans to the submission file (without the index column) and show them.
sub_ensemble.to_csv("submission.csv",index=False)
display(sub_ensemble)