<h1 style="color: #6cb4e4;  text-align: center;  padding: 0.25em;  border-top: solid 2.5px #6cb4e4;  border-bottom: solid 2.5px #6cb4e4;  background: -webkit-repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);  background: repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);height:45px;">
<b>
Inf
</b></h1> 



<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
Logs
</b></h2> 

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
Libs
</b></h2> 

In [None]:
import os
import gc
import random
import glob
import numpy as np
import pandas as pd

import seaborn as sns

from sklearn.preprocessing import MinMaxScaler

from dataclasses import dataclass

from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig, AutoModel

import warnings 
warnings.filterwarnings('ignore')

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic,
    non-benchmarking mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this assignment only
    # influences Python processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all seeds every CUDA device rather than only the current one,
    # and is a safe no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    
def inference_fn(test_loader, model, device, is_sigmoid=True):
    """Run ``model`` over every batch of ``test_loader`` and collect the outputs.

    Args:
        test_loader: Sized iterable yielding dict-like batches whose values
            support ``.to(device)``.
        model: Module that is called as ``model(inputs)`` with the whole batch
            mapping; it is switched to eval mode and moved to ``device``.
        device: Device the model and every batch tensor are moved to.
        is_sigmoid: When truthy, a sigmoid is applied to the model output
            before it is collected; otherwise the raw output is kept.

    Returns:
        numpy.ndarray: The per-batch outputs concatenated along axis 0.

    Raises:
        ValueError: Raised by ``np.concatenate`` when the loader yields no batch.
    """
    model.eval()
    model.to(device)
    preds = []

    for inputs in tqdm(test_loader, total=len(test_loader)):
        # Move the batch in place so the mapping type produced by the loader is kept.
        for k, v in inputs.items():
            inputs[k] = v.to(device)

        with torch.no_grad():
            output = model(inputs)
            # Plain truthiness test instead of comparing the flag with `== True`.
            if is_sigmoid:
                output = output.sigmoid()

        preds.append(output.to('cpu').numpy())

    return np.concatenate(preds)
    

def upd_outputs(data, is_trim=False, is_minmax=False, is_reshape=False):
    """Optionally clip, min-max rescale and flatten an array of predictions.

    The steps run in this order, each only when its flag is truthy.

    Args:
        data: NumPy array of predictions.
        is_trim: Replace values ``<= 0`` with 0 and values ``>= 1`` with 1.
        is_minmax: Rescale with ``MinMaxScaler().fit_transform(data)``, i.e.
            the scaler is fitted on ``data`` itself.
        is_reshape: Flatten the result with ``reshape(-1)``.

    Returns:
        The transformed array; ``data`` itself when every flag is false.
    """
    if is_trim:
        data = np.where(data <= 0, 0, data)
        data = np.where(data >= 1, 1, data)

    if is_minmax:
        # Build the scaler only when it is actually needed, not on every call.
        data = MinMaxScaler().fit_transform(data)

    if is_reshape:
        data = data.reshape(-1)

    return data


In [None]:
# Render floats with four decimal places in pandas output.
pd.set_option('display.precision', 4)
# Light-to-green colormap; passed to `background_gradient` when fold predictions are previewed.
cm = sns.light_palette('green', as_cmap=True)
# CSS declaration string (white bold text on green); no use of it is visible in the reviewed cells.
props_param = "color:white; font-weight:bold; background-color:green;"

# Seed handed to seed_everything and batch size copied into every CFG.batch_size.
CUSTOM_SEED = 42
CUSTOM_BATCH = 24
# Use the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Directory holding the competition CSV files, relative to the notebook.
competition_dir = "../input/us-patent-phrase-to-phrase-matching/"

# Sample submission and the raw test rows; each model section works on a copy of test_origin.
submission = pd.read_csv(competition_dir+'sample_submission.csv')
test_origin = pd.read_csv(competition_dir+'test.csv')
test_origin.head()

<h1 style="color: #6cb4e4;  text-align: center;  padding: 0.25em;  border-top: solid 2.5px #6cb4e4;  border-bottom: solid 2.5px #6cb4e4;  background: -webkit-repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);  background: repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);height:45px;">
<b>
Inference TorchModels
</b></h1> 

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
D-104MixFold-TRAIN-deberta-v3-large-ModelExtendv1-SplitScore-All
</b></h2> 

In [None]:
class CFG:
    # Inference settings for the deberta-v3-large checkpoints stored under `path`.
    num_workers=2  # worker processes of the DataLoader
    path="../input/pppm-dc-d-104mixfold-20220613115117/"  # directory with tokenizer, config and fold weights
    config_path=path+'config.pth'  # saved model config, read with torch.load inside CustomModel
    model="microsoft/deberta-v3-large"  # backbone name; '/' is replaced by '-' to form the checkpoint prefix
    batch_size=CUSTOM_BATCH
    fc_dropout=0.2  # rate of CustomModel.fc_dropout
    target_size=1  # output width of CustomModel.fc
    max_len=133  # max_length given to the tokenizer in prepare_input
    trn_fold=[0,1,2,3,4]  # fold indices whose checkpoints are loaded
    
# Tokenizer stored next to the checkpoints; attached to CFG so prepare_input can reach it.
CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.path+'tokenizer/')

# Lookup applied to the `context` column with Series.map to build `context_text`.
# NOTE(review): torch.load unpickles the file, so only load artifacts from a trusted source.
context_mapping = torch.load(CFG.path+"cpc_texts.pth")

In [None]:
def prepare_input(cfg, text):
    """Tokenize one text into fixed-length ``torch.long`` tensors.

    Args:
        cfg: Object exposing ``tokenizer`` (a callable tokenizer) and ``max_len``.
        text: The string handed to the tokenizer.

    Returns:
        The mapping returned by the tokenizer, with every value converted to
        a ``torch.long`` tensor.
    """
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           # Without truncation an over-long text would come back
                           # longer than max_len and could not be batched with the
                           # padded samples.
                           truncation=True,
                           return_offsets_mapping=False)

    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)

    return inputs


class TestDataset(Dataset):
    """Map-style dataset that tokenizes the ``text`` column of a frame on demand."""

    def __init__(self, cfg, df):
        """Keep the raw texts and the config needed to tokenize them later."""
        self.texts = df['text'].values
        self.cfg = cfg

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, item):
        """Return the tokenized inputs for the text at position ``item``."""
        return prepare_input(self.cfg, self.texts[item])

    
class CustomModel(nn.Module):
    """Transformer backbone followed by masked mean pooling, LayerNorm and a linear head.

    Only ``layer_norm1`` and ``fc`` take part in ``forward``. ``fc_dropout``,
    ``attention`` and ``linear`` are registered as submodules as well, so they
    stay part of the module's ``state_dict``; ``attention`` is used by
    ``feature`` only.
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        """Build the backbone and the heads.

        Args:
            cfg: Settings object; ``model``, ``fc_dropout`` and ``target_size`` are read.
            config_path: Optional path of a saved config loaded with ``torch.load``;
                when ``None`` the config comes from ``AutoConfig.from_pretrained``.
            pretrained: If truthy, the backbone is created with
                ``AutoModel.from_pretrained``; otherwise with ``AutoModel.from_config``.
        """
        super().__init__()
        self.cfg = cfg

        # NOTE(review): torch.load unpickles the file - only use trusted configs.
        if config_path is not None:
            self.config = torch.load(config_path)
        else:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)

        if not pretrained:
            self.model = AutoModel.from_config(self.config)
        else:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)

        width = self.config.hidden_size
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(width, self.cfg.target_size)
        self._init_weights(self.fc)
        self.attention = nn.Sequential(
            nn.Linear(width, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        self.layer_norm1 = nn.LayerNorm(width)
        # NOTE(review): nn.Sequential matches none of the isinstance checks in
        # _init_weights, so this call leaves the attention layers as constructed.
        self._init_weights(self.attention)
        self.linear = nn.Linear(width, 1)

    def _init_weights(self, module):
        """Initialise one Linear, Embedding or LayerNorm module in place.

        Modules of any other type are left untouched.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
            return

        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            pad = module.padding_idx
            if pad is not None:
                module.weight.data[pad].zero_()
            return

        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        """Pool the backbone's first output with the learned attention weights.

        The attention weights are not masked with ``attention_mask``.
        """
        token_states = self.model(**inputs)[0]
        attn = self.attention(token_states)
        return torch.sum(attn * token_states, dim=1)

    def forward(self, inputs):
        """Mean-pool the backbone output over unmasked positions and score it.

        Args:
            inputs: Mapping unpacked into the backbone; must hold ``attention_mask``.

        Returns:
            Output of ``fc`` applied to the layer-normalised pooled vector.
        """
        token_states = self.model(**inputs)[0]
        mask = inputs["attention_mask"].unsqueeze(-1).expand(token_states.size()).float()
        masked_total = torch.sum(token_states * mask, 1)
        # Clamp so a row with an all-zero mask does not divide by zero.
        token_counts = torch.clamp(mask.sum(1), min=1e-9)
        pooled = masked_total / token_counts

        return self.fc(self.layer_norm1(pooled))

# Re-seed all random number generators before this model's inference cells run.
seed_everything(CUSTOM_SEED)

In [None]:
# Work on a copy so test_origin stays untouched for the other model sections.
test = test_origin.copy()

# Look up every `context` value in context_mapping (presumably code -> description text; confirm against the file).
test['context_text'] = test['context'].map(context_mapping)
# Model input string: anchor, target and context text joined by the literal '[SEP]'.
test['text'] = test['anchor'] + '[SEP]' + test['target'] + '[SEP]'  + test['context_text']

display(test.head())

In [None]:
# Per-fold predictions of this model; a list here, converted to a DataFrame below.
D_104Mix = []

test_dataset = TestDataset(CFG, test)
# shuffle=False and drop_last=False keep one prediction per test row, in row order.
test_loader = DataLoader(test_dataset,
                         batch_size=CFG.batch_size,
                         shuffle=False,
                         num_workers=CFG.num_workers,
                         pin_memory=True, drop_last=False)

# Checkpoint prefix: CFG.path plus the model name with '/' replaced by '-'.
folds_path = CFG.path + f"{CFG.model.replace('/', '-')}"

for fold in CFG.trn_fold:
    fold_path = f"{folds_path}_fold{fold}_best.pth"
    # Build the architecture from the saved config (pretrained=False), then load this fold's weights.
    model = CustomModel(CFG, config_path=CFG.config_path, pretrained=False)
    # Load onto the CPU first; inference_fn moves the model to DEVICE.
    state = torch.load(fold_path, map_location=torch.device('cpu'))  # DEVICE
    model.load_state_dict(state['model'])
    
    # is_sigmoid keeps its default (True), so sigmoid outputs are collected.
    prediction = inference_fn(test_loader, model, DEVICE)
    D_104Mix.append(prediction)
    
    # Drop the references and clear caches before the next fold is loaded.
    del model, state, prediction
    torch.cuda.empty_cache()
    gc.collect()
    
# Flatten each fold's predictions, then arrange them as one column per fold.
D_104Mix = [upd_outputs(x, is_reshape=True) for x in D_104Mix]
D_104Mix = pd.DataFrame(D_104Mix).T

# Preview the first ten rows with a per-row colour gradient across the folds.
D_104Mix.head(10).style.background_gradient(cmap=cm, axis=1)

In [None]:
# Drop this section's frame and dataset; the next model section rebuilds both.
del test, test_dataset
gc.collect()

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
Changed
    D-20031MixFold-TRAIN-electra-large-discriminator-ModelExtendv1-SplitScore-s5-e10-f1-Copy14 
</b></h2> 

In [None]:
class CFG:
    # Inference settings for the electra-large-discriminator checkpoints.
    num_workers=2  # worker processes of the DataLoader
    path="../input/electrav1/"  # directory holding the saved config (the fold weights live in sub-folders of it)
    config_path=path+'config.pth'  # saved model config, read with torch.load inside CustomModel
    model="google/electra-large-discriminator"  # backbone name
    batch_size=CUSTOM_BATCH
    fc_dropout=0.2  # rate of CustomModel.fc_dropout
    target_size=1  # output width of CustomModel.fc
    max_len=133  # max_length given to the tokenizer in prepare_input
    trn_fold=[0,1,2,3]  # only four fold indices are listed for this model
    
# NOTE(review): tokenizer and context mapping come from a different dataset directory than CFG.path - confirm they match the weights.
CFG.tokenizer = AutoTokenizer.from_pretrained("../input/pppm-dc-d-20031mixfold-20220615092815/tokenizer/")

# Lookup applied to the `context` column with Series.map to build `context_text`.
# NOTE(review): torch.load unpickles the file, so only load artifacts from a trusted source.
context_mapping = torch.load("../input/pppm-dc-d-20031mixfold-20220615092815/cpc_texts.pth")



In [None]:
def prepare_input(cfg, text):
    """Tokenize one text into fixed-length ``torch.long`` tensors.

    Args:
        cfg: Object exposing ``tokenizer`` (a callable tokenizer) and ``max_len``.
        text: The string handed to the tokenizer.

    Returns:
        The mapping returned by the tokenizer, with every value converted to
        a ``torch.long`` tensor.
    """
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           # Without truncation an over-long text would come back
                           # longer than max_len and could not be batched with the
                           # padded samples.
                           truncation=True,
                           return_offsets_mapping=False)

    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)

    return inputs


class TestDataset(Dataset):
    """Map-style dataset that tokenizes the ``text`` column of a frame on demand."""

    def __init__(self, cfg, df):
        """Keep the raw texts and the config needed to tokenize them later."""
        self.texts = df['text'].values
        self.cfg = cfg

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, item):
        """Return the tokenized inputs for the text at position ``item``."""
        return prepare_input(self.cfg, self.texts[item])

    
class CustomModel(nn.Module):
    """Transformer backbone followed by masked mean pooling, LayerNorm and a linear head.

    Only ``layer_norm1`` and ``fc`` take part in ``forward``. ``fc_dropout``,
    ``attention`` and ``linear`` are registered as submodules as well, so they
    stay part of the module's ``state_dict``; ``attention`` is used by
    ``feature`` only.
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        """Build the backbone and the heads.

        Args:
            cfg: Settings object; ``model``, ``fc_dropout`` and ``target_size`` are read.
            config_path: Optional path of a saved config loaded with ``torch.load``;
                when ``None`` the config comes from ``AutoConfig.from_pretrained``.
            pretrained: If truthy, the backbone is created with
                ``AutoModel.from_pretrained``; otherwise with ``AutoModel.from_config``.
        """
        super().__init__()
        self.cfg = cfg

        # NOTE(review): torch.load unpickles the file - only use trusted configs.
        if config_path is not None:
            self.config = torch.load(config_path)
        else:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)

        if not pretrained:
            self.model = AutoModel.from_config(self.config)
        else:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)

        width = self.config.hidden_size
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(width, self.cfg.target_size)
        self._init_weights(self.fc)
        self.attention = nn.Sequential(
            nn.Linear(width, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        self.layer_norm1 = nn.LayerNorm(width)
        # NOTE(review): nn.Sequential matches none of the isinstance checks in
        # _init_weights, so this call leaves the attention layers as constructed.
        self._init_weights(self.attention)
        self.linear = nn.Linear(width, 1)

    def _init_weights(self, module):
        """Initialise one Linear, Embedding or LayerNorm module in place.

        Modules of any other type are left untouched.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
            return

        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            pad = module.padding_idx
            if pad is not None:
                module.weight.data[pad].zero_()
            return

        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        """Pool the backbone's first output with the learned attention weights.

        The attention weights are not masked with ``attention_mask``.
        """
        token_states = self.model(**inputs)[0]
        attn = self.attention(token_states)
        return torch.sum(attn * token_states, dim=1)

    def forward(self, inputs):
        """Mean-pool the backbone output over unmasked positions and score it.

        Args:
            inputs: Mapping unpacked into the backbone; must hold ``attention_mask``.

        Returns:
            Output of ``fc`` applied to the layer-normalised pooled vector.
        """
        token_states = self.model(**inputs)[0]
        mask = inputs["attention_mask"].unsqueeze(-1).expand(token_states.size()).float()
        masked_total = torch.sum(token_states * mask, 1)
        # Clamp so a row with an all-zero mask does not divide by zero.
        token_counts = torch.clamp(mask.sum(1), min=1e-9)
        pooled = masked_total / token_counts

        return self.fc(self.layer_norm1(pooled))

# Re-seed all random number generators before this model's inference cells run.
seed_everything(CUSTOM_SEED)

In [None]:
# Work on a copy so test_origin stays untouched for the other model sections.
test = test_origin.copy()

# Look up every `context` value in context_mapping (presumably code -> description text; confirm against the file).
test['context_text'] = test['context'].map(context_mapping)
# Model input string: anchor, target and context text joined by the literal '[SEP]'.
test['text'] = test['anchor'] + '[SEP]' + test['target'] + '[SEP]'  + test['context_text']

display(test.head())

In [None]:
# Per-fold predictions of this model; a list here, converted to a DataFrame below.
D_20031Mix = []

test_dataset = TestDataset(CFG, test)
# shuffle=False and drop_last=False keep one prediction per test row, in row order.
test_loader = DataLoader(test_dataset,
                         batch_size=CFG.batch_size,
                         shuffle=False,
                         num_workers=CFG.num_workers,
                         pin_memory=True, drop_last=False)

# NOTE(review): folds_path is computed but not used in this cell; fold_path below is hard-coded instead.
folds_path = CFG.path + f"{CFG.model.replace('/', '-')}"

for fold in CFG.trn_fold:
    # Each fold's checkpoint sits in its own sub-folder named after the file.
    fold_path = f'../input/electrav1/google-electra-large-discriminator_fold{fold}_best/google-electra-large-discriminator_fold{fold}_best.pth'
    # Build the architecture from the saved config (pretrained=False), then load this fold's weights.
    model = CustomModel(CFG, config_path=CFG.config_path, pretrained=False)
    # Load onto the CPU first; inference_fn moves the model to DEVICE.
    state = torch.load(fold_path, map_location=torch.device('cpu'))  # DEVICE
    model.load_state_dict(state['model'])
    
    # is_sigmoid keeps its default (True), so sigmoid outputs are collected.
    prediction = inference_fn(test_loader, model, DEVICE)
    D_20031Mix.append(prediction)
    
    # Drop the references and clear caches before the next fold is loaded.
    del model, state, prediction
    torch.cuda.empty_cache()
    gc.collect()
    
# Flatten each fold's predictions, then arrange them as one column per fold.
D_20031Mix = [upd_outputs(x, is_reshape=True) for x in D_20031Mix]
D_20031Mix = pd.DataFrame(D_20031Mix).T

# Preview the first ten rows with a per-row colour gradient across the folds.
D_20031Mix.head(10).style.background_gradient(cmap=cm, axis=1)

In [None]:
# Drop this section's frame and dataset; the next model section rebuilds both.
del test, test_dataset
gc.collect()

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
D-1123MixFold-TRAIN-deberta-v3-base-ModelExtendv1-SplitScore-s5-e10 
</b></h2> 

In [None]:
class CFG:
    # Inference settings for the deberta-v3-base checkpoints stored under `path`.
    num_workers=2  # worker processes of the DataLoader
    path="../input/pppm-dc-d-1123mixfold-20220614054320/"  # directory with tokenizer, config and fold weights
    config_path=path+'config.pth'  # saved model config, read with torch.load inside CustomModel
    model="microsoft/deberta-v3-base"  # backbone name; '/' is replaced by '-' to form the checkpoint prefix
    batch_size=CUSTOM_BATCH
    fc_dropout=0.2  # rate of CustomModel.fc_dropout
    target_size=1  # output width of CustomModel.fc
    max_len=133  # max_length given to the tokenizer in prepare_input
    trn_fold=[0,1,2,3,4]  # fold indices whose checkpoints are loaded
    
# Tokenizer stored next to the checkpoints; attached to CFG so prepare_input can reach it.
CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.path+'tokenizer/')

# Lookup applied to the `context` column with Series.map to build `context_text`.
# NOTE(review): torch.load unpickles the file, so only load artifacts from a trusted source.
context_mapping = torch.load(CFG.path+"cpc_texts.pth")

In [None]:
def prepare_input(cfg, text):
    """Tokenize one text into fixed-length ``torch.long`` tensors.

    Args:
        cfg: Object exposing ``tokenizer`` (a callable tokenizer) and ``max_len``.
        text: The string handed to the tokenizer.

    Returns:
        The mapping returned by the tokenizer, with every value converted to
        a ``torch.long`` tensor.
    """
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           # Without truncation an over-long text would come back
                           # longer than max_len and could not be batched with the
                           # padded samples.
                           truncation=True,
                           return_offsets_mapping=False)

    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)

    return inputs


class TestDataset(Dataset):
    """Map-style dataset that tokenizes the ``text`` column of a frame on demand."""

    def __init__(self, cfg, df):
        """Keep the raw texts and the config needed to tokenize them later."""
        self.texts = df['text'].values
        self.cfg = cfg

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, item):
        """Return the tokenized inputs for the text at position ``item``."""
        return prepare_input(self.cfg, self.texts[item])

    
class CustomModel(nn.Module):
    """Transformer backbone followed by masked mean pooling, LayerNorm and a linear head.

    Only ``layer_norm1`` and ``fc`` take part in ``forward``. ``fc_dropout``,
    ``attention`` and ``linear`` are registered as submodules as well, so they
    stay part of the module's ``state_dict``; ``attention`` is used by
    ``feature`` only.
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        """Build the backbone and the heads.

        Args:
            cfg: Settings object; ``model``, ``fc_dropout`` and ``target_size`` are read.
            config_path: Optional path of a saved config loaded with ``torch.load``;
                when ``None`` the config comes from ``AutoConfig.from_pretrained``.
            pretrained: If truthy, the backbone is created with
                ``AutoModel.from_pretrained``; otherwise with ``AutoModel.from_config``.
        """
        super().__init__()
        self.cfg = cfg

        # NOTE(review): torch.load unpickles the file - only use trusted configs.
        if config_path is not None:
            self.config = torch.load(config_path)
        else:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)

        if not pretrained:
            self.model = AutoModel.from_config(self.config)
        else:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)

        width = self.config.hidden_size
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(width, self.cfg.target_size)
        self._init_weights(self.fc)
        self.attention = nn.Sequential(
            nn.Linear(width, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        self.layer_norm1 = nn.LayerNorm(width)
        # NOTE(review): nn.Sequential matches none of the isinstance checks in
        # _init_weights, so this call leaves the attention layers as constructed.
        self._init_weights(self.attention)
        self.linear = nn.Linear(width, 1)

    def _init_weights(self, module):
        """Initialise one Linear, Embedding or LayerNorm module in place.

        Modules of any other type are left untouched.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
            return

        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            pad = module.padding_idx
            if pad is not None:
                module.weight.data[pad].zero_()
            return

        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        """Pool the backbone's first output with the learned attention weights.

        The attention weights are not masked with ``attention_mask``.
        """
        token_states = self.model(**inputs)[0]
        attn = self.attention(token_states)
        return torch.sum(attn * token_states, dim=1)

    def forward(self, inputs):
        """Mean-pool the backbone output over unmasked positions and score it.

        Args:
            inputs: Mapping unpacked into the backbone; must hold ``attention_mask``.

        Returns:
            Output of ``fc`` applied to the layer-normalised pooled vector.
        """
        token_states = self.model(**inputs)[0]
        mask = inputs["attention_mask"].unsqueeze(-1).expand(token_states.size()).float()
        masked_total = torch.sum(token_states * mask, 1)
        # Clamp so a row with an all-zero mask does not divide by zero.
        token_counts = torch.clamp(mask.sum(1), min=1e-9)
        pooled = masked_total / token_counts

        return self.fc(self.layer_norm1(pooled))

# Re-seed all random number generators before this model's inference cells run.
seed_everything(CUSTOM_SEED)

In [None]:
# Work on a copy so test_origin stays untouched for the other model sections.
test = test_origin.copy()

# Look up every `context` value in context_mapping (presumably code -> description text; confirm against the file).
test['context_text'] = test['context'].map(context_mapping)
# Model input string: anchor, target and context text joined by the literal '[SEP]'.
test['text'] = test['anchor'] + '[SEP]' + test['target'] + '[SEP]'  + test['context_text']

display(test.head())

In [None]:
# Per-fold predictions of this model; a list here, converted to a DataFrame below.
D_1123Mix = []

test_dataset = TestDataset(CFG, test)
# shuffle=False and drop_last=False keep one prediction per test row, in row order.
test_loader = DataLoader(test_dataset,
                         batch_size=CFG.batch_size,
                         shuffle=False,
                         num_workers=CFG.num_workers,
                         pin_memory=True, drop_last=False)

# Checkpoint prefix: CFG.path plus the model name with '/' replaced by '-'.
folds_path = CFG.path + f"{CFG.model.replace('/', '-')}"

for fold in CFG.trn_fold:
    fold_path = f"{folds_path}_fold{fold}_best.pth"
    # Build the architecture from the saved config (pretrained=False), then load this fold's weights.
    model = CustomModel(CFG, config_path=CFG.config_path, pretrained=False)
    # Load onto the CPU first; inference_fn moves the model to DEVICE.
    state = torch.load(fold_path, map_location=torch.device('cpu'))  # DEVICE
    model.load_state_dict(state['model'])
    
    # is_sigmoid keeps its default (True), so sigmoid outputs are collected.
    prediction = inference_fn(test_loader, model, DEVICE)
    D_1123Mix.append(prediction)
    
    # Drop the references and clear caches before the next fold is loaded.
    del model, state, prediction
    torch.cuda.empty_cache()
    gc.collect()
    
# Flatten each fold's predictions, then arrange them as one column per fold.
D_1123Mix = [upd_outputs(x, is_reshape=True) for x in D_1123Mix]
D_1123Mix = pd.DataFrame(D_1123Mix).T

# Preview the first ten rows with a per-row colour gradient across the folds.
D_1123Mix.head(10).style.background_gradient(cmap=cm, axis=1)

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
D-1112MixFold-TRAIN-deberta-v3-large-ModelExtendv1-SplitAnchor-s5-e10
</b></h2> 

In [None]:
class CFG:
    # Inference settings for the deberta-v3-large (SplitAnchor) checkpoints stored under `path`.
    num_workers=2  # worker processes of the DataLoader
    path="../input/pppm-dc-d-1112mixfold-20220614045637/"  # directory with tokenizer, config and fold weights
    config_path=path+'config.pth'  # saved model config, read with torch.load inside CustomModel
    model="microsoft/deberta-v3-large"  # backbone name; '/' is replaced by '-' to form the checkpoint prefix
    batch_size=CUSTOM_BATCH
    fc_dropout=0.2  # rate of CustomModel.fc_dropout
    target_size=1  # output width of CustomModel.fc
    max_len=133  # max_length given to the tokenizer in prepare_input
    trn_fold=[0,1,2,3,4]  # fold indices whose checkpoints are loaded
    
# Tokenizer stored next to the checkpoints; attached to CFG so prepare_input can reach it.
CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.path+'tokenizer/')

# Lookup applied to the `context` column with Series.map to build `context_text`.
# NOTE(review): torch.load unpickles the file, so only load artifacts from a trusted source.
context_mapping = torch.load(CFG.path+"cpc_texts.pth")

In [None]:
def prepare_input(cfg, text):
    """Tokenize one text into fixed-length ``torch.long`` tensors.

    Args:
        cfg: Object exposing ``tokenizer`` (a callable tokenizer) and ``max_len``.
        text: The string handed to the tokenizer.

    Returns:
        The mapping returned by the tokenizer, with every value converted to
        a ``torch.long`` tensor.
    """
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           # Without truncation an over-long text would come back
                           # longer than max_len and could not be batched with the
                           # padded samples.
                           truncation=True,
                           return_offsets_mapping=False)

    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)

    return inputs


class TestDataset(Dataset):
    """Map-style dataset that tokenizes the ``text`` column of a frame on demand."""

    def __init__(self, cfg, df):
        """Keep the raw texts and the config needed to tokenize them later."""
        self.texts = df['text'].values
        self.cfg = cfg

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, item):
        """Return the tokenized inputs for the text at position ``item``."""
        return prepare_input(self.cfg, self.texts[item])

    
class CustomModel(nn.Module):
    """Transformer backbone followed by masked mean pooling, LayerNorm and a linear head.

    Only ``layer_norm1`` and ``fc`` take part in ``forward``. ``fc_dropout``,
    ``attention`` and ``linear`` are registered as submodules as well, so they
    stay part of the module's ``state_dict``; ``attention`` is used by
    ``feature`` only.
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        """Build the backbone and the heads.

        Args:
            cfg: Settings object; ``model``, ``fc_dropout`` and ``target_size`` are read.
            config_path: Optional path of a saved config loaded with ``torch.load``;
                when ``None`` the config comes from ``AutoConfig.from_pretrained``.
            pretrained: If truthy, the backbone is created with
                ``AutoModel.from_pretrained``; otherwise with ``AutoModel.from_config``.
        """
        super().__init__()
        self.cfg = cfg

        # NOTE(review): torch.load unpickles the file - only use trusted configs.
        if config_path is not None:
            self.config = torch.load(config_path)
        else:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)

        if not pretrained:
            self.model = AutoModel.from_config(self.config)
        else:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)

        width = self.config.hidden_size
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(width, self.cfg.target_size)
        self._init_weights(self.fc)
        self.attention = nn.Sequential(
            nn.Linear(width, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        self.layer_norm1 = nn.LayerNorm(width)
        # NOTE(review): nn.Sequential matches none of the isinstance checks in
        # _init_weights, so this call leaves the attention layers as constructed.
        self._init_weights(self.attention)
        self.linear = nn.Linear(width, 1)

    def _init_weights(self, module):
        """Initialise one Linear, Embedding or LayerNorm module in place.

        Modules of any other type are left untouched.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
            return

        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            pad = module.padding_idx
            if pad is not None:
                module.weight.data[pad].zero_()
            return

        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        """Pool the backbone's first output with the learned attention weights.

        The attention weights are not masked with ``attention_mask``.
        """
        token_states = self.model(**inputs)[0]
        attn = self.attention(token_states)
        return torch.sum(attn * token_states, dim=1)

    def forward(self, inputs):
        """Mean-pool the backbone output over unmasked positions and score it.

        Args:
            inputs: Mapping unpacked into the backbone; must hold ``attention_mask``.

        Returns:
            Output of ``fc`` applied to the layer-normalised pooled vector.
        """
        token_states = self.model(**inputs)[0]
        mask = inputs["attention_mask"].unsqueeze(-1).expand(token_states.size()).float()
        masked_total = torch.sum(token_states * mask, 1)
        # Clamp so a row with an all-zero mask does not divide by zero.
        token_counts = torch.clamp(mask.sum(1), min=1e-9)
        pooled = masked_total / token_counts

        return self.fc(self.layer_norm1(pooled))

# Re-seed all random number generators before this model's inference cells run.
seed_everything(CUSTOM_SEED)

In [None]:
# Work on a copy so test_origin stays untouched for the other model sections.
test = test_origin.copy()

# Look up every `context` value in context_mapping (presumably code -> description text; confirm against the file).
test['context_text'] = test['context'].map(context_mapping)
# Model input string: anchor, target and context text joined by the literal '[SEP]'.
test['text'] = test['anchor'] + '[SEP]' + test['target'] + '[SEP]'  + test['context_text']

display(test.head())

In [None]:
# Per-fold predictions of this model; a list here, converted to a DataFrame below.
D_1112Mix = []

test_dataset = TestDataset(CFG, test)
# shuffle=False and drop_last=False keep one prediction per test row, in row order.
test_loader = DataLoader(test_dataset,
                         batch_size=CFG.batch_size,
                         shuffle=False,
                         num_workers=CFG.num_workers,
                         pin_memory=True, drop_last=False)

# Checkpoint prefix: CFG.path plus the model name with '/' replaced by '-'.
folds_path = CFG.path + f"{CFG.model.replace('/', '-')}"

for fold in CFG.trn_fold:
    fold_path = f"{folds_path}_fold{fold}_best.pth"
    # Build the architecture from the saved config (pretrained=False), then load this fold's weights.
    model = CustomModel(CFG, config_path=CFG.config_path, pretrained=False)
    # Load onto the CPU first; inference_fn moves the model to DEVICE.
    state = torch.load(fold_path, map_location=torch.device('cpu'))  # DEVICE
    model.load_state_dict(state['model'])
    
    # is_sigmoid keeps its default (True), so sigmoid outputs are collected.
    prediction = inference_fn(test_loader, model, DEVICE)
    D_1112Mix.append(prediction)
    
    # Drop the references and clear caches before the next fold is loaded.
    del model, state, prediction
    torch.cuda.empty_cache()
    gc.collect()
    
# Flatten each fold's predictions, then arrange them as one column per fold.
D_1112Mix = [upd_outputs(x, is_reshape=True) for x in D_1112Mix]
D_1112Mix = pd.DataFrame(D_1112Mix).T

# Preview the first ten rows with a per-row colour gradient across the folds.
D_1112Mix.head(10).style.background_gradient(cmap=cm, axis=1)

In [None]:
# Drop this section's frame and dataset; the next model section rebuilds both.
del test, test_dataset
gc.collect()

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
G-100MixFold---LB:0.8243
</b></h2> 

In [None]:
class CFG:
    # Inference settings for the deberta-large checkpoints stored under `path`.
    num_workers=2  # worker processes of the DataLoader
    path="../input/pppm-dc-g-100mixfold-20220617152024/"  # directory with tokenizer, config and fold weights
    config_path=path+'config.pth'  # saved model config, read with torch.load inside CustomModel
    model="microsoft/deberta-large"  # backbone name; '/' is replaced by '-' to form the checkpoint prefix
    batch_size=CUSTOM_BATCH
    fc_dropout=0.2  # rate of CustomModel.fc_dropout
    target_size=1  # output width of CustomModel.fc
    max_len=133  # max_length given to the tokenizer in prepare_input
    trn_fold=[0,1,2,3,4]  # fold indices whose checkpoints are loaded
    
# Tokenizer stored next to the checkpoints; attached to CFG so prepare_input can reach it.
CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.path+'tokenizer/')

# Lookup applied to the `context` column with Series.map to build `context_text`.
# NOTE(review): torch.load unpickles the file, so only load artifacts from a trusted source.
context_mapping = torch.load(CFG.path+"cpc_texts.pth")

In [None]:
def prepare_input(cfg, text):
    """Tokenize one text into fixed-length ``torch.long`` tensors.

    Args:
        cfg: Object exposing ``tokenizer`` (a callable tokenizer) and ``max_len``.
        text: The string handed to the tokenizer.

    Returns:
        The mapping returned by the tokenizer, with every value converted to
        a ``torch.long`` tensor.
    """
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           # Without truncation an over-long text would come back
                           # longer than max_len and could not be batched with the
                           # padded samples.
                           truncation=True,
                           return_offsets_mapping=False)

    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)

    return inputs


class TestDataset(Dataset):
    """Map-style dataset that tokenizes the ``text`` column of a frame on demand."""

    def __init__(self, cfg, df):
        """Keep the raw texts and the config needed to tokenize them later."""
        self.texts = df['text'].values
        self.cfg = cfg

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, item):
        """Return the tokenized inputs for the text at position ``item``."""
        return prepare_input(self.cfg, self.texts[item])

    
class CustomModel(nn.Module):
    """Transformer backbone followed by masked mean pooling, LayerNorm and a linear head.

    Only ``layer_norm1`` and ``fc`` take part in ``forward``. ``fc_dropout``,
    ``attention`` and ``linear`` are registered as submodules as well, so they
    stay part of the module's ``state_dict``; ``attention`` is used by
    ``feature`` only.
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        """Build the backbone and the heads.

        Args:
            cfg: Settings object; ``model``, ``fc_dropout`` and ``target_size`` are read.
            config_path: Optional path of a saved config loaded with ``torch.load``;
                when ``None`` the config comes from ``AutoConfig.from_pretrained``.
            pretrained: If truthy, the backbone is created with
                ``AutoModel.from_pretrained``; otherwise with ``AutoModel.from_config``.
        """
        super().__init__()
        self.cfg = cfg

        # NOTE(review): torch.load unpickles the file - only use trusted configs.
        if config_path is not None:
            self.config = torch.load(config_path)
        else:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)

        if not pretrained:
            self.model = AutoModel.from_config(self.config)
        else:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)

        width = self.config.hidden_size
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(width, self.cfg.target_size)
        self._init_weights(self.fc)
        self.attention = nn.Sequential(
            nn.Linear(width, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        self.layer_norm1 = nn.LayerNorm(width)
        # NOTE(review): nn.Sequential matches none of the isinstance checks in
        # _init_weights, so this call leaves the attention layers as constructed.
        self._init_weights(self.attention)
        self.linear = nn.Linear(width, 1)

    def _init_weights(self, module):
        """Initialise one Linear, Embedding or LayerNorm module in place.

        Modules of any other type are left untouched.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
            return

        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            pad = module.padding_idx
            if pad is not None:
                module.weight.data[pad].zero_()
            return

        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        """Pool the backbone's first output with the learned attention weights.

        The attention weights are not masked with ``attention_mask``.
        """
        token_states = self.model(**inputs)[0]
        attn = self.attention(token_states)
        return torch.sum(attn * token_states, dim=1)

    def forward(self, inputs):
        """Mean-pool the backbone output over unmasked positions and score it.

        Args:
            inputs: Mapping unpacked into the backbone; must hold ``attention_mask``.

        Returns:
            Output of ``fc`` applied to the layer-normalised pooled vector.
        """
        token_states = self.model(**inputs)[0]
        mask = inputs["attention_mask"].unsqueeze(-1).expand(token_states.size()).float()
        masked_total = torch.sum(token_states * mask, 1)
        # Clamp so a row with an all-zero mask does not divide by zero.
        token_counts = torch.clamp(mask.sum(1), min=1e-9)
        pooled = masked_total / token_counts

        return self.fc(self.layer_norm1(pooled))

# Re-seed all random number generators before this model's inference cells run.
seed_everything(CUSTOM_SEED)

In [None]:
# Work on a copy so test_origin stays untouched for the other model sections.
test = test_origin.copy()

# Look up every `context` value in context_mapping (presumably code -> description text; confirm against the file).
test['context_text'] = test['context'].map(context_mapping)
# Model input string: anchor, target and context text joined by the literal '[SEP]'.
test['text'] = test['anchor'] + '[SEP]' + test['target'] + '[SEP]'  + test['context_text']

display(test.head())

In [None]:
# Per-fold predictions of this model; a list here, converted to a DataFrame below.
G_100MixFold = []

test_dataset = TestDataset(CFG, test)
# shuffle=False and drop_last=False keep one prediction per test row, in row order.
test_loader = DataLoader(test_dataset,
                         batch_size=CFG.batch_size,
                         shuffle=False,
                         num_workers=CFG.num_workers,
                         pin_memory=True, drop_last=False)

# Checkpoint prefix: CFG.path plus the model name with '/' replaced by '-'.
folds_path = CFG.path + f"{CFG.model.replace('/', '-')}"

for fold in CFG.trn_fold:
    fold_path = f"{folds_path}_fold{fold}_best.pth"
    # Build the architecture from the saved config (pretrained=False), then load this fold's weights.
    model = CustomModel(CFG, config_path=CFG.config_path, pretrained=False)
    # Load onto the CPU first; inference_fn moves the model to DEVICE.
    state = torch.load(fold_path, map_location=torch.device('cpu'))  # DEVICE
    model.load_state_dict(state['model'])
    
    # is_sigmoid keeps its default (True), so sigmoid outputs are collected.
    prediction = inference_fn(test_loader, model, DEVICE)
    G_100MixFold.append(prediction)
    
    # Drop the references and clear caches before the next fold is loaded.
    del model, state, prediction
    torch.cuda.empty_cache()
    gc.collect()
    
# Flatten each fold's predictions, then arrange them as one column per fold.
G_100MixFold = [upd_outputs(x, is_reshape=True) for x in G_100MixFold]
G_100MixFold = pd.DataFrame(G_100MixFold).T

# Preview the first ten rows with a per-row colour gradient across the folds.
G_100MixFold.head(10).style.background_gradient(cmap=cm, axis=1)

In [None]:
# Drop this section's frame and dataset; the next model section defines its own CFG and helpers.
del test, test_dataset
gc.collect()

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
F-10xMixFold---0.84976
</b></h2> 

In [None]:
class CFG:
    # Inference settings for the deberta-v2-xlarge checkpoints stored under `path`.
    num_workers=2  # presumably the DataLoader worker count, as in the earlier sections - confirm
    path="../input/pppm-dc-f-10xmixfold-20220618121926/"  # directory the tokenizer and context mapping are loaded from
    config_path=path+'config.pth'  # path of a saved model config (presumably passed to CustomModel - confirm)
    model="microsoft/deberta-v2-xlarge"  # backbone name
    batch_size=CUSTOM_BATCH
    fc_dropout=0.2  # rate of CustomModel.fc_dropout
    target_size=1  # output width of CustomModel.fc
    max_len=133  # max_length given to the tokenizer in prepare_input
    trn_fold=[0,1,2,3,4]  # fold indices (presumably the checkpoints to load - confirm)
    
# Tokenizer stored under CFG.path; attached to CFG so prepare_input can reach it.
CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.path+'tokenizer/')

# NOTE(review): torch.load unpickles the file, so only load artifacts from a trusted source.
context_mapping = torch.load(CFG.path+"cpc_texts.pth")

In [None]:
def prepare_input(cfg, text):
    """Tokenize one text into fixed-length ``torch.long`` tensors.

    Args:
        cfg: Object exposing ``tokenizer`` (a callable tokenizer) and ``max_len``.
        text: The string handed to the tokenizer.

    Returns:
        The mapping returned by the tokenizer, with every value converted to
        a ``torch.long`` tensor.
    """
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           # Without truncation an over-long text would come back
                           # longer than max_len and could not be batched with the
                           # padded samples.
                           truncation=True,
                           return_offsets_mapping=False)

    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)

    return inputs


class TestDataset(Dataset):
    """Map-style dataset that tokenizes the ``text`` column of a frame on demand."""

    def __init__(self, cfg, df):
        """Keep the raw texts and the config needed to tokenize them later."""
        self.texts = df['text'].values
        self.cfg = cfg

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, item):
        """Return the tokenized inputs for the text at position ``item``."""
        return prepare_input(self.cfg, self.texts[item])

    
class CustomModel(nn.Module):
    """Transformer backbone + masked mean pooling + LayerNorm + linear head.

    ``fc_dropout``, ``attention`` and ``linear`` are created but not used by
    ``forward``. They are kept because they contribute parameter names to the
    module (presumably required to match the saved checkpoints; verify
    before removing).
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg

        # A serialized config takes precedence over fetching one by model name.
        self.config = (
            torch.load(config_path)
            if config_path is not None
            else AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
        )
        # Either load pretrained weights or build the bare architecture.
        self.model = (
            AutoModel.from_pretrained(cfg.model, config=self.config)
            if pretrained
            else AutoModel.from_config(self.config)
        )

        hidden = self.config.hidden_size
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(hidden, self.cfg.target_size)
        self._init_weights(self.fc)
        self.attention = nn.Sequential(
            nn.Linear(hidden, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        self.layer_norm1 = nn.LayerNorm(hidden)
        # NOTE: the Sequential container itself matches none of the isinstance
        # branches in _init_weights, so this call leaves its layers untouched.
        self._init_weights(self.attention)
        self.linear = nn.Linear(hidden, 1)

    def _init_weights(self, module):
        """Initialise a single Linear / Embedding / LayerNorm module in place."""
        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
            return

        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
            return

        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()

    def feature(self, inputs):
        """Attention-weighted sum of the backbone's first output over dim 1."""
        hidden_states = self.model(**inputs)[0]
        attn = self.attention(hidden_states)
        return (attn * hidden_states).sum(dim=1)

    def forward(self, inputs):
        """Mean-pool unmasked positions, normalise, and project to the target."""
        hidden_states = self.model(**inputs)[0]

        # Broadcast the attention mask over the hidden dimension.
        mask = inputs["attention_mask"].unsqueeze(-1).expand(hidden_states.size()).float()
        summed = (hidden_states * mask).sum(1)
        # Clamp guards against division by zero for an all-masked row.
        counts = mask.sum(1).clamp(min=1e-9)

        pooled = self.layer_norm1(summed / counts)
        return self.fc(pooled)

# Re-seed the Python, NumPy and torch RNGs before this model's inference run.
seed_everything(CUSTOM_SEED)

In [None]:
# Derive this model's input frame from the untouched original:
# `context` is mapped through context_mapping, then anchor / target / context
# text are joined with the literal '[SEP]' marker.
test = test_origin.assign(
    context_text=lambda df: df['context'].map(context_mapping),
    text=lambda df: df['anchor'] + '[SEP]' + df['target'] + '[SEP]' + df['context_text'],
)

display(test.head())

In [None]:
# One prediction array per fold is collected here.
F_10xMixFold = []

test_dataset = TestDataset(CFG, test)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG.batch_size,
    num_workers=CFG.num_workers,
    shuffle=False,     # keep predictions in the row order of `test`
    drop_last=False,   # every row needs a prediction
    pin_memory=True,
)

# Common prefix of the per-fold checkpoint files ('/' in the model name becomes '-').
folds_path = CFG.path + CFG.model.replace('/', '-')

for fold in CFG.trn_fold:
    fold_path = f"{folds_path}_fold{fold}_best.pth"

    # Weights are deserialized on CPU; inference_fn moves the model to DEVICE.
    model = CustomModel(CFG, config_path=CFG.config_path, pretrained=False)
    state = torch.load(fold_path, map_location=torch.device('cpu'))
    model.load_state_dict(state['model'])

    prediction = inference_fn(test_loader, model, DEVICE)
    F_10xMixFold.append(prediction)

    # Drop the references before the next fold is loaded.
    del model, state, prediction
    torch.cuda.empty_cache()
    gc.collect()

# Post-process each fold's output and lay the folds out as columns.
F_10xMixFold = pd.DataFrame(
    [upd_outputs(fold_pred, is_reshape=True) for fold_pred in F_10xMixFold]
).T

F_10xMixFold.head(10).style.background_gradient(cmap=cm, axis=1)

In [None]:
# Release this model's test frame and dataset, then force a
# garbage-collection pass.
del test, test_dataset
gc.collect()

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
I-100-TRAIN-albert-xxlarge-v2-ModelExtendAttention-SplitScore-s5-e10-f012345 
</b></h2> 

In [None]:
class CFG:
    # --- checkpoint bundle ---
    path = "../input/pppm-dc-i-100mixfold-20220618221837/"
    config_path = path + 'config.pth'
    model = "albert-xxlarge-v2"
    trn_fold = [0, 1, 2, 3, 4]
    # --- head / tokenization ---
    fc_dropout = 0.2
    target_size = 1
    max_len = 133
    # --- data loading ---
    batch_size = CUSTOM_BATCH
    num_workers = 2


# Tokenizer stored inside the same bundle as the fold checkpoints.
CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.path + 'tokenizer/')

# Lookup applied to the `context` column when the input text is assembled.
context_mapping = torch.load(CFG.path + "cpc_texts.pth")

In [None]:
def prepare_input(cfg, text):
    """Tokenize one text into fixed-length ``torch.long`` tensors.

    Args:
        cfg: Config object exposing ``tokenizer`` (a callable) and ``max_len``.
        text: The string to encode.

    Returns:
        The tokenizer's output mapping, with every value converted in place
        to a ``torch.long`` tensor.
    """
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           # Padding alone never shortens a sequence; without
                           # truncation an over-long text would produce a tensor
                           # longer than max_len and break batch collation.
                           truncation=True,
                           return_offsets_mapping=False)

    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)

    return inputs


class TestDataset(Dataset):
    """Map-style dataset that tokenizes the ``text`` column one row per access."""

    def __init__(self, cfg, df):
        # Only the raw strings are stored; tokenization is deferred to __getitem__.
        self.texts = df['text'].values
        self.cfg = cfg

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        return prepare_input(self.cfg, self.texts[item])

    
class CustomModel(nn.Module):
    """Transformer backbone + attention pooling + dropout + linear head."""

    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg

        # A serialized config takes precedence over fetching one by model name.
        self.config = (
            torch.load(config_path)
            if config_path is not None
            else AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
        )
        # Either load pretrained weights or build the bare architecture.
        self.model = (
            AutoModel.from_pretrained(cfg.model, config=self.config)
            if pretrained
            else AutoModel.from_config(self.config)
        )

        hidden = self.config.hidden_size
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(hidden, self.cfg.target_size)
        self._init_weights(self.fc)
        self.attention = nn.Sequential(
            nn.Linear(hidden, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        # NOTE: the Sequential container itself matches none of the isinstance
        # branches in _init_weights, so this call leaves its layers untouched.
        self._init_weights(self.attention)

    def _init_weights(self, module):
        """Initialise a single Linear / Embedding / LayerNorm module in place."""
        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
            return

        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
            return

        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()

    def feature(self, inputs):
        """Attention-weighted sum of the backbone's first output over dim 1."""
        hidden_states = self.model(**inputs)[0]
        attn = self.attention(hidden_states)
        return (attn * hidden_states).sum(dim=1)

    def forward(self, inputs):
        """Pool with ``feature``, apply dropout, and project to the target."""
        dropped = self.fc_dropout(self.feature(inputs))
        return self.fc(dropped)

# Re-seed the Python, NumPy and torch RNGs before this model's inference run.
seed_everything(CUSTOM_SEED)

In [None]:
# Derive this model's input frame from the untouched original:
# `context` is mapped through context_mapping, then anchor / target / context
# text are joined with the literal '[SEP]' marker.
test = test_origin.assign(
    context_text=lambda df: df['context'].map(context_mapping),
    text=lambda df: df['anchor'] + '[SEP]' + df['target'] + '[SEP]' + df['context_text'],
)

display(test.head())

In [None]:
# One prediction array per fold is collected here.
I_100MixFold = []

test_dataset = TestDataset(CFG, test)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG.batch_size,
    num_workers=CFG.num_workers,
    shuffle=False,     # keep predictions in the row order of `test`
    drop_last=False,   # every row needs a prediction
    pin_memory=True,
)

# Common prefix of the per-fold checkpoint files ('/' in the model name becomes '-').
folds_path = CFG.path + CFG.model.replace('/', '-')

for fold in CFG.trn_fold:
    fold_path = f"{folds_path}_fold{fold}_best.pth"

    # Weights are deserialized on CPU; inference_fn moves the model to DEVICE.
    model = CustomModel(CFG, config_path=CFG.config_path, pretrained=False)
    state = torch.load(fold_path, map_location=torch.device('cpu'))
    model.load_state_dict(state['model'])

    prediction = inference_fn(test_loader, model, DEVICE)
    I_100MixFold.append(prediction)

    # Drop the references before the next fold is loaded.
    del model, state, prediction
    torch.cuda.empty_cache()
    gc.collect()

# Post-process each fold's output and lay the folds out as columns.
I_100MixFold = pd.DataFrame(
    [upd_outputs(fold_pred, is_reshape=True) for fold_pred in I_100MixFold]
).T

I_100MixFold.head(10).style.background_gradient(cmap=cm, axis=1)

In [None]:
# Release this model's test frame and dataset, then force a
# garbage-collection pass.
del test, test_dataset
gc.collect()

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
L-101-TRAIN-bert-for-patents-ModelExtendv1-SplitScore-s5 
</b></h2> 

In [None]:
class CFG:
    # --- checkpoint bundle ---
    path = "../input/pppm-dc-l-101mixfold-20220620013202/"
    config_path = path + 'config.pth'
    model = "anferico/bert-for-patents"
    trn_fold = [0, 1, 2, 3, 4]
    # --- head / tokenization ---
    fc_dropout = 0.2
    target_size = 1
    max_len = 133
    # --- data loading ---
    batch_size = CUSTOM_BATCH
    num_workers = 2


# Tokenizer stored inside the same bundle as the fold checkpoints.
CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.path + 'tokenizer/')

# Lookup applied to the `context` column when the input text is assembled.
context_mapping = torch.load(CFG.path + "cpc_texts.pth")

In [None]:
def prepare_input(cfg, text):
    """Tokenize one text into fixed-length ``torch.long`` tensors.

    Args:
        cfg: Config object exposing ``tokenizer`` (a callable) and ``max_len``.
        text: The string to encode.

    Returns:
        The tokenizer's output mapping, with every value converted in place
        to a ``torch.long`` tensor.
    """
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           # Padding alone never shortens a sequence; without
                           # truncation an over-long text would produce a tensor
                           # longer than max_len and break batch collation.
                           truncation=True,
                           return_offsets_mapping=False)

    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)

    return inputs


class TestDataset(Dataset):
    """Map-style dataset that tokenizes the ``text`` column one row per access."""

    def __init__(self, cfg, df):
        # Only the raw strings are stored; tokenization is deferred to __getitem__.
        self.texts = df['text'].values
        self.cfg = cfg

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        return prepare_input(self.cfg, self.texts[item])

    
class CustomModel(nn.Module):
    """Transformer backbone + masked mean pooling + LayerNorm + linear head.

    ``fc_dropout``, ``attention`` and ``linear`` are created but not used by
    ``forward``. They are kept because they contribute parameter names to the
    module (presumably required to match the saved checkpoints; verify
    before removing).
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg

        # A serialized config takes precedence over fetching one by model name.
        self.config = (
            torch.load(config_path)
            if config_path is not None
            else AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
        )
        # Either load pretrained weights or build the bare architecture.
        self.model = (
            AutoModel.from_pretrained(cfg.model, config=self.config)
            if pretrained
            else AutoModel.from_config(self.config)
        )

        hidden = self.config.hidden_size
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(hidden, self.cfg.target_size)
        self._init_weights(self.fc)
        self.attention = nn.Sequential(
            nn.Linear(hidden, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        self.layer_norm1 = nn.LayerNorm(hidden)
        # NOTE: the Sequential container itself matches none of the isinstance
        # branches in _init_weights, so this call leaves its layers untouched.
        self._init_weights(self.attention)
        self.linear = nn.Linear(hidden, 1)

    def _init_weights(self, module):
        """Initialise a single Linear / Embedding / LayerNorm module in place."""
        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
            return

        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
            return

        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()

    def feature(self, inputs):
        """Attention-weighted sum of the backbone's first output over dim 1."""
        hidden_states = self.model(**inputs)[0]
        attn = self.attention(hidden_states)
        return (attn * hidden_states).sum(dim=1)

    def forward(self, inputs):
        """Mean-pool unmasked positions, normalise, and project to the target."""
        hidden_states = self.model(**inputs)[0]

        # Broadcast the attention mask over the hidden dimension.
        mask = inputs["attention_mask"].unsqueeze(-1).expand(hidden_states.size()).float()
        summed = (hidden_states * mask).sum(1)
        # Clamp guards against division by zero for an all-masked row.
        counts = mask.sum(1).clamp(min=1e-9)

        pooled = self.layer_norm1(summed / counts)
        return self.fc(pooled)

# Re-seed the Python, NumPy and torch RNGs before this model's inference run.
seed_everything(CUSTOM_SEED)

In [None]:
# Derive this model's input frame from the untouched original:
# `context` is mapped through context_mapping, then anchor / target / context
# text are joined with the literal '[SEP]' marker.
test = test_origin.assign(
    context_text=lambda df: df['context'].map(context_mapping),
    text=lambda df: df['anchor'] + '[SEP]' + df['target'] + '[SEP]' + df['context_text'],
)

display(test.head())

In [None]:
# One prediction array per fold is collected here.
L_101MixFold = []

test_dataset = TestDataset(CFG, test)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG.batch_size,
    num_workers=CFG.num_workers,
    shuffle=False,     # keep predictions in the row order of `test`
    drop_last=False,   # every row needs a prediction
    pin_memory=True,
)

# Common prefix of the per-fold checkpoint files ('/' in the model name becomes '-').
folds_path = CFG.path + CFG.model.replace('/', '-')

for fold in CFG.trn_fold:
    fold_path = f"{folds_path}_fold{fold}_best.pth"

    # Weights are deserialized on CPU; inference_fn moves the model to DEVICE.
    model = CustomModel(CFG, config_path=CFG.config_path, pretrained=False)
    state = torch.load(fold_path, map_location=torch.device('cpu'))
    model.load_state_dict(state['model'])

    prediction = inference_fn(test_loader, model, DEVICE)
    L_101MixFold.append(prediction)

    # Drop the references before the next fold is loaded.
    del model, state, prediction
    torch.cuda.empty_cache()
    gc.collect()

# Post-process each fold's output and lay the folds out as columns.
L_101MixFold = pd.DataFrame(
    [upd_outputs(fold_pred, is_reshape=True) for fold_pred in L_101MixFold]
).T

L_101MixFold.head(10).style.background_gradient(cmap=cm, axis=1)

In [None]:
# Release this model's test frame and dataset, then force a
# garbage-collection pass.
del test, test_dataset
gc.collect()

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
N-200MixFold-TRAIN-albert-xxlarge-v1-ModelExtendAttention-SplitScore-s5-e10
</b></h2>

In [None]:
class CFG:
    # --- checkpoint bundle ---
    path = "../input/pppm-dc-n-200mixfold-20220620180707/"
    config_path = path + 'config.pth'
    model = "albert-xxlarge-v1"
    trn_fold = [0, 1, 2, 3, 4]
    # --- head / tokenization ---
    fc_dropout = 0.2
    target_size = 1
    max_len = 133
    # --- data loading ---
    batch_size = CUSTOM_BATCH
    num_workers = 2


# Tokenizer stored inside the same bundle as the fold checkpoints.
CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.path + 'tokenizer/')

# Lookup applied to the `context` column when the input text is assembled.
context_mapping = torch.load(CFG.path + "cpc_texts.pth")

In [None]:
def prepare_input(cfg, text):
    """Tokenize one text into fixed-length ``torch.long`` tensors.

    Args:
        cfg: Config object exposing ``tokenizer`` (a callable) and ``max_len``.
        text: The string to encode.

    Returns:
        The tokenizer's output mapping, with every value converted in place
        to a ``torch.long`` tensor.
    """
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           # Padding alone never shortens a sequence; without
                           # truncation an over-long text would produce a tensor
                           # longer than max_len and break batch collation.
                           truncation=True,
                           return_offsets_mapping=False)

    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)

    return inputs


class TestDataset(Dataset):
    """Map-style dataset that tokenizes the ``text`` column one row per access."""

    def __init__(self, cfg, df):
        # Only the raw strings are stored; tokenization is deferred to __getitem__.
        self.texts = df['text'].values
        self.cfg = cfg

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        return prepare_input(self.cfg, self.texts[item])

    
class CustomModel(nn.Module):
    """Transformer backbone + attention pooling + dropout + linear head."""

    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg

        # A serialized config takes precedence over fetching one by model name.
        self.config = (
            torch.load(config_path)
            if config_path is not None
            else AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
        )
        # Either load pretrained weights or build the bare architecture.
        self.model = (
            AutoModel.from_pretrained(cfg.model, config=self.config)
            if pretrained
            else AutoModel.from_config(self.config)
        )

        hidden = self.config.hidden_size
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(hidden, self.cfg.target_size)
        self._init_weights(self.fc)
        self.attention = nn.Sequential(
            nn.Linear(hidden, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        # NOTE: the Sequential container itself matches none of the isinstance
        # branches in _init_weights, so this call leaves its layers untouched.
        self._init_weights(self.attention)

    def _init_weights(self, module):
        """Initialise a single Linear / Embedding / LayerNorm module in place."""
        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
            return

        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
            return

        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()

    def feature(self, inputs):
        """Attention-weighted sum of the backbone's first output over dim 1."""
        hidden_states = self.model(**inputs)[0]
        attn = self.attention(hidden_states)
        return (attn * hidden_states).sum(dim=1)

    def forward(self, inputs):
        """Pool with ``feature``, apply dropout, and project to the target."""
        dropped = self.fc_dropout(self.feature(inputs))
        return self.fc(dropped)

# Re-seed the Python, NumPy and torch RNGs before this model's inference run.
seed_everything(CUSTOM_SEED)

In [None]:
# Derive this model's input frame from the untouched original:
# `context` is mapped through context_mapping, then anchor / target / context
# text are joined with the literal '[SEP]' marker.
test = test_origin.assign(
    context_text=lambda df: df['context'].map(context_mapping),
    text=lambda df: df['anchor'] + '[SEP]' + df['target'] + '[SEP]' + df['context_text'],
)

display(test.head())

In [None]:
# One prediction array per fold is collected here.
N_200MixFold = []

test_dataset = TestDataset(CFG, test)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG.batch_size,
    num_workers=CFG.num_workers,
    shuffle=False,     # keep predictions in the row order of `test`
    drop_last=False,   # every row needs a prediction
    pin_memory=True,
)

# Common prefix of the per-fold checkpoint files ('/' in the model name becomes '-').
folds_path = CFG.path + CFG.model.replace('/', '-')

for fold in CFG.trn_fold:
    fold_path = f"{folds_path}_fold{fold}_best.pth"

    # Weights are deserialized on CPU; inference_fn moves the model to DEVICE.
    model = CustomModel(CFG, config_path=CFG.config_path, pretrained=False)
    state = torch.load(fold_path, map_location=torch.device('cpu'))
    model.load_state_dict(state['model'])

    prediction = inference_fn(test_loader, model, DEVICE)
    N_200MixFold.append(prediction)

    # Drop the references before the next fold is loaded.
    del model, state, prediction
    torch.cuda.empty_cache()
    gc.collect()

# Post-process each fold's output and lay the folds out as columns.
N_200MixFold = pd.DataFrame(
    [upd_outputs(fold_pred, is_reshape=True) for fold_pred in N_200MixFold]
).T

N_200MixFold.head(10).style.background_gradient(cmap=cm, axis=1)



In [None]:
# Release this model's test frame and dataset, then force a
# garbage-collection pass.
del test, test_dataset
gc.collect()

<h1 style="color: #6cb4e4;  text-align: center;  padding: 0.25em;  border-top: solid 2.5px #6cb4e4;  border-bottom: solid 2.5px #6cb4e4;  background: -webkit-repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);  background: repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);height:45px;">
<b>
Inference TransformersModels
</b></h1> 

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
PubKernel
</b></h2>

- https://www.kaggle.com/code/surilee/inference-bert-for-uspatents-deepshare/notebook (LB: 0.8392)

- https://www.kaggle.com/code/renokan/2-deberta-1-roberta-analysis-and-using/notebook

In [None]:
def prepare_input(cfg, text):
    """Tokenize ``text`` to exactly ``cfg.max_len`` positions as ``torch.long`` tensors.

    The tokenizer output is padded and truncated to ``cfg.max_len``; each of
    its fields is then replaced in place by a tensor, and the same container
    is returned.
    """
    encoded = cfg.tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=cfg.max_len,
    )

    # Snapshot the keys first so the container can be rewritten while looping.
    for name in list(encoded.keys()):
        encoded[name] = torch.tensor(encoded[name], dtype=torch.long)

    return encoded

class TestDataset(Dataset):
    """Map-style dataset that tokenizes the ``text`` column one row per access."""

    def __init__(self, cfg, df):
        # Only the raw strings are stored; tokenization is deferred to __getitem__.
        self.text = df['text'].values
        self.cfg = cfg

    def __len__(self):
        return len(self.text)

    def __getitem__(self, item):
        return prepare_input(self.cfg, self.text[item])
   
    
class CustomModel(nn.Module):
    """Thin wrapper around a single-label sequence-classification model.

    ``dropout`` and ``cls`` are created but not used by ``forward``; they
    still contribute parameter names to the module (presumably so the saved
    checkpoints load; verify before removing).
    """

    def __init__(self, model_path):
        super().__init__()

        # Architecture only: the model is built from the config, without weights.
        config = AutoConfig.from_pretrained(model_path)
        config.num_labels = 1
        self.base = AutoModelForSequenceClassification.from_config(config=config)

        self.dropout = nn.Dropout(p=0)
        self.cls = nn.Linear(config.hidden_size, 1)

    def forward(self, inputs):
        """Return the first element of the wrapped model's output."""
        return self.base(**inputs)[0]

In [None]:
# Re-seed the Python, NumPy and torch RNGs before this model's inference run.
seed_everything(CUSTOM_SEED)

In [None]:
class CFG:
    # Local copy of the backbone; provides both the config and the tokenizer.
    model_path = '../input/deberta-v3-large/deberta-v3-large'
    trn_fold = [0, 1, 2, 3]
    max_len = 130
    # --- data loading ---
    batch_size = CUSTOM_BATCH
    num_workers = 2


CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.model_path)

# Lookup applied to the `context` column when the input text is assembled.
context_mapping = torch.load("../input/folds-dump-the-two-paths-fix/cpc_texts.pth")

In [None]:
titles = pd.read_csv('../input/cpc-codes/titles.csv')

# Attach the CPC title columns while keeping the original row order.
# how='left' is essential: the default inner join silently drops every test
# row whose context code is missing from titles.csv, which would leave this
# model with fewer predictions than the others in the ensemble.
test = (
    test_origin.copy()
    .reset_index()
    .merge(titles, how='left', left_on='context', right_on='code')
    .sort_values(by='index')
    .drop(columns='index')
)
# Fail early if the join changed the number of rows (e.g. duplicated codes).
assert len(test) == len(test_origin), "merge with titles changed the number of test rows"

test['context_text'] = test['context'].map(context_mapping)
# This model's input text is lower-cased.
test['text'] = (
    test['anchor'] + '[SEP]' + test['target'] + '[SEP]' + test['context_text']
).apply(str.lower)

test.head()

In [None]:
# One raw (non-sigmoid) prediction array per fold is collected here.
pub_deberta_predicts_1 = []

test_dataset = TestDataset(CFG, test)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=CFG.batch_size,
    num_workers=CFG.num_workers,
    shuffle=False,     # keep predictions in the row order of `test`
    drop_last=False,   # every row needs a prediction
    pin_memory=True,
)

# Common prefix of the per-fold checkpoint files.
deberta_simple_path = "../input/us-patent-deberta-simple/microsoft_deberta-v3-large"

for fold in CFG.trn_fold:
    fold_path = f"{deberta_simple_path}_best{fold}.pth"

    # Weights are deserialized on CPU; inference_fn moves the model to DEVICE.
    model = CustomModel(CFG.model_path)
    state = torch.load(fold_path, map_location=torch.device('cpu'))
    model.load_state_dict(state['model'])

    # is_sigmoid=False: the raw outputs are kept and post-processed afterwards.
    prediction = inference_fn(test_dataloader, model, DEVICE, is_sigmoid=False)
    pub_deberta_predicts_1.append(prediction)

    # Drop the references before the next fold is loaded.
    del model, state, prediction
    torch.cuda.empty_cache()
    gc.collect()

In [None]:
# The folds above were predicted with inference_fn(..., is_sigmoid=False),
# so upd_outputs is asked for min-max handling here (is_minmax=True).
pub_deberta_predicts_1 = pd.DataFrame(
    [upd_outputs(fold_pred, is_minmax=True, is_reshape=True)
     for fold_pred in pub_deberta_predicts_1]
).T

pub_deberta_predicts_1.head(10).style.background_gradient(cmap=cm, axis=1)

In [None]:
# Release this model's test frame and dataset, then force a
# garbage-collection pass.
del test, test_dataset
gc.collect()

<h1 style="color: #6cb4e4;  text-align: center;  padding: 0.25em;  border-top: solid 2.5px #6cb4e4;  border-bottom: solid 2.5px #6cb4e4;  background: -webkit-repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);  background: repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);height:45px;">
<b>
Ensemble
</b></h1> 

In [None]:
# Switch checked by the `if IF_ENSEMBLE:` cells that build the blended prediction.
IF_ENSEMBLE=True

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
No Ensemble
</b></h2> 

In [None]:
# if not IF_ENSEMBLE:
#     print("# ---------------------------------------------------- #")
#     print("# Prediction Single Model")
#     print("# ---------------------------------------------------- #")
    
#     PREDICTION_1 = D_104Mix
#     PREDICTION_KEY = 'D_104Mix'
    
#     # --------------------------------------------------------------- #
#     all_predictions = pd.concat(
#         [PREDICTION_1],
#         keys=[PREDICTION_KEY],
#         axis=1
#     )

#     all_predictions.head(10) \
#         .assign(mean=lambda x: x.mean(axis=1)) \
#             .style.background_gradient(cmap=cm, axis=1)
    
#     # --------------------------------------------------------------- #
#     all_mean = pd.DataFrame({
#         'PREDICTION_KEY': PREDICTION_1.mean(axis=1)
#     })

#     all_mean.head(10) \
#         .assign(mean=lambda x: x.mean(axis=1)) \
#             .style.highlight_max(axis=1, props=props_param)
#     # --------------------------------------------------------------- #
#     # === N1 ===
#     weights_ = [1.00]
#     final_predictions = all_mean.mul(weights_).sum(axis=1)

#     # === N2 ===
#     # final_predictions = all_mean.median(axis=1)
#     # final_predictions = all_mean.mean(axis=1)

#     # === N3 ===
#     # final_predictions = all_predictions.mean(axis=1)

#     # === N4 ===
#     # combs = pd.DataFrame({
#     #     'deberta_1': deberta_predicts_1.mean(axis=1),
#     #     'deb_2+rob': (deberta_predicts_2.mean(axis=1) * 0.666) \
#     #                     + (roberta_predicts.mean(axis=1) * 0.333)
#     # })
#     # display(combs.head())
#     # final_predictions = combs.median(axis=1)
#     # final_predictions = combs.mean(axis=1)

#     final_predictions.head()

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>
Use Ensemble
</b></h2> 

In [None]:
if IF_ENSEMBLE:
    # Per-fold predictions of every model side by side; the outer column
    # level is the model key, and `keys` must stay in the same order as the frames.
    all_predictions = pd.concat(
        [D_104Mix,
         D_20031Mix,
         D_1123Mix,
         D_1112Mix,
         pub_deberta_predicts_1,
         G_100MixFold,
         F_10xMixFold,
         I_100MixFold,
         L_101MixFold,
         N_200MixFold
        ],
        keys=['D_104Mix',
              'D_20031Mix',
              'D_1123Mix',
              'D_1112Mix',
              'pub_deberta_predicts_1',
              'G_100MixFold',
              'F_10xMixFold',
              'I_100MixFold',
              'L_101MixFold',
              'N_200MixFold'
             ],
        axis=1
    )

    # Jupyter only auto-renders a top-level trailing expression; one nested
    # inside `if` is evaluated and discarded, so display it explicitly.
    display(
        all_predictions.head(10)
        .assign(mean=lambda x: x.mean(axis=1))
        .style.background_gradient(cmap=cm, axis=1)
    )

In [None]:
if IF_ENSEMBLE:
    # One column per model: the row-wise mean over that model's folds.
    all_mean = pd.DataFrame({
        'D_104Mix': D_104Mix.mean(axis=1),
        'D_20031Mix': D_20031Mix.mean(axis=1),
        'D_1123Mix': D_1123Mix.mean(axis=1),
        'D_1112Mix': D_1112Mix.mean(axis=1),
        'pub_deberta_predicts_1': pub_deberta_predicts_1.mean(axis=1),
        'G_100MixFold': G_100MixFold.mean(axis=1),
        'F_10xMixFold': F_10xMixFold.mean(axis=1),
        'I_100MixFold': I_100MixFold.mean(axis=1),
        'L_101MixFold': L_101MixFold.mean(axis=1),
        'N_200MixFold': N_200MixFold.mean(axis=1)
    })

    # Jupyter only auto-renders a top-level trailing expression; one nested
    # inside `if` is evaluated and discarded, so display it explicitly.
    display(
        all_mean.head(10)
        .assign(mean=lambda x: x.mean(axis=1))
        .style.highlight_max(axis=1, props=props_param)
    )

In [None]:
if IF_ENSEMBLE:
    # === N1 ===
    # Weighted blend: one weight per all_mean column, matched by position
    # (the list order must follow the column order of all_mean).
    weights_ = [0.30, 0.15, 0.05, 0.06, 0.15, 0.06, 0.06, 0.06, 0.06, 0.05]
    final_predictions = all_mean.mul(weights_).sum(axis=1)

    # === N2 ===
    # final_predictions = all_mean.median(axis=1)
    # final_predictions = all_mean.mean(axis=1)

    # === N3 ===
    # final_predictions = all_predictions.mean(axis=1)

    # === N4 ===
    # combs = pd.DataFrame({
    #     'deberta_1': deberta_predicts_1.mean(axis=1),
    #     'deb_2+rob': (deberta_predicts_2.mean(axis=1) * 0.666) \
    #                     + (roberta_predicts.mean(axis=1) * 0.333)
    # })
    # display(combs.head())
    # final_predictions = combs.median(axis=1)
    # final_predictions = combs.mean(axis=1)

    # Jupyter only auto-renders a top-level trailing expression; one nested
    # inside `if` is evaluated and discarded, so display it explicitly.
    display(final_predictions.head())

<h1 style="color: #6cb4e4;  text-align: center;  padding: 0.25em;  border-top: solid 2.5px #6cb4e4;  border-bottom: solid 2.5px #6cb4e4;  background: -webkit-repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);  background: repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);height:45px;">
<b>
Submission
</b></h1> 

In [None]:
# Pair ids and scores by position, not by index label. Building the frame
# from two Series aligns them on their indexes, so any difference between
# the index of test_origin and the 0..n-1 index of final_predictions would
# silently yield NaN rows. With plain arrays a length mismatch raises instead.
submission = pd.DataFrame({
    'id': test_origin['id'].to_numpy(),
    'score': final_predictions.to_numpy(),
})
assert submission['score'].notna().all(), "submission contains NaN scores"

submission.head(14)

In [None]:
# Write the submission file without the DataFrame index column.
submission.to_csv('submission.csv', index=False)

<h1 style="color: #6cb4e4;  text-align: center;  padding: 0.25em;  border-top: solid 2.5px #6cb4e4;  border-bottom: solid 2.5px #6cb4e4;  background: -webkit-repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);  background: repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);height:45px;">
<b>
EOF
</b></h1> 