# Notes

This kernel is only for inference; the training one is on its way.  
The experimental model is **RoBERTa**. However, since we're using **Hugging Face**'s **AutoModel** interface, you can easily swap in whatever model you want.

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path

import torch
from torch import nn
from  torch.utils.data import Dataset, DataLoader

import pickle


from tqdm.notebook import tqdm

from transformers import AutoTokenizer, AutoConfig, AutoModelForTokenClassification, AutoModelForSequenceClassification

In [None]:
# Sequence length the tokenizer pads/truncates every excerpt to.
MAX_LENGTH = 300
# Default number of outputs of the final linear layer of the model.
NUM_TARGETS = 1

# Not referenced by the inference code shown in this notebook.
SEED = 321

# Not referenced by the inference code shown in this notebook; the tokenizer
# and config are loaded from pickles rather than by model name.
MODEL_NAME = "roberta-base"

In [None]:
# DataLoader settings used for the test-time loader.
TEST_NUM_WORKERS = 2
TEST_BATCH_SIZE = 32

# Run on the GPU when PyTorch reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

print("Device:", DEVICE)

In [None]:

# Load the test set. Its `excerpt` column is what CRPDataset tokenizes and its
# `id` column is reused when the submission frame is built.
df = pd.read_csv("../input/commonlitreadabilityprize/test.csv")

# Quick look at the size and the first rows of the frame.
print(df.shape)
df.head()

In [None]:
def _load_pickle(path):
    """Return the object unpickled from the binary file at ``path``."""
    # NOTE: unpickling can execute arbitrary code, so only point this at
    # artifacts that come from a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Tokenizer and model config are restored from pickled objects instead of
# being fetched by model name.
TOKENIZER = _load_pickle("../input/crp-roberta-model-kkiller-public-ds/crp-roberta-model-kkiller-private/roberta-base-tokenizer.pkl")
CONFIG = _load_pickle("../input/crp-roberta-model-kkiller-public-ds/crp-roberta-model-kkiller-private/roberta-base-config.pkl")

In [None]:
class CRPDataset(Dataset):
    """Map-style dataset that tokenizes the ``excerpt`` column of a DataFrame.

    Indexing returns a pair ``(input_ids, attention_mask)`` taken from the
    tokenizer output, each with its leading (batch) dimension squeezed away.

    Args:
        df: DataFrame exposing an ``excerpt`` column with the raw text.
        tokenizer: Callable used to tokenize a single text. When ``None`` the
            module-level ``TOKENIZER`` is used.
        max_length: Length the tokenizer pads/truncates to. When ``None`` the
            module-level ``MAX_LENGTH`` is used.
    """

    def __init__(self, df, tokenizer=None, max_length=None):
        self.df = df

        self.tokenizer = TOKENIZER if tokenizer is None else tokenizer
        self.max_length = MAX_LENGTH if max_length is None else max_length

        # Every excerpt is padded/truncated to the same length so that items
        # can be stacked into a batch by the default DataLoader collation.
        self.tokenizer_kwargs = dict(
            add_special_tokens=True,
            return_tensors="pt",
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
        )

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return len(self.df)

    def tokenize(self, txt):
        """Tokenize one text with the dataset's fixed tokenizer settings."""
        return self.tokenizer(txt, **self.tokenizer_kwargs)

    def __getitem__(self, idx):
        """Return ``(input_ids, attention_mask)`` for the row at position ``idx``."""
        # Read from the frame this dataset was built with, not from whatever
        # global ``df`` happens to exist in the notebook.
        encoded = self.tokenize(self.df["excerpt"].iloc[idx])
        # The tokenizer returns tensors with a leading dimension for the single
        # text; drop it so the DataLoader can add the real batch dimension.
        input_ids = encoded["input_ids"].squeeze(0)
        masks = encoded["attention_mask"].squeeze(0)
        return input_ids, masks

In [None]:
# Smoke test: wrap the test frame in the dataset and display its length.
ds = CRPDataset(df)
len(ds)

In [None]:
# Smoke test: fetch the first item and display the shapes of its token ids
# and attention mask.
x, x_mask = ds[0]
x.shape, x_mask.shape

# Inference

In [None]:
def get_model(model_name=None, task="token_classification", num_targets=NUM_TARGETS, config=None):
    """Build a Hugging Face classification model with a resized linear head.

    Args:
        model_name: Name or path handed to ``from_pretrained``. Only used
            when ``config`` is not given.
        task: Selects the Auto class. A value containing ``"token"`` picks
            ``AutoModelForTokenClassification``; otherwise a value containing
            ``"sequence"`` picks ``AutoModelForSequenceClassification``.
            Matching is case-insensitive.
        num_targets: Output width of the replacement ``classifier`` layer.
        config: Optional model config. When given, the model is created with
            ``from_config`` and no tokenizer is loaded.

    Returns:
        A ``(config, tokenizer, model)`` tuple; ``tokenizer`` is ``None`` when
        the model was built from ``config``.

    Raises:
        ValueError: If ``task`` names neither a token nor a sequence task, or
            if neither ``config`` nor ``model_name`` is provided.
    """
    task = task.lower()

    if "token" in task:
        model_cls = AutoModelForTokenClassification
    elif "sequence" in task:
        model_cls = AutoModelForSequenceClassification
    else:
        # Previously this fell through and failed later with an unbound local.
        raise ValueError(
            f"Unsupported task {task!r}: expected a name containing 'token' or 'sequence'."
        )

    if config:
        model = model_cls.from_config(config)
        tokenizer = None
    else:
        if model_name is None:
            raise ValueError("Either `config` or `model_name` must be provided.")
        model = model_cls.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        config = AutoConfig.from_pretrained(model_name)

    # NOTE(review): assumes `model.classifier` exposes `in_features` (i.e. is
    # an nn.Linear); confirm for architectures whose head is a composite module.
    if hasattr(model, "classifier"):
        # Honor the caller's `num_targets` rather than the module-level default.
        model.classifier = nn.Linear(model.classifier.in_features, num_targets)

    return config, tokenizer, model

In [None]:
class AttentionBlock(nn.Module):
    """Attention pooling that collapses dimension 1 of the input.

    A score is computed for every position as ``V(tanh(W(features)))``, the
    scores are normalized with a softmax over dimension 1, and the input is
    summed over that dimension weighted by the normalized scores.

    Args:
        in_features: Size of the last dimension of the input.
        middle_features: Width of the hidden projection ``W``.
        out_features: Width of the score projection ``V``.
    """

    def __init__(self, in_features, middle_features, out_features):
        super().__init__()
        self.in_features, self.middle_features, self.out_features = (
            in_features,
            middle_features,
            out_features,
        )

        self.W = nn.Linear(in_features, middle_features)
        self.V = nn.Linear(middle_features, out_features)

    def forward(self, features):
        """Return the attention-weighted sum of ``features`` over dimension 1."""
        scores = self.V(torch.tanh(self.W(features)))
        # Normalize across dimension 1 so the weights along it sum to one.
        weights = scores.softmax(dim=1)
        return (weights * features).sum(dim=1)

In [None]:
class CRPTokenModel(nn.Module):
    """Token-classification backbone followed by attention pooling and a linear head.

    The backbone comes from ``get_model`` with its ``classifier`` swapped for
    an identity, so the value under its ``"logits"`` key is whatever reaches
    the classifier (presumably per-token features - confirm against the
    backbone implementation). Those are pooled by ``AttentionBlock`` and
    projected to ``num_targets`` outputs.

    Args:
        config: Model config forwarded to ``get_model``.
        num_targets: Output width of the final linear layer.
    """

    def __init__(self, config, num_targets=NUM_TARGETS):
        super().__init__()
        self.num_targets = num_targets

        self.config, self.tokenizer, backbone = get_model(
            config=config,
            task="token_classification",
            num_targets=1,
        )

        # Remember the classifier's input width before removing the classifier.
        hidden_size = backbone.classifier.in_features
        backbone.classifier = nn.Identity()

        self.in_features = hidden_size
        self.model = backbone

        self.att = AttentionBlock(hidden_size, hidden_size, 1)
        self.fc = nn.Linear(hidden_size, self.num_targets)

    def forward(self, *args, **kwargs):
        """Run the backbone on the given arguments, pool its output and project it."""
        token_features = self.model(*args, **kwargs)["logits"]
        pooled = self.att(token_features)
        return self.fc(pooled)

In [None]:
def load_net(checkpoint_path=None, num_targets=NUM_TARGETS, config=None):
    """Create a ``CRPTokenModel`` on ``DEVICE`` in eval mode, optionally restoring weights.

    Args:
        checkpoint_path: Path to a saved state dict. When ``None`` the model
            keeps the weights it was constructed with.
        num_targets: Output width of the model's final linear layer.
        config: Model config; the module-level ``CONFIG`` is used when ``None``.

    Returns:
        The model, moved to ``DEVICE`` and switched to evaluation mode.
    """
    config = CONFIG if config is None else config

    # Forward `num_targets`; it used to be accepted here but silently ignored.
    net = CRPTokenModel(config, num_targets=num_targets).to(DEVICE)

    if checkpoint_path is not None:
        # NOTE: torch.load unpickles the file, so only load checkpoints that
        # come from a trusted source.
        state_dict = torch.load(checkpoint_path, map_location=DEVICE)
        net.load_state_dict(state_dict)

    return net.eval()

In [None]:
@torch.no_grad()
def _predict(nets, xb):
    """Return the mean of the outputs of ``nets`` on one batch.

    Args:
        nets: Sequence of callables accepting ``input_ids`` and
            ``attention_mask`` keyword arguments.
        xb: Indexable batch whose item 0 is passed as ``input_ids`` and whose
            item 1 is passed as ``attention_mask``.

    Returns:
        The sum of all outputs divided by ``len(nets)``.
    """
    input_ids, attention_mask = xb[0], xb[1]
    total = sum(
        net(input_ids=input_ids, attention_mask=attention_mask) for net in nets
    )
    total /= len(nets)
    return total

@torch.no_grad()
def predict(nets, test_data):
    """Run the ensemble over every batch and return all predictions as one array.

    Args:
        nets: Models averaged by ``_predict``.
        test_data: Iterable of batches; items 0 and 1 of each batch are moved
            to ``DEVICE`` and fed to the models.

    Returns:
        A NumPy array made by concatenating the per-batch averaged outputs in
        iteration order.
    """
    batch_outputs = [
        _predict(nets, (batch[0].to(DEVICE), batch[1].to(DEVICE))).cpu().numpy()
        for batch in test_data
    ]
    return np.concatenate(batch_outputs)

In [None]:
# Tokenizing dataset over the test frame.
test_data = CRPDataset(df)

# Shuffling stays off so predictions come back in the same order as the rows
# of `df`, which the submission frame relies on.
test_loader = DataLoader(
    test_data,
    shuffle=False,
    num_workers=TEST_NUM_WORKERS,
    batch_size=TEST_BATCH_SIZE,
)

# Number of samples and number of batches.
len(test_data), len(test_loader)

In [None]:
# Checkpoints are listed explicitly, one file per fold (fold0..fold4). An
# earlier variant globbed every "*.pth" under the roberta-base_maxlen300_seed666
# directory instead; presumably the explicit list pins exactly which epoch of
# each fold is used - confirm before switching back to a glob.


checkpoint_paths = [
    '../input/crp-roberta-model-kkiller-public-ds/crp-roberta-model-kkiller-private/roberta-base_maxlen300_seed666/crp_roberta-base_fold0_epoch_00_rmse_val_-0.5064_20210504223412.pth',
    '../input/crp-roberta-model-kkiller-public-ds/crp-roberta-model-kkiller-private/roberta-base_maxlen300_seed666/crp_roberta-base_fold1_epoch_03_rmse_val_-0.5388_20210504225149.pth',
    '../input/crp-roberta-model-kkiller-public-ds/crp-roberta-model-kkiller-private/roberta-base_maxlen300_seed666/crp_roberta-base_fold2_epoch_05_rmse_val_-0.5622_20210504230806.pth',
    '../input/crp-roberta-model-kkiller-public-ds/crp-roberta-model-kkiller-private/roberta-base_maxlen300_seed666/crp_roberta-base_fold3_epoch_00_rmse_val_-0.5852_20210504231457.pth',
    '../input/crp-roberta-model-kkiller-public-ds/crp-roberta-model-kkiller-private/roberta-base_maxlen300_seed666/crp_roberta-base_fold4_epoch_02_rmse_val_-0.5352_20210504233113.pth',
]

# One model instance per checkpoint; `predict` averages their outputs.
nets = [
    load_net(str(ckpt)) for ckpt in checkpoint_paths
]

# Number of models in the ensemble.
print(len(nets))

In [None]:
# Averaged ensemble predictions for the whole test loader, as a NumPy array.
preds = predict(nets, test_loader)
print(preds.shape)
# Peek at the first few predictions.
preds[:10]

In [None]:
# Build the submission frame from the test ids. Predictions are assigned by
# position, which matches because the test loader does not shuffle.
sub = df[["id"]].copy()
# NOTE(review): assumes `preds` carries a single value per row
# (NUM_TARGETS == 1) - confirm before changing the number of targets.
sub["target"] = preds

print(sub.shape)
sub.head()

In [None]:
# Write the submission file without the DataFrame index column.
sub.to_csv("submission.csv", index=False)