In [None]:
import sys
sys.path.append("../input/tez-lib/")
import torch
from sklearn import metrics
import transformers
import torch.nn as nn
import numpy as np
import tez
import pandas as pd
from tqdm import tqdm

In [None]:
class AttentionHead(nn.Module):
    """Attention pooling that collapses dimension 1 of a feature tensor.

    Every position gets a scalar score ``V(tanh(W(features)))``; the scores are
    softmax-normalised along dimension 1 and used as weights for a sum of the
    input features. Dropout (p=0.3) is applied to the pooled result.

    Note that no padding mask is taken into account: every position along
    dimension 1 takes part in the softmax.
    """

    def __init__(self, in_size: int = 768, hidden_size: int = 512) -> None:
        """Build the scoring layers.

        Args:
            in_size: Size of the last dimension of the incoming features.
            hidden_size: Width of the intermediate ``tanh`` projection.
        """
        super().__init__()
        # Attribute names (W, V) are part of the state_dict keys of saved
        # checkpoints, so they must stay exactly as they are.
        self.W = nn.Linear(in_size, hidden_size)
        self.V = nn.Linear(hidden_size, 1)
        self.dropout = nn.Dropout(0.3)

    def forward(self, features):
        """Pool ``features`` over dimension 1.

        Args:
            features: Input tensor; presumably (batch, seq_len, in_size) --
                confirm against the caller.

        Returns:
            The attention-weighted sum over dimension 1, after dropout.
        """
        scores = self.V(torch.tanh(self.W(features)))
        weights = torch.softmax(scores, dim=1)
        pooled = torch.sum(weights * features, dim=1)
        return self.dropout(pooled)


class CommonlitModel(tez.Model):
    """Pairwise regression model built on a shared transformer encoder.

    Two tokenised texts are encoded by the same transformer, each sequence of
    last-layer hidden states is pooled by the same ``AttentionHead``, the two
    pooled vectors are concatenated and a linear layer maps them to two outputs.
    """

    def __init__(self, model_name, num_train_steps, steps_per_epoch, learning_rate):
        """Load the pretrained backbone and create the pooling / regression heads.

        Args:
            model_name: Name or path handed to ``transformers.AutoConfig`` and
                ``transformers.AutoModel``.
            num_train_steps: Stored on the instance; not used in this class.
            steps_per_epoch: Stored on the instance; not used in this class.
            learning_rate: Stored on the instance; not used in this class.
        """
        super().__init__()
        self.learning_rate = learning_rate
        self.steps_per_epoch = steps_per_epoch
        self.model_name = model_name
        self.num_train_steps = num_train_steps
        # NOTE(review): presumably read by tez to decide when the scheduler steps -- confirm.
        self.step_scheduler_after = "batch"

        hidden_dropout_prob: float = 0.0
        layer_norm_eps: float = 1e-7
        config = transformers.AutoConfig.from_pretrained(model_name)
        config.update(
            {
                "output_hidden_states": True,
                "hidden_dropout_prob": hidden_dropout_prob,
                "layer_norm_eps": layer_norm_eps,
            }
        )
        # Attribute names below are state_dict key prefixes of saved checkpoints; do not rename.
        self.transformer = transformers.AutoModel.from_pretrained(model_name, config=config)
        self.attention = AttentionHead(in_size=config.hidden_size, hidden_size=config.hidden_size)
        # Input is the concatenation of two pooled vectors, hence hidden_size * 2.
        self.regressor = nn.Linear(config.hidden_size * 2, 2)

    def _encode_and_pool(self, ids, mask):
        """Encode one tokenised text and attention-pool its last hidden states."""
        # Keyword arguments keep the call correct regardless of the positional
        # order of the backbone's forward() parameters, which varies by architecture.
        encoded = self.transformer(input_ids=ids, attention_mask=mask)
        return self.attention(encoded.last_hidden_state)

    def forward(self, ids1, mask1, ids2, mask2, targets=None):
        """Score a pair of texts.

        Args:
            ids1: Token ids of the first text.
            mask1: Attention mask of the first text.
            ids2: Token ids of the second text.
            mask2: Attention mask of the second text.
            targets: Accepted for interface compatibility; not used here.

        Returns:
            A 3-tuple ``(output, 0, {})`` where ``output`` is the regressor
            result with two values per sample. No loss or metrics are computed;
            the constant ``0`` and empty dict stand in for them.
        """
        pooled1 = self._encode_and_pool(ids1, mask1)
        pooled2 = self._encode_and_pool(ids2, mask2)
        output = self.regressor(torch.cat((pooled1, pooled2), dim=1))
        return output, 0, {}


class CommonlitDataset:
    """Indexable dataset of excerpt pairs that tokenises each pair on access.

    Each entry of ``excerpts`` is a two-element sequence. Element ``[1]`` feeds
    the ``ids1``/``mask1`` tensors and element ``[0]`` feeds ``ids2``/``mask2``.
    """

    def __init__(self, excerpts, target_dict, tokenizer, max_len, num_samples=None):
        """Store the inputs; nothing is tokenised until an item is requested.

        Args:
            excerpts: Indexable collection of two-element pairs of texts.
            target_dict: Mapping from text to its target value.
            tokenizer: Callable returning a mapping with ``"input_ids"`` and
                ``"attention_mask"``.
            max_len: Passed to the tokenizer as ``max_length``.
            num_samples: Stored on the instance; not used by this class.
        """
        self.excerpts = excerpts
        self.target_dict = target_dict
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.num_samples = num_samples

    def __len__(self):
        """Return the number of pairs."""
        return len(self.excerpts)

    def _tokenize(self, text):
        """Tokenise one text, padded/truncated to ``max_len``."""
        return self.tokenizer(text, max_length=self.max_len, padding="max_length", truncation=True)

    def __getitem__(self, item):
        """Return the tensors for the pair at position ``item``.

        Returns:
            Dict with long tensors ``ids1``, ``mask1``, ``ids2``, ``mask2`` and a
            float tensor ``targets`` ordered as [target of pair[0], target of pair[1]].
        """
        text1 = str(self.excerpts[item][1])
        text2 = str(self.excerpts[item][0])
        # Targets are looked up before tokenising, ordered (pair[0], pair[1]).
        pair_targets = [self.target_dict[text2], self.target_dict[text1]]

        encoded1 = self._tokenize(text1)
        encoded2 = self._tokenize(text2)

        token_fields = {
            "ids1": encoded1["input_ids"],
            "mask1": encoded1["attention_mask"],
            "ids2": encoded2["input_ids"],
            "mask2": encoded2["attention_mask"],
        }
        sample = {name: torch.tensor(values, dtype=torch.long) for name, values in token_fields.items()}
        sample["targets"] = torch.tensor(pair_targets, dtype=torch.float)
        return sample

In [None]:
# --- Inference configuration -------------------------------------------------
max_len = 256
model_name = "microsoft/deberta-large"
n_folds = 5  # number of fold checkpoints to ensemble
predict_batch_size = 64

# --- Anchor excerpt ----------------------------------------------------------
# Every test excerpt is paired with the training excerpt whose target is 0.
train_df = pd.read_csv("../input/commonlitreadabilityprize/train.csv")
anchor_excerpts = train_df.loc[train_df.target == 0, "excerpt"].values
if len(anchor_excerpts) == 0:
    raise ValueError("train.csv contains no excerpt with target == 0 to use as the pairwise anchor")
base_string = anchor_excerpts[0]

# --- Test pairs --------------------------------------------------------------
df = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
df["target"] = 0  # placeholder so the dataset can look up a target for each excerpt
test_excerpts = df.excerpt.values.tolist()
test_pairs = [(base_string, excerpt) for excerpt in test_excerpts]
target_dict = dict(zip(test_excerpts, df.target.values.tolist()))
target_dict[base_string] = 0

mpath = "../input/huggingface-deberta-variants/deberta-large/deberta-large"
tokenizer = transformers.AutoTokenizer.from_pretrained(mpath)

# The dataset does not depend on the fold, so it is built once and reused.
test_dataset = CommonlitDataset(
    excerpts=test_pairs,
    target_dict=target_dict,
    tokenizer=tokenizer,
    max_len=max_len,
)

# --- Per-fold prediction -----------------------------------------------------
checkpoint_prefix = model_name.replace("/", "")
final_preds = []  # one list of per-excerpt predictions per fold
for _fold in range(n_folds):
    # Training-only arguments are irrelevant for inference; dummy values are passed.
    model = CommonlitModel(
        model_name=mpath,
        num_train_steps=1,
        learning_rate=1,
        steps_per_epoch=1,
    )

    model.load(
        f"../input/deberta-pairwise-466/{checkpoint_prefix}__fold_{_fold}.bin",
        weights_only=True,
    )

    fold_preds = []
    # NOTE(review): n_jobs=-1 is presumably "use all CPU cores" in tez -- confirm.
    for batch_output in tqdm(model.predict(test_dataset, batch_size=predict_batch_size, n_jobs=-1)):
        # Column 1 lines up with element [1] of each pair, i.e. the test excerpt
        # (see the target ordering in CommonlitDataset.__getitem__).
        fold_preds.extend(batch_output[:, 1].tolist())

    final_preds.append(fold_preds)

In [None]:
# Average the per-fold predictions: one column per fold, one row per test excerpt.
_preds = np.mean(np.column_stack(final_preds), axis=1)

submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
# Predictions are written positionally, so the row counts must agree.
assert len(_preds) == len(submission), (
    f"got {len(_preds)} predictions for {len(submission)} submission rows"
)
# Item assignment always writes a real column; attribute assignment would only
# set a Python attribute if the "target" column were ever missing.
submission["target"] = _preds
submission.to_csv("submission.csv", index=False)

In [None]:
# Preview the first rows of the submission as a final sanity check.
submission.head(n=5)