In [None]:
import torch
import pandas as pd
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import (
    get_cosine_schedule_with_warmup, 
    get_cosine_with_hard_restarts_schedule_with_warmup,
    get_linear_schedule_with_warmup,
    get_constant_schedule_with_warmup
)
from transformers import AdamW
from transformers import AutoTokenizer
from transformers import AutoConfig
from transformers import AutoModel
from transformers import PreTrainedModel
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

In [None]:
# Model / run configuration shared by the cells below.
# Only a handful of entries are read by code visible in this notebook
# (marked "read by ..."); the remaining ones are presumably training-time
# settings carried over with the checkpoint recipe -- TODO confirm.
conf_dict = dict(
    split_num=4,
    seed=[100, 99, 98],          # commented-out alternative: [0]
    batch_size=4,
    learning_rate=1e-4,          # commented-out alternative: 2e-5
    regressor_lr=1.0,
    weight_decay=0.01,
    max_length=256,              # read by generate_predictions (tokenizer max_length)
    epochs=6,                    # commented-out alternative: 10
    hidden_dropout=0,
    attention_dropout=0.1,
    linear_dropout1=0,           # read by ReadabilityModel (dropout before the first linear layer)
    linear_dropout2=0.3,         # read by ReadabilityModel (dropout before the output layer)
    warmup_ratio=0.06,
    use_llrd=True,
    llrd_rate=0.8,               # commented-out alternative: 0.95
    freeze_embed=True,
    use_mixout=False,
    mixout_prob=0.3,
    use_prior_wd=False,
    use_commonlit_pretrained=False,
    use_relu=False,              # read by ReadabilityModel.forward (optional ReLU in the head)
    val_interval=20,
    no_interval_epoch=0,
    reinit_layers=4,             # commented-out alternative: 0
    split_type=1,
    # Commented-out alternatives for model_name: "roberta-base",
    # 'microsoft/deberta-base', 'microsoft/deberta-large',
    # "google/electra-large-generator", "google/electra-large-discriminator".
    model_name='roberta-large',
    # Head sizes read by ReadabilityModel; commented-out alternative pair: 768 / 384.
    input_linear_num1=1024,
    input_linear_num2=512,
    save_model_name='mlm_false',
)

class ReadabilityModel(PreTrainedModel):
    """Transformer encoder followed by a small regression head (one score per input).

    The encoder is loaded from ``'../input/robertalarge'``. The head is
    LayerNorm -> Dropout -> Linear -> (optional ReLU) -> Dropout -> Linear,
    with its widths, dropout rates and the ReLU switch taken from the
    module-level ``conf_dict``.
    """

    def __init__(self, conf):
        """Build the encoder and the regression head.

        Args:
            conf: configuration object handed to the ``PreTrainedModel``
                constructor and to ``AutoModel.from_pretrained``.
                ``_init_weights`` reads ``initializer_range`` from
                ``self.config`` (presumably this same object -- set by the
                base class, not shown here).
        """
        super().__init__(conf)

        # Attribute names are kept exactly as they were: they are the parameter
        # name prefixes that load_state_dict() matches against saved checkpoints.
        self.bert = AutoModel.from_pretrained('../input/robertalarge', config=conf)

        self.drop_out1 = nn.Dropout(conf_dict["linear_dropout1"])
        self.drop_out2 = nn.Dropout(conf_dict["linear_dropout2"])

        in_features = conf_dict["input_linear_num1"]
        mid_features = conf_dict["input_linear_num2"]
        self.layer_norm1 = nn.LayerNorm(in_features)
        self.l1 = nn.Linear(in_features, mid_features)
        self.l2 = nn.Linear(mid_features, 1)

        # Initialise the freshly created head modules (same order as declared).
        for head_module in (self.layer_norm1, self.l1, self.l2):
            self._init_weights(head_module)

    def _init_weights(self, module):
        """Initialise ``module`` in place according to its type.

        Linear and Embedding weights are drawn from N(0, initializer_range);
        Linear biases and the Embedding padding row are zeroed; LayerNorm is
        reset to weight 1 / bias 0. Any other module type is left untouched.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
            return

        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
            return

        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def forward(self, ids, mask):
        """Return one prediction per input sequence.

        Args:
            ids: token ids, passed to the encoder as ``input_ids``.
            mask: attention mask, passed to the encoder as ``attention_mask``.

        Returns:
            The head output with its trailing size-1 dimension squeezed away.
        """
        encoded = self.bert(input_ids=ids, attention_mask=mask)

        # Plain mean over dimension 1 of last_hidden_state. The attention mask is
        # not applied here, so padded positions take part in the average.
        pooled = encoded['last_hidden_state'].mean(dim=1)

        hidden = self.l1(self.drop_out1(self.layer_norm1(pooled)))
        if conf_dict["use_relu"]:
            hidden = F.relu(hidden)

        scores = self.l2(self.drop_out2(hidden))
        return scores.squeeze(-1)

In [None]:
class Test_Data(Dataset):
    """Dataset that serves raw excerpt strings from a table with an ``excerpt`` column."""

    def __init__(self, data):
        """Store the table to read from.

        Args:
            data: a pandas DataFrame exposing an ``excerpt`` column.
        """
        super(Test_Data, self).__init__()
        self.data = data

    def __len__(self):
        """Number of rows in the wrapped table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the excerpt stored at row position ``idx``."""
        # Positional access: a DataLoader hands out positions 0..len-1, which only
        # coincide with index labels when the frame has a default RangeIndex.
        # `.iloc` keeps this correct for filtered / re-indexed frames as well.
        return self.data.excerpt.iloc[idx]

  

In [None]:
def generate_predictions(model_path, max_len):
    """Score every excerpt in the competition test CSV with one saved checkpoint.

    Args:
        model_path: path to a state dict saved for ``ReadabilityModel``.
        max_len: token length each excerpt is truncated / padded to.

    Returns:
        list of float: one prediction per row of the test CSV, in file order.
    """
    # Single source of truth for the device; falls back to CPU when no GPU exists.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_config = AutoConfig.from_pretrained('../input/robertalarge')
    # NOTE(review): ReadabilityModel.forward only reads `last_hidden_state`, so this
    # flag looks unnecessary (it makes the encoder return every layer) -- confirm
    # before removing.
    model_config.output_hidden_states = True
    model = ReadabilityModel(model_config)
    tokenizer = AutoTokenizer.from_pretrained('../input/robertalarge')

    # Deserialize onto the CPU first so the checkpoint tensors are not placed on
    # the GPU alongside the model's own copy. torch.load unpickles the file, so
    # only point this at checkpoints you trust.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)
    del state_dict

    model.to(device)
    model.eval()

    df = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
    test_loader = DataLoader(dataset=Test_Data(data=df), shuffle=False, batch_size=64)

    preds_val = []
    with torch.no_grad():
        for excerpts in test_loader:
            # Keep fixed-length padding: the model averages hidden states over all
            # positions (padding included), so the padded length affects the output
            # and presumably has to match what the checkpoints were trained with.
            batch = tokenizer(
                list(excerpts),
                truncation=True,
                padding="max_length",
                return_tensors='pt',
                max_length=max_len,
            )
            input_ids = batch['input_ids'].to(device, dtype=torch.long)
            attention_mask = batch['attention_mask'].to(device, dtype=torch.long)

            pred = model(input_ids, attention_mask)
            preds_val.extend(pred.detach().cpu().numpy().tolist())

    # Drop the model before emptying the cache so its memory can actually be released.
    del model
    torch.cuda.empty_cache()
    return preds_val

In [None]:
#preds1 = generate_predictions("../input/common/roberta-large_mlm_false_fold0_seed98.bin", max_len=256)

In [None]:
#preds=[preds1,preds1]

In [None]:
#preds

In [None]:
# Ensemble: run each of the four seed-98 fold checkpoints over the test set and
# average their predictions element-wise.
fold_model_paths = [
    "../input/common/roberta-large_mlm_false_fold{}_seed98.bin".format(fold)
    for fold in range(4)
]
fold_preds_list = [generate_predictions(path, max_len=256) for path in fold_model_paths]

# Keep the per-fold names available for ad-hoc inspection in later cells.
preds1, preds2, preds3, preds4 = fold_preds_list

# A Python list cannot be divided by a number; np.mean over axis 0 already
# yields the per-row average across folds.
preds = np.mean(fold_preds_list, axis=0)

In [None]:
#preds1

In [None]:
# Fill the sample submission with the ensembled predictions and write it out.
submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
# Item assignment always targets a column; attribute assignment would only set a
# plain Python attribute if the `target` column were ever missing.
submission["target"] = preds
submission.to_csv("submission.csv", index=False)

In [None]:
# Display the submission frame as a final visual sanity check.
submission