In [1]:
from transformers import * 
import numpy as np 
import pandas as pd
import torch 
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler 
import time 
import datetime 
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import re
import os

  '"sox" backend is being deprecated. '


In [4]:
# Load the fine-tuned weights that are later passed to `load_state_dict`.
# map_location="cpu" keeps the deserialized tensors in host memory: without it
# torch.load restores every tensor to the device it was saved from, so a
# checkpoint saved from a GPU would stay resident in GPU memory alongside the
# model it is copied into.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a
# trusted source.
roberta_checkpoint = torch.load(
    "../input/roberta-large-epoch-9/RoBERTa_large_9",
    map_location="cpu",
)
electra_checkpoint = torch.load(
    "../input/electra-largeepoch6/ELECTRA_large_6",
    map_location="cpu",
)


In [6]:
# Build a single-output (num_labels=1) RoBERTa sequence-classification model
# from the local base weights, then overwrite its parameters with the
# fine-tuned checkpoint and move it to the GPU.
# NOTE(review): there is no explicit .eval() call here; from_pretrained is
# documented to return the model in evaluation mode — confirm for the
# installed transformers version.
roberta_test = RobertaForSequenceClassification.from_pretrained(
    "../input/huggingface-roberta/roberta-large",
    num_labels=1,
)
roberta_test.load_state_dict(roberta_checkpoint)
roberta_test.cuda()
# Ending the cell with a bare print() means the module repr returned by
# .cuda() is not the cell's last expression and so is not echoed.
print()

Some weights of the model checkpoint at ../input/huggingface-roberta/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ../input/huggingface-roberta/roberta-large and a




In [7]:
# Build a single-output (num_labels=1) ELECTRA sequence-classification model
# from the local discriminator weights, then overwrite its parameters with the
# fine-tuned checkpoint and move it to the GPU.
# NOTE(review): there is no explicit .eval() call here; from_pretrained is
# documented to return the model in evaluation mode — confirm for the
# installed transformers version.
electra_test = ElectraForSequenceClassification.from_pretrained(
    "../input/electra/large-discriminator",
    num_labels=1,
)
electra_test.load_state_dict(electra_checkpoint)
electra_test.cuda()
# Ending the cell with a bare print() means the module repr returned by
# .cuda() is not the cell's last expression and so is not echoed.
print()

Some weights of the model checkpoint at ../input/electra/large-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at ../input/electra/large-discriminator and are newly initialized: ['classi




In [12]:
# Tokenizer used by `roberta_tokenizer_process`; loaded from the same local
# directory the RoBERTa base weights come from.
roberta_tokenizer = RobertaTokenizerFast.from_pretrained(
    "../input/huggingface-roberta/roberta-large"
)

def roberta_tokenizer_process(sent, MAX_LEN):
    """Tokenize ``sent`` with ``roberta_tokenizer`` and fit it to exactly ``MAX_LEN`` ids.

    Sequences longer than ``MAX_LEN`` are shortened with head+tail truncation:
    the first ``MAX_LEN * 129 // 512`` ids and the last remaining ids are kept
    (129 head + 383 tail when ``MAX_LEN`` is 512). Shorter sequences are
    right-padded with 0 in both the ids and the attention mask.

    Args:
        sent: Text to encode.
        MAX_LEN: Length of the returned lists.

    Returns:
        A tuple ``(input_id, attention_mask)`` of two lists, each of length
        ``MAX_LEN``.

    NOTE(review): padding uses id 0. RoBERTa vocabularies normally reserve
    id 1 for ``<pad>``; the padded positions carry attention_mask 0, so this
    is presumably harmless, but confirm it matches how the checkpoint was
    trained before changing it.
    """
    encoded_dict = roberta_tokenizer.encode_plus(
        text=sent,
        add_special_tokens=True,
        pad_to_max_length=False,
        return_attention_mask=True,
    )
    input_id = encoded_dict['input_ids']
    attention_mask = encoded_dict['attention_mask']

    n_tokens = len(input_id)
    if n_tokens > MAX_LEN:
        # Keep the original 129:383 head/tail ratio for any MAX_LEN.
        head_len = MAX_LEN * 129 // 512
        # Index from the front rather than with a negative slice so that a
        # zero-length tail yields an empty list instead of the whole sequence.
        tail_start = n_tokens - (MAX_LEN - head_len)
        input_id = input_id[:head_len] + input_id[tail_start:]
        attention_mask = attention_mask[:head_len] + attention_mask[tail_start:]
        print("Long Text!! Using Head+Tail Truncation")
    else:
        input_id = input_id + [0] * (MAX_LEN - n_tokens)
        attention_mask = attention_mask + [0] * (MAX_LEN - len(attention_mask))

    return input_id, attention_mask


In [13]:
# Tokenizer used by `electra_tokenizer_process`; loaded from the same local
# directory the ELECTRA discriminator weights come from.
electra_tokenizer = ElectraTokenizer.from_pretrained(
    "../input/electra/large-discriminator"
)

def electra_tokenizer_process(sent, MAX_LEN):
    """Tokenize ``sent`` with ``electra_tokenizer`` and fit it to exactly ``MAX_LEN`` ids.

    Sequences longer than ``MAX_LEN`` are shortened with head+tail truncation:
    the first ``MAX_LEN * 129 // 512`` ids and the last remaining ids are kept
    (129 head + 383 tail when ``MAX_LEN`` is 512). Shorter sequences are
    right-padded with 0 in both the ids and the attention mask.

    Args:
        sent: Text to encode.
        MAX_LEN: Length of the returned lists.

    Returns:
        A tuple ``(input_id, attention_mask)`` of two lists, each of length
        ``MAX_LEN``.
    """
    encoded_dict = electra_tokenizer.encode_plus(
        text=sent,
        add_special_tokens=True,
        pad_to_max_length=False,
        return_attention_mask=True,
    )
    input_id = encoded_dict['input_ids']
    attention_mask = encoded_dict['attention_mask']

    n_tokens = len(input_id)
    if n_tokens > MAX_LEN:
        # Keep the original 129:383 head/tail ratio for any MAX_LEN.
        head_len = MAX_LEN * 129 // 512
        # Index from the front rather than with a negative slice so that a
        # zero-length tail yields an empty list instead of the whole sequence.
        tail_start = n_tokens - (MAX_LEN - head_len)
        input_id = input_id[:head_len] + input_id[tail_start:]
        attention_mask = attention_mask[:head_len] + attention_mask[tail_start:]
        print("Long Text!! Using Head+Tail Truncation")
    else:
        input_id = input_id + [0] * (MAX_LEN - n_tokens)
        attention_mask = attention_mask + [0] * (MAX_LEN - len(attention_mask))

    return input_id, attention_mask

In [14]:
# Run settings. Of these, only MAX_LEN is referenced by the cells visible
# here; BATCH_SIZE, NUM_EPOCHS and VALID_SPLIT are defined but not used below.
BATCH_SIZE, NUM_EPOCHS = 16, 10
VALID_SPLIT = 0.1
MAX_LEN = 512

# Device the input tensors are moved to before each forward pass.
device = torch.device("cuda")

# Competition inputs: the excerpts to score and the submission template.
test = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")

# Excerpt column as an array, iterated in file order by the inference loops.
test_texts = test['excerpt'].values

# One RoBERTa prediction per excerpt, appended in `test_texts` order.
roberta_predictions = []

for text in tqdm(test_texts, position=0, leave=True):
    input_id, attention_mask = roberta_tokenizer_process(text, MAX_LEN=MAX_LEN)

    # list -> integer tensor -> shape (-1, MAX_LEN) -> target device
    input_id = torch.reshape(torch.tensor(input_id, dtype=int), (-1, MAX_LEN)).to(device)
    attention_mask = torch.reshape(torch.tensor(attention_mask, dtype=int), (-1, MAX_LEN)).to(device)

    # Forward pass only; gradient tracking is disabled.
    with torch.no_grad():
        outputs = roberta_test(
            input_id,
            token_type_ids=None,
            attention_mask=attention_mask,
        )

    # .item() requires a one-element tensor, so outputs[0] is read as a scalar.
    yhat = outputs[0].item()
    roberta_predictions.append(yhat)
    


100%|██████████| 7/7 [00:01<00:00,  5.16it/s]


In [15]:
# One ELECTRA prediction per excerpt, appended in `test_texts` order.
electra_predictions = []

for text in tqdm(test_texts, position=0, leave=True):
    input_id, attention_mask = electra_tokenizer_process(text, MAX_LEN=MAX_LEN)

    # list -> integer tensor -> shape (-1, MAX_LEN) -> target device
    input_id = torch.reshape(torch.tensor(input_id, dtype=int), (-1, MAX_LEN)).to(device)
    attention_mask = torch.reshape(torch.tensor(attention_mask, dtype=int), (-1, MAX_LEN)).to(device)

    # Forward pass only; gradient tracking is disabled.
    with torch.no_grad():
        outputs = electra_test(
            input_id,
            token_type_ids=None,
            attention_mask=attention_mask,
        )

    # .item() requires a one-element tensor, so outputs[0] is read as a scalar.
    yhat = outputs[0].item()
    electra_predictions.append(yhat)
    


100%|██████████| 7/7 [00:00<00:00, 12.91it/s]


In [16]:
# Echo the per-excerpt RoBERTa predictions for a quick visual check.
roberta_predictions

[0.1232486218214035,
 -0.3321461081504822,
 -0.38081932067871094,
 -1.8343415260314941,
 -1.7079259157180786,
 -0.8954422473907471,
 0.4654872417449951]

In [17]:
# Echo the per-excerpt ELECTRA predictions for a quick visual check.
electra_predictions

[-0.07344838976860046,
 -0.265074223279953,
 -0.36824485659599304,
 -2.005181312561035,
 -1.8369956016540527,
 -0.8404904007911682,
 -0.0658130794763565]

In [19]:
# Equal-weight ensemble: element-wise mean of the two models' predictions.
avg_predictions = np.add(
    np.array(roberta_predictions),
    np.array(electra_predictions),
) / 2.0

In [21]:
# Overwrite the template's second column (positional index 1) with the
# ensemble predictions; the rendered table below shows that column is `target`.
# NOTE(review): this relies on sample_submission.csv listing ids in the same
# row order as test.csv, which is where the predictions came from — confirm.
submission.iloc[:,1] = avg_predictions 

# Display the filled-in frame.
submission

Unnamed: 0,id,target
0,c0f722661,0.0249
1,f0953f0a5,-0.29861
2,0df072751,-0.374532
3,04caf4e0c,-1.919761
4,0e63f8bea,-1.772461
5,12537fe78,-0.867966
6,965e592c0,0.199837


In [22]:
# Write the submission file to the working directory, without the index column.
submission.to_csv(path_or_buf="submission.csv", index=False)