In [None]:
import torch
import pandas as pd
import transformers
import torch.nn as nn
from sklearn import model_selection
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup
import time
import numpy as np

In [None]:
class CONFIG:
    """Namespace of notebook-wide settings: paths, sequence length, tokenizer.

    The class is used purely as a container of class attributes and is never
    instantiated in the cells visible here.
    """

    # --- Locations of the pretrained encoder, fine-tuned weights and data ---
    BERT_BASE_PATH = r'../input/bert-base-uncased'
    MODEL_PATH = r'../input/commonlitbert-basic/final_model.bin'
    TRAINING_FILE = r'../input/commonlitreadabilityprize/train.csv'

    # --- Sequence length used when encoding an excerpt for the model ---
    MAX_LEN = 512

    # --- Training hyper-parameters ---
    # NOTE(review): not read by any cell visible here; presumably carried over
    # from the training notebook -- confirm before removing.
    TRAIN_BATCH_SIZE = 8
    VALID_BATCH_SIZE = 4
    EPOCHS = 6

    # Tokenizer loaded once at class-definition time from the same directory
    # as the encoder weights, with lower-casing enabled.
    TOKENIZER = transformers.BertTokenizer.from_pretrained(
        BERT_BASE_PATH,
        do_lower_case=True,
    )

In [None]:
class BERTBaseUncased(nn.Module):
    """BERT encoder followed by dropout and a single-output linear head.

    The encoder weights are loaded from ``CONFIG.BERT_BASE_PATH``. The
    attribute names ``bert``, ``bert_drop`` and ``out`` determine the
    ``state_dict`` keys, so they must stay as they are for saved checkpoints
    to keep loading.
    """

    def __init__(self):
        super().__init__()
        self.bert = transformers.BertModel.from_pretrained(CONFIG.BERT_BASE_PATH)
        self.bert_drop = nn.Dropout(0.3)
        # Take the head's input width from the loaded encoder instead of
        # hard-coding 768, so the head always matches the encoder's hidden size.
        self.out = nn.Linear(self.bert.config.hidden_size, 1)

    def forward(self, ids, mask, token_type_ids):
        """Run the encoder and map its pooled output to one value per example.

        Args:
            ids: token id tensor passed to the encoder as its first argument.
            mask: attention mask forwarded as ``attention_mask``.
            token_type_ids: segment ids forwarded as ``token_type_ids``.

        Returns:
            The output of the linear head (last dimension of size 1).
        """
        # With return_dict=False the encoder returns a tuple; only the second
        # element (the pooled output) feeds the head.
        _, pooled_output = self.bert(
            ids,
            attention_mask=mask,
            token_type_ids=token_type_ids,
            return_dict=False,
        )
        return self.out(self.bert_drop(pooled_output))

In [None]:
# Build the network and restore the fine-tuned weights. Inference in this
# notebook runs on CPU, so remap the checkpoint's tensors to CPU while loading;
# without map_location a checkpoint saved from a GPU fails to deserialize on a
# machine that has no CUDA device.
predict_model = BERTBaseUncased()
predict_model.load_state_dict(torch.load(CONFIG.MODEL_PATH, map_location="cpu"))
def final_prediction(sentence, verbose=False):
    """Score one excerpt with the module-level ``predict_model``.

    Args:
        sentence: the text to score; it is converted with ``str()`` and its
            whitespace runs are collapsed to single spaces before tokenizing.
        verbose: when True, print the raw output tensor before returning it.

    Returns:
        The tensor returned by ``predict_model`` for a batch of one example,
        computed on CPU with gradient tracking disabled.
    """
    DEVICE = "cpu"
    predict_model.to(DEVICE)
    predict_model.eval()  # inference mode: turns the dropout layers off

    review = " ".join(str(sentence).split())

    # Let the tokenizer truncate and pad to exactly MAX_LEN and hand back
    # batched tensors. truncation=True is passed explicitly so over-long
    # excerpts are cut to the model's maximum length instead of relying on the
    # deprecated implicit fallback that `max_length` alone triggers.
    inputs = CONFIG.TOKENIZER.encode_plus(
        review,
        add_special_tokens=True,
        max_length=CONFIG.MAX_LEN,
        truncation=True,
        padding="max_length",
        return_token_type_ids=True,
        return_attention_mask=True,
        return_tensors="pt",
    )

    ids = inputs["input_ids"].to(DEVICE, dtype=torch.long)
    mask = inputs["attention_mask"].to(DEVICE, dtype=torch.long)
    token_type_ids = inputs["token_type_ids"].to(DEVICE, dtype=torch.long)

    # No gradients are needed for prediction; skipping the autograd graph
    # saves memory and time on every call.
    with torch.no_grad():
        outputs = predict_model(ids=ids, mask=mask, token_type_ids=token_type_ids)

    if verbose:
        print(outputs)
    return outputs
    
# Example: final_prediction('This is a sample sentence')

In [None]:
# Score every excerpt of the competition test set, one at a time, and keep the
# single scalar held in each one-example output tensor.
test_df = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
final_predictions = [
    final_prediction(excerpt).tolist()[0][0] for excerpt in test_df.excerpt
]

In [None]:
# Write the predictions into the sample submission and save it.
# Bracket assignment is used on purpose: attribute-style assignment
# (`submission.target = ...`) only sets a Python attribute when the column is
# missing, which would silently write a file without the predictions.
submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
submission["target"] = final_predictions
submission.to_csv("submission.csv", index=False)