In [None]:
import torch
import pandas as pd
import transformers
import torch.nn as nn
from sklearn import model_selection
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup
import time
import numpy as np

In [None]:
class CONFIG:
    """Namespace holding the hyper-parameters and file locations used by this notebook."""

    # --- file locations -------------------------------------------------
    # Directory with the pre-trained BERT files (also used for the tokenizer below).
    BERT_BASE_PATH = r'../input/bert-base-uncased'
    # CSV read by run() for training/validation.
    TRAINING_FILE = r'../input/commonlitreadabilityprize/train.csv'
    # Where run() writes the best model's state_dict.
    MODEL_PATH = r'../working/final_model_.bin'

    # --- sequence length, batching and schedule -------------------------
    MAX_LEN = 512
    TRAIN_BATCH_SIZE = 8
    VALID_BATCH_SIZE = 4
    EPOCHS = 6

    # The tokenizer is created once, while this class body executes, and is
    # shared by every dataset instance through CONFIG.TOKENIZER.
    TOKENIZER = transformers.BertTokenizer.from_pretrained(
        BERT_BASE_PATH,
        do_lower_case=True,
    )

In [None]:
class BERTDAtaset:
    """Map-style dataset pairing one tokenized text with its regression target.

    Each item is a dict of fixed-length tensors (``ids``, ``mask``,
    ``token_type_ids``, each of length ``max_len``) plus a scalar ``target``.

    Args:
        review: Indexable collection of texts (anything accepted by ``str()``).
        target: Indexable collection of numeric targets, aligned with ``review``.
        tokenizer: Object exposing ``encode_plus``; defaults to ``CONFIG.TOKENIZER``.
        max_len: Length every encoded sequence is truncated/padded to; defaults
            to ``CONFIG.MAX_LEN``.
    """

    def __init__(self, review, target, tokenizer=None, max_len=None):
        self.review = review
        self.target = target
        # Fall back to the notebook-wide configuration when not overridden.
        self.tokenizer = CONFIG.TOKENIZER if tokenizer is None else tokenizer
        self.max_len = CONFIG.MAX_LEN if max_len is None else max_len

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.review)

    def __getitem__(self, item_index):
        """Tokenize, pad and tensorize the example at ``item_index``."""
        review = str(self.review[item_index])
        # Collapse every run of whitespace (including newlines) to one space.
        review = " ".join(review.split())
        inputs = self.tokenizer.encode_plus(
            review,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            # Ask for truncation explicitly so the encoding never exceeds
            # max_len; the padding below can only lengthen a sequence.
            truncation=True,
        )

        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]
        token_type_ids = inputs["token_type_ids"]

        # Right-pad all three sequences with zeros up to max_len.
        padding_len = self.max_len - len(ids)
        ids = ids + [0] * padding_len
        mask = mask + [0] * padding_len
        token_type_ids = token_type_ids + [0] * padding_len

        return {
            "ids": torch.tensor(ids, dtype=torch.long),
            "mask": torch.tensor(mask, dtype=torch.long),
            "token_type_ids": torch.tensor(token_type_ids, dtype=torch.long),
            # Float, not long: an integer dtype would drop the fractional part
            # of the regression target before the loss ever sees it.
            "target": torch.tensor(self.target[item_index], dtype=torch.float),
        }

In [None]:
class BERTBaseUncased(nn.Module):
    """BERT encoder followed by a small feed-forward head producing one value per input."""

    def __init__(self):
        super().__init__()
        # Pre-trained encoder loaded from the configured checkpoint directory.
        self.bert = transformers.BertModel.from_pretrained(CONFIG.BERT_BASE_PATH)

        # Head: Linear(768 -> 512) -> ReLU -> Dropout(0.3) -> Linear(512 -> 1).
        self.dropout = nn.Dropout(0.3)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(768, 512)
        self.fc2 = nn.Linear(512, 1)

    def forward(self, ids, mask, token_type_ids):
        """Encode the batch and map the encoder's second output to a single value.

        Returns the output of ``fc2``, i.e. one value per row of the input batch.
        """
        # return_dict=False makes the encoder return a tuple; it is unpacked
        # into exactly two elements and only the second feeds the head.
        _sequence_output, pooled_output = self.bert(
            ids,
            attention_mask=mask,
            token_type_ids=token_type_ids,
            return_dict=False,
        )
        hidden = self.fc1(pooled_output)
        hidden = self.dropout(self.relu(hidden))
        return self.fc2(hidden)

In [None]:
from tqdm import tqdm

In [None]:
def loss_fn(outputs, targets):
    """Mean squared error between predictions and targets.

    Both tensors are flattened before the comparison. The model in this
    notebook emits shape ``(batch, 1)`` while the data loader yields targets of
    shape ``(batch,)``; handing those to ``MSELoss`` directly broadcasts them
    to ``(batch, batch)`` and averages over every prediction/target pairing
    instead of the matching pairs.

    Args:
        outputs: Tensor of predictions.
        targets: Tensor of targets with the same number of elements.

    Returns:
        A 0-dim tensor holding the mean squared error.
    """
    criterion = nn.MSELoss()
    return criterion(outputs.reshape(-1), targets.reshape(-1))

In [None]:
def train_fn(data_loader, model, optimizer, device, scheduler):
    """Run one pass over ``data_loader``, updating ``model`` after every batch.

    For each batch the inputs are moved to ``device``, gradients are reset, the
    loss from ``loss_fn`` is back-propagated, and then the optimizer and the
    scheduler each take one step. Nothing is returned.
    """
    model.train()  # training mode
    batches = tqdm(enumerate(data_loader), total=len(data_loader))
    for _step, batch in batches:
        # Move the batch to the target device with the dtypes the model/loss expect.
        ids = batch["ids"].to(device, dtype=torch.long)
        mask = batch["mask"].to(device, dtype=torch.long)
        token_type_ids = batch["token_type_ids"].to(device, dtype=torch.long)
        targets = batch["target"].to(device, dtype=torch.float)

        optimizer.zero_grad()

        outputs = model(ids=ids, mask=mask, token_type_ids=token_type_ids)
        batch_loss = loss_fn(outputs, targets)
        batch_loss.backward()

        # The scheduler is stepped once per batch, right after the optimizer.
        optimizer.step()
        scheduler.step()
        
def eval_fn(data_loader, model, device):
    """Return the mean validation loss over every batch in ``data_loader``.

    Each batch's loss (from ``loss_fn``) is weighted by its number of targets
    so that a smaller final batch does not skew the average. Gradients are
    disabled and the model is put in evaluation mode.

    Args:
        data_loader: Iterable of batches shaped like the items produced for
            ``train_fn`` (dicts with ``ids``, ``mask``, ``token_type_ids``,
            ``target``); must support ``len()``.
        model: Callable accepting ``ids``, ``mask`` and ``token_type_ids``.
        device: Device the batch tensors are moved to.

    Returns:
        A 0-dim tensor with the sample-weighted mean loss.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    model.eval()
    total_loss = None
    n_samples = 0
    with torch.no_grad():
        for _, d in tqdm(enumerate(data_loader), total=len(data_loader)):
            ids = d["ids"].to(device, dtype=torch.long)
            mask = d["mask"].to(device, dtype=torch.long)
            token_type_ids = d["token_type_ids"].to(device, dtype=torch.long)
            targets = d["target"].to(device, dtype=torch.float)

            outputs = model(
                ids=ids,
                mask=mask,
                token_type_ids=token_type_ids)

            # Accumulate across batches rather than keeping only the last
            # batch's loss.
            batch_size = targets.size(0)
            weighted_loss = loss_fn(outputs, targets) * batch_size
            total_loss = weighted_loss if total_loss is None else total_loss + weighted_loss
            n_samples += batch_size

    if n_samples == 0:
        raise ValueError("eval_fn received no samples; cannot compute a validation loss")
    return total_loss / n_samples

In [None]:
def run():
    """Fine-tune the BERT regressor and keep the checkpoint with the lowest validation loss.

    Steps:
      1. Read ``CONFIG.TRAINING_FILE`` and hold out 10% of the rows for validation.
      2. Wrap the ``excerpt``/``target`` columns in ``BERTDAtaset`` loaders.
      3. Train for ``CONFIG.EPOCHS`` epochs with AdamW and a linear decay schedule.
      4. After each epoch, evaluate and save the model's ``state_dict`` to
         ``CONFIG.MODEL_PATH`` whenever the validation loss improves.
    """
    dfx = pd.read_csv(CONFIG.TRAINING_FILE).fillna("none")
    df_train, df_valid = model_selection.train_test_split(dfx, test_size=0.1, random_state=42)

    df_train = df_train.reset_index(drop=True)
    df_valid = df_valid.reset_index(drop=True)

    train_dataset = BERTDAtaset(
        review=df_train.excerpt.values,
        target=df_train.target.values,
    )
    valid_dataset = BERTDAtaset(
        review=df_valid.excerpt.values,
        target=df_valid.target.values,
    )

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=CONFIG.TRAIN_BATCH_SIZE,
        # Reshuffle every epoch so batches are not identical across epochs.
        shuffle=True,
        num_workers=4,
    )
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=CONFIG.VALID_BATCH_SIZE,
        num_workers=1,
    )

    # Use the GPU when there is one instead of failing outright on CPU-only machines.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = BERTBaseUncased()
    model.to(device)

    # Biases and LayerNorm parameters are excluded from weight decay.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {"params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], "weight_decay": 0.001},
        {"params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
    ]

    # train_fn steps the scheduler once per batch, so the schedule length is the
    # number of batches per epoch (including a partial last batch) times epochs.
    num_training_steps = len(train_data_loader) * CONFIG.EPOCHS
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps,
    )

    best_loss = np.inf
    for epoch in range(CONFIG.EPOCHS):
        epoch_start_time = time.time()
        train_fn(train_data_loader, model, optimizer, device, scheduler)
        # Keep a plain float so no tensor is held between epochs and the value
        # formats and compares like an ordinary number.
        final_eval_loss = float(eval_fn(valid_data_loader, model, device))
        print(f"Validation loss = {final_eval_loss}")
        print('-' * 59)
        print('| end of epoch {:3d} | time: {:5.2f}s | '
              'valid loss {:8.3f} '.format(epoch,
                                           time.time() - epoch_start_time,
                                           final_eval_loss))
        print('-' * 59)

        if final_eval_loss < best_loss:
            # Saving is best-effort: a failed write is reported and training
            # continues; best_loss only advances once the checkpoint is on disk.
            try:
                torch.save(model.state_dict(), CONFIG.MODEL_PATH)
            except Exception as e:
                print(f"Could not save checkpoint to {CONFIG.MODEL_PATH}: {e}")
            else:
                best_loss = final_eval_loss

In [None]:
# Start training when this cell/module is executed as the main program.
if __name__ == "__main__":
    run()

# torch.save(model.state_dict(), CONFIG.MODEL_PATH)

In [None]:
# for bi, d in tqdm(enumerate(train_data_loader), total=len(train_data_loader)):
#     print(d)
#     break

In [None]:
# def final_prediction(sentence):
#     predict_model = BERTBaseUncased()
#     predict_model.load_state_dict(torch.load(CONFIG.MODEL_PATH))
#     DEVICE = "cpu"
#     predict_model.to(DEVICE)
#     predict_model.eval()
#     tokenizer = CONFIG.TOKENIZER
#     max_len = CONFIG.MAX_LEN
#     review = str(sentence)
#     review = " ".join(review.split())

#     inputs = tokenizer.encode_plus(
#         review, None, add_special_tokens=True, max_length=max_len
#     )

#     ids = inputs["input_ids"]
#     mask = inputs["attention_mask"]
#     token_type_ids = inputs["token_type_ids"]

#     padding_length = max_len - len(ids)
#     ids = ids + ([0] * padding_length)
#     mask = mask + ([0] * padding_length)
#     token_type_ids = token_type_ids + ([0] * padding_length)

#     ids = torch.tensor(ids, dtype=torch.long).unsqueeze(0)
#     mask = torch.tensor(mask, dtype=torch.long).unsqueeze(0)
#     token_type_ids = torch.tensor(token_type_ids, dtype=torch.long).unsqueeze(0)

#     ids = ids.to(DEVICE, dtype=torch.long)
#     token_type_ids = token_type_ids.to(DEVICE, dtype=torch.long)
#     mask = mask.to(DEVICE, dtype=torch.long)

#     outputs = predict_model(ids=ids, mask=mask, token_type_ids=token_type_ids)
    
#     print(outputs)
#     return outputs
    
# final_prediction('THis is a sample sentence')

In [None]:
# test_df = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
# final_predictions = []
# for sentence in test_df.excerpt:
#     pred_score = final_prediction(sentence)
#     final_predictions.append(pred_score.tolist()[0][0])

In [None]:
# submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
# submission.target = final_predictions
# submission.to_csv("submission.csv", index=False)

In [None]:
import os
# Display the contents of the working directory, which is where
# CONFIG.MODEL_PATH points; kept as a bare expression so the notebook renders it.
os.listdir('../working')

In [None]:
# test_model = BERTBaseUncased()
# test_model.load_state_dict(torch.load('./final_model.bin'))

In [None]:
import numpy as np
# Scratch assignment; `qq` is not referenced by any other visible cell.
qq = np.inf