# **Import Necessay Library**

In [1]:
import csv
import pandas as pd
import torch
import numpy as np
from transformers import AutoModel, AutoTokenizer
from torch.optim import lr_scheduler
from torch import nn
from torch.optim import Adam
from sklearn.metrics import mean_squared_error
from tqdm.notebook import tqdm

In [2]:
if torch.cuda.is_available():
    device = 'cuda: 0'
else:
    device = 'cpu'

# **Load the data**

In [3]:
df = pd.read_csv("train.csv")
df.head()

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5
2,00299B378633,"Dear, Principal\n\nIf u change the school poli...",3.0,3.5,3.0,3.0,3.0,2.5
3,003885A45F42,The best time in life is when you become yours...,4.5,4.5,4.5,4.5,4.0,5.0
4,0049B1DF5CCC,Small act of kindness can impact in other peop...,2.5,3.0,3.0,3.0,2.5,2.5


In [4]:
test = pd.read_csv('test.csv')

In [5]:
def mcr_mse(y_trues, y_preds):
    scores = []
    idxes = y_trues.shape[1]
    for i in range(idxes):
        y_true = y_trues[:,i]
        y_pred = y_preds[:,i]
        score = mean_squared_error(y_true, y_pred, squared=False)
        scores.append(score)
    mcrmse_score = np.mean(scores)
    return mcrmse_score, scores

In [6]:
df_train, df_val, df_test = np.split(df.sample(frac=1,random_state=42),
                                    [int(.8*len(df)),
                                    int(.9*len(df))])

print(f'Train_Shape: {len(df_train)},Val_Shape: {len(df_val)},Test_Shape: {len(df_test)}')

Train_Shape: 3128,Val_Shape: 391,Test_Shape: 392


In [7]:
#DatasetClass

tokenizer = AutoTokenizer.from_pretrained('bert-large-cased')

class Dataset(torch.utils.data.Dataset):
    
    def __init__(self,df):
        
        self.labels = df[["cohesion","syntax","vocabulary","phraseology","grammar","conventions"]].reset_index()
        self.texts = df[["full_text"]].reset_index()
    
    def get_classes(self):
        return self.labels
    
    def __len__(self):
        return len(self.labels)
    
    def get_labels(self, idx):
        return np.array(self.labels.loc[idx].values[1:].astype(float))
        
        
    def get_text(self,idx):
        return tokenizer(self.texts.loc[idx].values[1],
                        padding= 'max_length',max_length=512,truncation=True,
                        return_tensors='pt')
    
    def __getitem__(self,idx):
        text = self.get_text(idx)
        y = self.get_labels(idx)
        return text, y

# Build the Model

In [14]:
class BERTModel(nn.Module):
    
    def __init__(self, dropout=0.1):
        
        super(BERTModel, self).__init__()
        self.bert = AutoModel.from_pretrained('bert-large-cased')
        self.dropout = nn.Dropout(dropout,0)
        self.linear = nn.Linear(1024,256)
        self.relu = nn.LeakyReLU(0.1)
        self.linear_2 = nn.Linear(256, 128)
        self.out = nn.Linear(128,6)
        
        
    def forward(self,input_id,mask):
        _, x = self.bert(input_ids=input_id,attention_mask=mask,return_dict=False)
        x = self.dropout(x)
        x = self.linear(x)
        x = self.relu(x)
        x = self.linear_2(x)
        x = self.relu(x)
        final_layer = self.out(x)
        return final_layer
        
        

In [None]:
model = BERTModel()

# **Train the Model**

In [15]:
def train(model, train_data, val_data, epochs):

    train, val = Dataset(train_data), Dataset(val_data)
    train_dataloader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=2)

    device = torch.device('cuda:0') 
    criterion = nn.MSELoss()
    optimizer = Adam(model.parameters(), lr=1e-5)
    scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer,T_0=500,  eta_min=1e-6)

    device = torch.device("cuda")

    model = model.cuda()
    criterion = criterion.cuda()

    for epoch_num in range(epochs):

            total_loss_train = 0

            for train_input, train_labels in tqdm(train_dataloader):

                train_labels = train_labels.to(device).float()
                mask = train_input['attention_mask'].to(device)
                input_id = train_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)
                
                loss = criterion(output, train_labels)
                total_loss_train += loss.item()
                
                model.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()
            
            total_loss_val = 0

            with torch.no_grad():

                for val_input, val_label in val_dataloader:

                    val_label = val_label.to(device)
                    mask = val_input['attention_mask'].to(device)
                    input_id = val_input['input_ids'].squeeze(1).to(device)

                    output = model(input_id, mask)

                    loss = criterion(output, val_label)
                    total_loss_val += loss.item()
                                
            print(f'Epoch: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_data): .3f} | Val Loss: {total_loss_val / len(val_data): .3f}')

In [None]:
EPOCHS = 20
train(model, df_train, df_val, EPOCHS)

# Evaluation class


In [22]:
#valid

def evaluate(model, test_data):
    
    test =  Dataset(test_data)
    
    test_dataloader = torch.utils.data.DataLoader(test,batch_size=2)
    criterion = nn.MSELoss()
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    
    if use_cuda:
        
        model = model.cuda()
        
    total_loss_test = 0
    with torch.no_grad():
        for test_input, test_labels in tqdm(test_dataloader):
            test_labels =  test_labels.to(device)
            mask = test_input["attention_mask"].to(device)
            input_id = test_input["input_ids"].squeeze(1).to(device)
            
            output =  model(input_id, mask)
            
            loss =  criterion(output,test_labels)
            mcrmse = mcr_mse(output.cpu(), test_labels.cpu())
            total_loss_test += loss
            
    print(f'Test_Loss: {total_loss_test / len(test_data): .3f}')
    print(mcrmse)
            
    

In [17]:
class testDataset(torch.utils.data.Dataset):
    
    def __init__(self,df):
        
        self.texts = df[["full_text"]].reset_index()
    
    
    def __len__(self):
        return len(self.texts)
           
        
    def get_text(self,idx):
        return tokenizer(self.texts.loc[idx].values[1], padding= 'max_length',max_length=512,truncation=True, return_tensors='pt')
    
    def __getitem__(self,idx):
        
        text = self.get_text(idx)
        
        return text

def get_output(model, test_data=test):
    
    test =  testDataset(test_data)
    
    test_dataloader = torch.utils.data.DataLoader(test,batch_size=1)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    
    if use_cuda:
        
        model = model.cuda()
        
    out = []
    with torch.no_grad():
        for test_input in tqdm(test_dataloader):
            mask = test_input["attention_mask"].to(device)
            input_id = test_input["input_ids"].squeeze(1).to(device)
            output =  model(input_id, mask)
            out.append(output.tolist())
                                               
    return out
    
           
    

In [23]:
evaluate(model, df_test)

  0%|          | 0/196 [00:00<?, ?it/s]

Test_Loss:  0.120
(0.32317738508267707, [0.2999138478436522, 0.4119317281417336, 0.2106312585585642, 0.30339049405778945, 0.5232086579992391, 0.18998832389508372])
