In [1]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import pandas as pd
from sklearn.metrics import classification_report
import numpy as np
from sklearn.model_selection import train_test_split
import config
from model import roberta_model
from transformers import get_linear_schedule_with_warmup, AdamW
import dataset
from torch import nn, optim
from collections import defaultdict
from tqdm import tqdm
import engine
import transformers

In [2]:
# Load the cleaned NLI dataset from the directory above the working directory.
# NOTE(review): the preview of this frame shows three 'Unnamed: *' columns, which
# look like index columns written back by earlier to_csv calls -- consider
# dropping them at the source.
df = pd.read_csv(filepath_or_buffer='../cleaned_nli.csv')

In [3]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,premise,hypothesis,labels,preprocessed_premise,labels_encoded
0,0,0,BANK MANAGER : FD tudwa rahe aap ? Time se peh...,TITLI ka koi interest nahi katega,contradictory,FD tudwa rahe aap ? Time se pehle ? Woji ch...,1
1,1,1,ROHAN : Tumhaare liye ... ## ROHAN : Ye bhi .....,'',entailment,Tumhaare liye ... Ye bhi ... agar padhne ka...,0
2,2,2,BHAIRAV : Paanch minute late ... ## ROHAN : Ba...,BHAIRAV ne ROHAN ke dress ko leechad kapda kaha.,entailment,Paanch minute late ... Baal bana raha tha ....,0
3,3,3,ASLAM : Paanch minute ... ## ASLAM : Pack ... ...,ASLAM wanted to go to toilet.,entailment,Paanch minute ... Pack ... Main zara halka ...,0
4,4,4,APU : Kaun se school mein tha ? ## ROHAN : Bis...,ROHAN Shimla se hai.,contradictory,Kaun se school mein tha ? Bishop Cotton . ...,1


In [4]:
# Pair each preprocessed premise with its hypothesis:
# one [premise, hypothesis] list per row of df.
combined_thesis = df.loc[:, ['preprocessed_premise', 'hypothesis']].values.tolist()

In [5]:
# Attach the [premise, hypothesis] pairs as a column of df.
# NOTE(review): 'combined_thesis' and 'label' are presumably the column names
# dataset.create_data_loader reads -- confirm in dataset.py.
df['combined_thesis'] = combined_thesis
# Expose the integer-encoded labels under the name 'label'.
df['label'] = df.loc[:, 'labels_encoded']

In [6]:
# Hold out 10% of the rows for validation; the fixed random_state keeps the
# split reproducible across runs.
df_train, df_val = train_test_split(df, test_size=0.1, random_state=0)

In [7]:
# Build the training and validation loaders with the same tokenizer, maximum
# length and batch size, all taken from config. The training loader is created
# first, then the validation loader.
train_data_loader, val_data_loader = (
    dataset.create_data_loader(frame, config.tokenizer, config.max_len, config.batch_size)
    for frame in (df_train, df_val)
)

In [8]:
# Resolve the target device from config.
device = torch.device(config.DEVICE)
# Instantiate the project model and move it onto the device.
# NOTE(review): the argument 2 is presumably the number of output classes --
# confirm in model.py. nn.DataParallel wrapping is currently disabled.
model = roberta_model(2).to(device)

Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.bias', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

In [9]:
# Bookkeeping used by the training loop: per-epoch metric lists and the best
# validation accuracy seen so far.
history = defaultdict(list)
best_accuracy = 0

# Cross-entropy loss, moved to the same device as the model.
loss_fn = nn.CrossEntropyLoss().to(device)

# AdamW (as imported from transformers) over all model parameters, with bias
# correction disabled.
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)

# Schedule length: len(train_data_loader) per epoch times the number of epochs.
total_steps = config.EPOCHS * len(train_data_loader)

# Linear learning-rate schedule with no warmup steps.
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)



In [10]:
# Fine-tuning loop: for each epoch, run one training pass and one validation
# pass, record the four metrics in `history`, and checkpoint the model whenever
# validation accuracy improves on the best value seen so far.
# NOTE(review): the recorded run of this cell stops during the first epoch with
# "ValueError: Expected input batch_size (128) to match target batch_size (16)."
# The shape mismatch presumably originates in the dataset/model/engine modules,
# which are not visible from this notebook -- confirm there.
print("Training...\n")
for epoch in tqdm(range(config.EPOCHS)):

    print('Epoch {} \n'.format(epoch + 1))
    print("-"*100)

    # Training pass over train_data_loader; returns (accuracy, loss).
    train_acc, train_loss = engine.train_roberta_epoch(
        model, train_data_loader, loss_fn, optimizer, device, scheduler, len(df_train)
    )
    print('train_acc {} train_loss {}'.format(train_acc, train_loss))

    # Validation pass over val_data_loader; returns (accuracy, loss).
    val_acc, val_loss = engine.eval_roberta(
        model, val_data_loader, loss_fn, device, len(df_val)
    )
    print('val_acc {} val_loss {}'.format(val_acc, val_loss))
    print()

    # Record this epoch's metrics in the same key order as before.
    for metric_name, metric_value in (
        ('train_acc', train_acc),
        ('train_loss', train_loss),
        ('val_acc', val_acc),
        ('val_loss', val_loss),
    ):
        history[metric_name].append(metric_value)

    if val_acc > best_accuracy:
        # model.state_dict() holds only the model's parameters and buffers;
        # the optimizer state is NOT part of this checkpoint.
        torch.save(model.state_dict(), 'best_model.bin')
        best_accuracy = val_acc

print("Training completed...")
print("Testing...\n")

Training...



  0%|                                                                                                          | 0/2 [00:00<?, ?it/s]

Epoch 1 

----------------------------------------------------------------------------------------------------
LEN D 4


  0%|                                                                                                          | 0/2 [00:00<?, ?it/s]


ValueError: Expected input batch_size (128) to match target batch_size (16).

In [15]:
# Load the XLM-RoBERTa sequence-classification model directly from the path
# configured in config.ROBERTA_PATH, with a two-label classification head.
roberta = transformers.XLMRobertaForSequenceClassification.from_pretrained(
    config.ROBERTA_PATH,
    num_labels=2,
)

Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.den

In [17]:
# Inspect the hidden size recorded in the loaded model's configuration.
roberta.config.hidden_size

1024