In [1]:
import transformers
import torch
import logging
import os
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

logging.basicConfig(level=logging.ERROR)

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"CUDA: {torch.cuda.device_count()}, use {torch.cuda.get_device_name(0)}")

elif torch.mps.is_available():
    device = torch.device("mps")
    print(f"MPS: {torch.mps.device_count()}")

else:
    device = torch.device("cpu")
    print("CPU")

print("Transformers version:", transformers.__version__)
print("PyTorch version:", torch.__version__)



MPS: 1
Transformers version: 4.47.1
PyTorch version: 2.5.1


In [2]:
tokenizer = transformers.GPT2Tokenizer.from_pretrained('gpt2')

In [3]:
import json
with open("../data/raw_data/train_claims_quantemp.json") as f:
  train_data = json.load(f)

len(train_data)

9935

In [4]:
train_data[2]

{'crawled_date': '2023-05-25T15:13:20',
 'country_of_origin': 'france',
 'label': 'False',
 'url': 'https://factcheck.afp.com/gates-foundation-targeted-misleading-claims-about-india-polio-vaccine-campaign',
 'lang': 'en',
 'claim': "Bill Gates' foundation tested a polio vaccine in India that left at least 490,000 children paralyzed",
 'taxonomy_label': 'statistical',
 'label_original': 'Misleading'}

In [5]:
import json
with open("../data/raw_data/val_claims_quantemp.json") as f:
  val_data = json.load(f)

len(val_data)

3084

In [6]:

val_data[2]

{'country_of_origin': 'usa',
 'label': 'True',
 'url': 'https://www.politifact.com/factchecks/2018/sep/18/house-majority-pac/dino-rossi-take-away-coverage-45000-children/',
 'lang': 'en',
 'claim': 'Says Dino Rossi "stripped" health care "from 45,000 children."',
 'doc': 'Did Dino Rossi, a Republican Washington state senator running for U.S. Congress, once take away coverage from 45,000 children? That’s what an attack ad by the House Majority PACclaimed. "He’s back. Dino Rossi," the voiceover in the ad says. "Rossi first showed up in Olympia decades ago. He went to work! Health care stripped from 45,000 children." They’re referencing a 2003 budget measure, back when Rossi was the state Senate Ways and Means chairman. The budget was looking bleak, so Republicans proposed large spending cuts that year. One, proposed by Rossi, was a $50 million cut in Medicaid spending. The cut would have eliminated coverage for nearly 40,000 low-income children, according to reporting at the time. But th

In [7]:
from sklearn.preprocessing import LabelEncoder
LE = LabelEncoder()

In [8]:
def get_features(data):
    features = []
    evidences = []

    for index, fact in enumerate(data):
        claim = fact["claim"]


        feature = "[Claim]:"+claim+"[Evidences]:"+fact["doc"]
        features.append(feature)
    return features

In [9]:
train_features = get_features(train_data)

In [10]:
len(train_features)

9935

In [11]:
train_features[1]

'[Claim]:Florida residents affected by Hurricane Irma can receive $197 in food stamp benefits, but only if they can show that their homes lost power for more than two hours.[Evidences]:In September 2017, Facebook users spread several rumors about government benefits and assistance available to Florida residents in the aftermath of Hurricane Irma. One of these rumors involved the provision of food stamps for Floridians whose homes had experienced power outages of at least two hours in duration. A typical expression of the rumor read as follows: #FLORIDA If your power was out for more than two hours. You are eligible for food stamps. $197.00 per person. Go towww.myflorida.com/accessflorida… Apply for benefits..create an account. Click on food stamps or snaps..DO NOT APPLY FOR CASH…Make sure you have a valid phone number so they can verify your zip code… A similar rumor claimed that the Federal Emergency Management Agency (FEMA), rather than the state of Florida, was overseeing food stamp

In [12]:
val_features = get_features(val_data)

In [13]:
len(val_features)

3084

In [14]:
val_features[2]

'[Claim]:Says Dino Rossi "stripped" health care "from 45,000 children."[Evidences]:Did Dino Rossi, a Republican Washington state senator running for U.S. Congress, once take away coverage from 45,000 children? That’s what an attack ad by the House Majority PACclaimed. "He’s back. Dino Rossi," the voiceover in the ad says. "Rossi first showed up in Olympia decades ago. He went to work! Health care stripped from 45,000 children." They’re referencing a 2003 budget measure, back when Rossi was the state Senate Ways and Means chairman. The budget was looking bleak, so Republicans proposed large spending cuts that year. One, proposed by Rossi, was a $50 million cut in Medicaid spending. The cut would have eliminated coverage for nearly 40,000 low-income children, according to reporting at the time. But the cut didn’t fly with Democrats, and ended up being scrapped in the final budget. The final budget, which won bipartisan support, wasn\'t a specific cut, but it did include rules changes. Th

In [15]:
train_labels = [fact["label"] for fact in train_data]
val_labels = [fact["label"] for fact in val_data]

In [16]:
train_labels_final = LE.fit_transform(train_labels)
train_labels_final

array([1, 1, 1, ..., 0, 0, 1])

In [17]:
train_labels_final[:20]

array([1, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 0, 1, 2, 2, 1, 1, 0])

In [18]:
val_labels_final = LE.transform(val_labels)
val_labels_final

array([1, 1, 2, ..., 1, 0, 2])

In [19]:
val_data[-1]

{'crawled_date': '2022-10-06T21:00:06',
 'country_of_origin': 'usa',
 'label': 'True',
 'url': 'https://www.politifact.com/factchecks/2021/oct/28/randy-feenstra/biden-administration-predicted-liquid-fuel-cars-ou/',
 'lang': 'en',
 'claim': 'The Biden administration "published a study concluding 4 (of) 5 new cars on the road by 2050 will still require liquid fuels."',
 'doc': 'President Joe Biden was in Michigan’s auto industry hub on Oct. 5 when he said, "the whole world knows that the future of the auto industry is electric." Rep. Randy Feenstra, R-Iowa, had a quick response, writing on Twitter: ".@POTUS no it’s not — in fact, your own administration published a study concluding 4/5 new cars on the road by 2050 will still require liquid fuels ... "It’s past time Biden lives up to his promise to expand clean-burning #biofuels. Don’t mess with the RFS!" Feenstra is correct about the share of cars in the United States projected to use liquid fuels. The U.S. Energy Information Administrat

In [20]:
len(val_labels_final)

3084

In [21]:
input_ids = []
attention_masks = []

tokenizer.pad_token = tokenizer.eos_token
for sent in train_features:
    # `encode_plus` will:
    #   (1) Tokenize the sentence.
    #   (2) Prepend the `[CLS]` token to the start.
    #   (3) Append the `[SEP]` token to the end.
    #   (4) Map tokens to their IDs.
    #   (5) Pad or truncate the sentence to `max_length`
    #   (6) Create attention masks for [PAD] tokens.
    encoded_dict = tokenizer.encode_plus(
                        sent,                      # Sentence to encode.
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = 256,           # Pad & truncate all sentences.
                        pad_to_max_length = True,
                        truncation=True,
                        return_attention_mask = True,   # Construct attn. masks.
                        return_tensors = 'pt',     # Return pytorch tensors.
                   )

    # Add the encoded sentence to the list.
    input_ids.append(encoded_dict['input_ids'])
    print(encoded_dict['input_ids'].shape)

    # And its attention mask (simply differentiates padding from non-padding).
    attention_masks.append(encoded_dict['attention_mask'])
# Convert the lists into tensors.
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)


# Print sentence 0, now as a list of IDs.
print('Original: ', train_features[0])
print('Token IDs:', input_ids[0])

torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])




torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1, 256])
torch.Size([1

In [22]:
val_input_ids = []
val_attention_masks = []

for sent in val_features:
    # `encode_plus` will:
    #   (1) Tokenize the sentence.
    #   (2) Prepend the `[CLS]` token to the start.
    #   (3) Append the `[SEP]` token to the end.
    #   (4) Map tokens to their IDs.
    #   (5) Pad or truncate the sentence to `max_length`
    #   (6) Create attention masks for [PAD] tokens.
    encoded_dict = tokenizer.encode_plus(
                        sent,                      # Sentence to encode.
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = 256,           # Pad & truncate all sentences.
                        pad_to_max_length = True,
                        truncation=True,
                        return_attention_mask = True,   # Construct attn. masks.
                        return_tensors = 'pt',     # Return pytorch tensors.
                   )

    # Add the encoded sentence to the list.
    val_input_ids.append(encoded_dict['input_ids'])

    # And its attention mask (simply differentiates padding from non-padding).
    val_attention_masks.append(encoded_dict['attention_mask'])
# Convert the lists into tensors.
val_input_ids = torch.cat(val_input_ids, dim=0)
val_attention_masks = torch.cat(val_attention_masks, dim=0)


# Print sentence 0, now as a list of IDs.
print('Original: ', val_features[0])
print('Token IDs:', val_input_ids[0])

Original:  [Claim]:Amit Shah said Narendra Modi sleeps for 24 hours for the welfare of the poor.[Evidences]:The India Today Anti-Fake News War Room found the viral video of Amit Shah's statement was clipped and presented out of context. A short video clip of Union Home Minister Amit Shah has gone viral with the claim that at a political rally, he said that Prime Minister Narendra Modi sleeps 24 hours for the welfare of the poor. Several Twitter and Facebook users shared this video clip with captions like, “Modi ji sleeps for 24 hours”. The India Today Anti-Fake News War Room ( AFWA) found the viral video was clipped and presented out of context to give it a different meaning. In the original video, Shah can be heard saying that PM Modi thinks about the welfare of the poor 24 hours a day while “Didi” (Mamta Banerjee) wonders when her nephew would become the Chief Minister. Shah made the statement while addressing a public meeting in Chapra, West Bengal, in April 2021. The viral posts ar

In [23]:
train_labels_final = torch.tensor(train_labels_final)
val_labels_final = torch.tensor(val_labels_final)

In [24]:
val_labels_final.shape

torch.Size([3084])

In [25]:
len(val_input_ids)

3084

In [26]:
num_classes = len(list(set(train_labels)))
list(set(train_labels))

['Conflicting', 'True', 'False']

In [27]:

num_classes

3

In [28]:
LE.classes_

array(['Conflicting', 'False', 'True'], dtype='<U11')

In [29]:
from torch.utils.data import TensorDataset, random_split
# train_poincare_tensor = torch.tensor(poincare_embeddings_final,dtype=torch.float)
# difficulty_tensor = torch.tensor(difficulty_level_vectors,dtype=torch.float)
# Combine the training inputs into a TensorDataset.
dataset = TensorDataset(input_ids, attention_masks, train_labels_final)
val_dataset = TensorDataset(val_input_ids, val_attention_masks,val_labels_final)


print(dataset.tensors[0].shape)
print(dataset.tensors[1].shape)
print(dataset.tensors[2].shape)
#

torch.Size([9935, 256])
torch.Size([9935, 256])
torch.Size([9935])


In [None]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
batch_size = 16
train_dataloader = DataLoader(
            dataset,  # The training samples.
            sampler = RandomSampler(dataset), # Select batches randomly
            batch_size = batch_size # Trains with this batch size.
        )

validation_dataloader = DataLoader(
            val_dataset, # The validation samples.
            sampler = SequentialSampler(val_dataset),
            batch_size = batch_size
        )


In [None]:
from torch import nn
class MultiClassClassifier(nn.Module):
    def __init__(self, model_path, labels_count, hidden_dim=768, mlp_dim=500, extras_dim=100, dropout=0.1, freeze_bert=False):
        super().__init__()

        self.gpt2 = transformers.GPT2Model.from_pretrained(model_path, output_attentions=True, output_hidden_states=True)
        # self.gpt2.load_state_dict(state_dict_final,strict=False)

        self.dropout = nn.Dropout(dropout)
        self.mlp = nn.Sequential(
            nn.Linear(hidden_dim, mlp_dim),
            nn.ReLU(),
            # nn.Linear(mlp_dim, mlp_dim),
            # # nn.ReLU(),
            # # nn.Linear(mlp_dim, mlp_dim),
            # nn.ReLU(),
            nn.Linear(mlp_dim, labels_count)
        )
        # self.softmax = nn.LogSoftmax(dim=1)
        if freeze_bert:
            print("Freezing layers")
            for param in self.gpt2.parameters():
                param.requires_grad = False

    def forward(self, tokens, masks):
        output = self.gpt2(tokens, attention_mask=masks)
        # print(f'gpt output: {output[0].shape}')
        last_hidden_state = output[0]  # Shape: [batch_size, seq_len, hidden_size]
        # print(f'last_hidden_state: {last_hidden_state.shape}')
        cls_representation = last_hidden_state[:, 0, :]  # Shape: [batch_size, hidden_size]
        # print(f'cls_representation: {cls_representation.shape}')

        # Ensure cls_representation is of shape [batch_size, hidden_dim] (768 in your case)
        dropout_output = self.dropout(cls_representation)  # Apply dropout
        # print(f'dropout_output: {dropout_output.shape}')
        mlp_output = self.mlp(dropout_output)  # Pass through MLP

        return mlp_output

In [None]:
# state_dict = torch.load("/content/FinQA_ELASTIC-RoBerta-large/checkpoint_best_0.65.pt", map_location="cpu")
#
# state_dict_final = {}
# for key,value in state_dict.items():
#   if "plm_model" in key:
#     state_dict_final[key.split("plm_model.")[1]] = value

In [None]:
from transformers import AdamW

# Loads BertForSequenceClassification, the pretrained BERT model with a single
model = MultiClassClassifier("gpt2", num_classes, 768, 1024, 140, dropout=0.1, freeze_bert=False)

# model.load_state_dict(torch.load("model_bert_difficulty_prediction/model_weights"))

# Tell pytorch to run this model on the GPU.


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

In [None]:
model = model.to(device)

In [None]:
optimizer = AdamW(model.parameters(),
                  lr = 2e-5,
                  eps = 1e-8 # args.adam_epsilon  - default is 1e-8.
                )



In [None]:
from transformers import get_linear_schedule_with_warmup


epochs = 20

# Total number of training steps is [number of batches] x [number of epochs].
total_steps = len(train_dataloader) * epochs



In [None]:
len(train_dataloader)

621

In [None]:
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps = 0, # Default value in run_glue.py
                                            num_training_steps = total_steps)

In [None]:
import numpy as np

# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

In [None]:
import time
import datetime

def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))

    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [None]:
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [None]:
# for param in model.gpt2.encoder.layer[0:5].parameters():
#     param.requires_grad=False

In [None]:
loss_func = nn.CrossEntropyLoss()


In [None]:
import random
import os
import numpy as np


# This training code is based on the `run_glue.py` script here:
# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128

# Set the seed value all over the place to make this reproducible.
seed_val = 42

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# We'll store a number of quantities such as training and validation loss,
# validation accuracy, and timings.
training_stats = []

# Measure the total training time for the whole run.
total_t0 = time.time()
early_stopping = EarlyStopping(patience=3, verbose=True)
# For each epoch...
for epoch_i in range(0, epochs):

    # ========================================
    #               Training
    # ========================================

    # Perform one full pass over the training set.

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Measure how long the training epoch takes.
    t0 = time.time()

    # Reset the total loss for this epoch.
    total_train_accuracy = 0
    total_train_loss = 0

    # Put the model into training mode. Don't be mislead--the call to
    # `train` just changes the *mode*, it doesn't *perform* the training.
    # `dropout` and `batchnorm` layers behave differently during training
    # vs. test (source: https://stackoverflow.com/questimport gensim.downloader as api
    model.train()

    # For each batch of training data...
    for step, batch in enumerate(train_dataloader):

        # Progress update every 40 batches.
        if step % 40 == 0 and not step == 0:
            # Calculate elapsed time in minutes.
            elapsed = format_time(time.time() - t0)

            # Report progress.
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        # Unpack this training batch from our dataloader.
        #
        # As we unpack the batch, we'll also copy each tensor to the GPU using the
        # `to` method.
        #
        # `batch` contains three pytorch tensors:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: labels

        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        # b_poincare = batch[2].to(device)
        # b_difficulty = batch[3].to(device)
        b_labels = batch[2].to(device)
        # skill_labels = batch[3].to(device)

        # Always clear any previously calculated gradients before performing a
        # backward pass. PyTorch doesn't do this automatically because
        # accumulating the gradients is "convenient while training RNNs".
        # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
        model.zero_grad()

        # Perform a forward pass (evaluate the model on this training batch).
        probas = model(b_input_ids,b_input_mask)

        # Accumulate the training loss over all of the batches so that we can
        # calculate the average loss at the end. `loss` is a Tensor containing a
        # single value; the `.item()` function just returns the Python value
        # from the tensor.
        loss = loss_func(probas, b_labels)
        total_train_loss += loss.item()

        # Perform a backward pass to calculate the gradients.
        loss.backward()

        # Clip the norm of the gradients to 1.0.
        # This is to help prevent the "exploding gradients" problem.
        # torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update parameters and take a step using the computed gradient.
        # The optimizer dictates the "update rule"--how the parameters are
        # modified based on their gradients, the learning rate, etc.
        optimizer.step()

        # Update the learning rate.
        # scheduler.step()
        logits = probas.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        total_train_accuracy += flat_accuracy(logits, label_ids)
    avg_train_accuracy = total_train_accuracy / len(train_dataloader)
    print(" Train Accuracy: {0:.2f}".format(avg_train_accuracy))

    # Calculate the average loss over all of the batches.
    avg_train_loss = total_train_loss / len(train_dataloader)



    # Measure how long this epoch took.
    training_time = format_time(time.time() - t0)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epcoh took: {:}".format(training_time))

    # ========================================
    #               Validation
    # ========================================
    # After the completion of each training epoch, measure our performance on
    # our validation set.

    print("")
    print("Running Validation...")

    t0 = time.time()

    # Put the model in evaluation mode--the dropout layers behave differently
    # during evaluation.
    model.eval()

    # Tracking variables
    total_eval_accuracy = 0
    total_eval_loss = 0
    nb_eval_steps = 0

    # Evaluate data for one epoch
    for batch in validation_dataloader:

        # Unpack this training batch from our dataloader.
        #
        # As we unpack the batch, we'll also copy each tensor to the GPU using
        # the `to` method.
        #
        # `batch` contains three pytorch tensors:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: labels
        b_input_ids = batch[0].to(device)

        b_input_mask = batch[1].to(device)
        # b_poincare = batch[2].to(device)
        # b_difficulty = batch[3].to(device)
        b_labels = batch[2].to(device)
        # skill_labels = batch[3].to(device)

        # Tell pytorch not to bother with constructing the compute graph during
        # the forward pass, since this is only needed for backprop (training).
        with torch.no_grad():

            # Forward pass, calculate logit predictions.

          logits = model(b_input_ids,b_input_mask)

        # Accumulate the validation loss.
        loss = loss_func(logits, b_labels)
        total_eval_loss += loss.item()

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Calculate the accuracy for this batch of test sentences, and
        # accumulate it over all batches.
        total_eval_accuracy += flat_accuracy(logits, label_ids)


    # Report the final accuracy for this validation run.
    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
    print("  Accuracy: {0:.2f}".format(avg_val_accuracy))

    # Calculate the average loss over all of the batches.
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    early_stopping(avg_val_loss, model)
    if early_stopping.early_stop:
      print("Early stopping")
      break
    # Measure how long the validation run took.
    validation_time = format_time(time.time() - t0)

    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))
    output_dir = 'finqa_roberta_claimdecomp_continued/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print("Saving model to %s" % output_dir)
    tokenizer.save_pretrained(output_dir)
    torch.save(model.state_dict(), os.path.join(output_dir, 'model_weights'))

    # !rm -rf "/content/drive/My Drive/ecir_compnumfacts/finqa_roberta_claimdecomp_continued"
    # !mv finqa_roberta_claimdecomp_continued "/content/drive/My Drive/ecir_compnumfacts/"
    # Record all statistics from this epoch.
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

print("")
print("Training complete!")

print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))


Training...




  Batch    40  of    621.    Elapsed: 0:00:30.
  Batch    80  of    621.    Elapsed: 0:01:00.
  Batch   120  of    621.    Elapsed: 0:01:31.
  Batch   160  of    621.    Elapsed: 0:02:01.
  Batch   200  of    621.    Elapsed: 0:02:31.
  Batch   240  of    621.    Elapsed: 0:03:01.
  Batch   280  of    621.    Elapsed: 0:03:31.
  Batch   320  of    621.    Elapsed: 0:04:02.
  Batch   360  of    621.    Elapsed: 0:04:32.
  Batch   400  of    621.    Elapsed: 0:05:02.
  Batch   440  of    621.    Elapsed: 0:05:32.
  Batch   480  of    621.    Elapsed: 0:06:03.
  Batch   520  of    621.    Elapsed: 0:06:33.
  Batch   560  of    621.    Elapsed: 0:07:03.
  Batch   600  of    621.    Elapsed: 0:07:34.
 Train Accuracy: 0.57

  Average training loss: 0.99
  Training epcoh took: 0:07:49

Running Validation...
  Accuracy: 0.58
Validation loss decreased (inf --> 0.969733).  Saving model ...
  Validation Loss: 0.97
  Validation took: 0:01:00
Saving model to finqa_roberta_claimdecomp_continued/

Tr

In [None]:
tokenizer.save_pretrained("models/gpt2-ft-tokenizer")
torch.save(model.state_dict(), os.path.join("models/", 'model_weights'))

# !rm -rf "/content/drive/My Drive/ecir_compnumfacts/finqa_roberta_claimdecomp_continued_early_stop"
# !mv finqa_roberta_claimdecomp_continued_early_stop "/content/drive/My Drive/ecir_compnumfacts/"

In [None]:
LE.inverse_transform([0,1,2])

array(['Conflicting', 'False', 'True'], dtype='<U11')

In [None]:


from huggingface_hub import login

In [None]:
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
LE.classes_

array(['Conflicting', 'False', 'True'], dtype='<U11')