In [1]:
! pip install torch
! pip install transformers
! pip install scikit-learn
! pip install tqdm
! pip install numpy
! pip install datasets
! pip install nltk
import nltk
nltk.download('stopwords')
! pip install scipy
! pip install transformers[torch] accelerate


Collecting transformers
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m51.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m107.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m83.3 MB/s[0m eta [36m0:00:

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Collecting accelerate
  Downloading accelerate-0.21.0-py3-none-any.whl (244 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.21.0


In [None]:
from datasets import load_dataset
dataset_dict = load_dataset("lex_glue", 'case_hold')
#print(dataset)
#Divide into train,dev,test

from sklearn.model_selection import train_test_split

#data_list = list(dataset_dict.items())

train_set_dict = dataset_dict['test']
print(train_set_dict[1])



In [2]:
from dataclasses import dataclass
from typing import Optional, Tuple

import torch
import numpy as np
from torch import nn
from transformers.file_utils import ModelOutput


@dataclass
class SimpleOutput(ModelOutput):
    last_hidden_state: torch.FloatTensor = None
    past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None


def sinusoidal_init(num_embeddings: int, embedding_dim: int):
    # keep dim 0 for padding token position encoding zero vector
    position_enc = np.array([
        [pos / np.power(10000, 2 * i / embedding_dim) for i in range(embedding_dim)]
        if pos != 0 else np.zeros(embedding_dim) for pos in range(num_embeddings)])

    position_enc[1:, 0::2] = np.sin(position_enc[1:, 0::2])  # dim 2i
    position_enc[1:, 1::2] = np.cos(position_enc[1:, 1::2])  # dim 2i+1
    return torch.from_numpy(position_enc).type(torch.FloatTensor)


class HierarchicalBert(nn.Module):

    def __init__(self, encoder, max_segments=64, max_segment_length=128):
        super(HierarchicalBert, self).__init__()
        supported_models = ['bert', 'roberta', 'deberta']
        assert encoder.config.model_type in supported_models  # other model types are not supported so far
        # Pre-trained segment (token-wise) encoder, e.g., BERT
        self.encoder = encoder
        # Specs for the segment-wise encoder
        self.hidden_size = encoder.config.hidden_size
        self.max_segments = max_segments
        self.max_segment_length = max_segment_length
        # Init sinusoidal positional embeddings
        self.seg_pos_embeddings = nn.Embedding(max_segments + 1, encoder.config.hidden_size,
                                               padding_idx=0,
                                               _weight=sinusoidal_init(max_segments + 1, encoder.config.hidden_size))
        # Init segment-wise transformer-based encoder
        self.seg_encoder = nn.Transformer(d_model=encoder.config.hidden_size,
                                          nhead=encoder.config.num_attention_heads,
                                          batch_first=True, dim_feedforward=encoder.config.intermediate_size,
                                          activation=encoder.config.hidden_act,
                                          dropout=encoder.config.hidden_dropout_prob,
                                          layer_norm_eps=encoder.config.layer_norm_eps,
                                          num_encoder_layers=2, num_decoder_layers=0).encoder

    def forward(self,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                position_ids=None,
                head_mask=None,
                inputs_embeds=None,
                labels=None,
                output_attentions=None,
                output_hidden_states=None,
                return_dict=None,
                ):
        # Hypothetical Example
        # Batch of 4 documents: (batch_size, n_segments, max_segment_length) --> (4, 64, 128)
        # BERT-BASE encoder: 768 hidden units

        # Squash samples and segments into a single axis (batch_size * n_segments, max_segment_length) --> (256, 128)
        input_ids_reshape = input_ids.contiguous().view(-1, input_ids.size(-1))
        attention_mask_reshape = attention_mask.contiguous().view(-1, attention_mask.size(-1))
        if token_type_ids is not None:
            token_type_ids_reshape = token_type_ids.contiguous().view(-1, token_type_ids.size(-1))
        else:
            token_type_ids_reshape = None

        # Encode segments with BERT --> (256, 128, 768)
        encoder_outputs = self.encoder(input_ids=input_ids_reshape,
                                       attention_mask=attention_mask_reshape,
                                       token_type_ids=token_type_ids_reshape)[0]

        # Reshape back to (batch_size, n_segments, max_segment_length, output_size) --> (4, 64, 128, 768)
        encoder_outputs = encoder_outputs.contiguous().view(input_ids.size(0), self.max_segments,
                                                            self.max_segment_length,
                                                            self.hidden_size)

        # Gather CLS outputs per segment --> (4, 64, 768)
        encoder_outputs = encoder_outputs[:, :, 0]

        # Infer real segments, i.e., mask paddings
        seg_mask = (torch.sum(input_ids, 2) != 0).to(input_ids.dtype)
        # Infer and collect segment positional embeddings
        seg_positions = torch.arange(1, self.max_segments + 1).to(input_ids.device) * seg_mask
        # Add segment positional embeddings to segment inputs
        encoder_outputs += self.seg_pos_embeddings(seg_positions)

        # Encode segments with segment-wise transformer
        seg_encoder_outputs = self.seg_encoder(encoder_outputs)

        # Collect document representation
        outputs, _ = torch.max(seg_encoder_outputs, 1)

        return SimpleOutput(last_hidden_state=outputs, hidden_states=outputs)


if __name__ == "__main__":
    from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

    # Use as a stand-alone encoder
    bert = AutoModel.from_pretrained('bert-base-uncased')
    model = HierarchicalBert(encoder=bert, max_segments=64, max_segment_length=128)

    fake_inputs = {'input_ids': [], 'attention_mask': [], 'token_type_ids': []}
    for i in range(4):
        # Tokenize segment
        temp_inputs = tokenizer(['dog ' * 126] * 64)
        fake_inputs['input_ids'].append(temp_inputs['input_ids'])
        fake_inputs['attention_mask'].append(temp_inputs['attention_mask'])
        fake_inputs['token_type_ids'].append(temp_inputs['token_type_ids'])

    fake_inputs['input_ids'] = torch.as_tensor(fake_inputs['input_ids'])
    fake_inputs['attention_mask'] = torch.as_tensor(fake_inputs['attention_mask'])
    fake_inputs['token_type_ids'] = torch.as_tensor(fake_inputs['token_type_ids'])

    output = model(fake_inputs['input_ids'], fake_inputs['attention_mask'], fake_inputs['token_type_ids'])

    # 4 document representations of 768 features are expected
    assert output[0].shape == torch.Size([4, 768])

    # Use with HuggingFace AutoModelForSequenceClassification and Trainer API

    # Init Classifier
    model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=10)
    # Replace flat BERT encoder with hierarchical BERT encoder
    model.bert = HierarchicalBert(encoder=model.bert, max_segments=64, max_segment_length=128)
    output = model(fake_inputs['input_ids'], fake_inputs['attention_mask'], fake_inputs['token_type_ids'])

    # 4 document outputs with 10 (num_labels) logits are expected
    assert output.logits.shape == torch.Size([4, 10])



Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
from torch import nn
from transformers import Trainer


class MultilabelTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = nn.BCEWithLogitsLoss()
        loss = loss_fct(logits.view(-1, self.model.config.num_labels),
                        labels.float().view(-1, self.model.config.num_labels))
        return (loss, outputs) if return_outputs else loss





In [4]:
import logging
import os
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional

import tqdm
import re

from filelock import FileLock
from transformers import PreTrainedTokenizer, is_tf_available, is_torch_available
import datasets

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class InputFeatures:
    """
    A single set of features of data.
    Property names are the same names as the corresponding inputs to a model.
    """

    input_ids: List[List[int]]
    attention_mask: Optional[List[List[int]]]
    token_type_ids: Optional[List[List[int]]]
    label: Optional[int]


class Split(Enum):
    train = "train"
    dev = "dev"
    test = "test"


if is_torch_available():
    import torch
    from torch.utils.data.dataset import Dataset

    class MultipleChoiceDataset(Dataset):
        """
        PyTorch multiple choice dataset class
        """

        features: List[InputFeatures]

        def __init__(
            self,
            tokenizer: PreTrainedTokenizer,
            task: str,
            max_seq_length: Optional[int] = None,
            overwrite_cache=False,
            mode: Split = Split.train,
        ):
            dataset = datasets.load_dataset('lex_glue', task)
            tokenizer_name = re.sub('[^a-z]+', ' ', tokenizer.name_or_path).title().replace(' ', '')
            cached_features_file = os.path.join(
                '.cache',
                task,
                "cached_{}_{}_{}_{}".format(
                    mode.value,
                    tokenizer_name,
                    str(max_seq_length),
                    task,
                ),
            )

            # Make sure only the first process in distributed training processes the dataset,
            # and the others will use the cache.
            lock_path = cached_features_file + ".lock"
            if not os.path.exists(os.path.join('.cache', task)):
                if not os.path.exists('.cache'):
                    os.mkdir('.cache')
                os.mkdir(os.path.join('.cache', task))
            with FileLock(lock_path):

                if os.path.exists(cached_features_file) and not overwrite_cache:
                    logger.info(f"Loading features from cached file {cached_features_file}")
                    self.features = torch.load(cached_features_file)
                else:
                    logger.info(f"Creating features from dataset file at {task}")
                    if mode == Split.dev:
                        examples = dataset['validation']
                    elif mode == Split.test:
                        examples = dataset['test']
                    elif mode == Split.train:
                        examples = dataset['train']
                    logger.info("Training examples: %s", len(examples))
                    self.features = convert_examples_to_features(
                        examples,
                        max_seq_length,
                        tokenizer,
                    )
                    logger.info("Saving features into cached file %s", cached_features_file)
                    torch.save(self.features, cached_features_file)

        def __len__(self):
            return len(self.features)

        def __getitem__(self, i) -> InputFeatures:
            return self.features[i]


if is_tf_available():
    import tensorflow as tf

    class TFMultipleChoiceDataset:
        """
        TensorFlow multiple choice dataset class
        """

        features: List[InputFeatures]

        def __init__(
            self,
            tokenizer: PreTrainedTokenizer,
            task: str,
            max_seq_length: Optional[int] = 256,
            overwrite_cache=False,
            mode: Split = Split.train,
        ):
            dataset = datasets.load_dataset('lex_glue')

            logger.info(f"Creating features from dataset file at {task}")
            if mode == Split.dev:
                examples = dataset['validation']
            elif mode == Split.test:
                examples = dataset['test']
            else:
                examples = dataset['train']
            logger.info(f"{mode.name.title()} examples: %s", len(examples))

            self.features = convert_examples_to_features(
                examples,
                max_seq_length,
                tokenizer,
            )

            def gen():
                for (ex_index, ex) in tqdm.tqdm(enumerate(self.features), desc="convert examples to features"):
                    if ex_index % 10000 == 0:
                        logger.info("Writing example %d of %d" % (ex_index, len(examples)))

                    yield (
                        {
                            "input_ids": ex.input_ids,
                            "attention_mask": ex.attention_mask,
                            "token_type_ids": ex.token_type_ids,
                        },
                        ex.label,
                    )

            self.dataset = tf.data.Dataset.from_generator(
                gen,
                (
                    {
                        "input_ids": tf.int32,
                        "attention_mask": tf.int32,
                        "token_type_ids": tf.int32,
                    },
                    tf.int64,
                ),
                (
                    {
                        "input_ids": tf.TensorShape([None, None]),
                        "attention_mask": tf.TensorShape([None, None]),
                        "token_type_ids": tf.TensorShape([None, None]),
                    },
                    tf.TensorShape([]),
                ),
            )

        def get_dataset(self):
            self.dataset = self.dataset.apply(tf.data.experimental.assert_cardinality(len(self.features)))

            return self.dataset

        def __len__(self):
            return len(self.features)

        def __getitem__(self, i) -> InputFeatures:
            return self.features[i]


def convert_examples_to_features(
    examples: datasets.Dataset,
    max_length: int,
    tokenizer: PreTrainedTokenizer,
) -> List[InputFeatures]:
    """
    Loads a data file into a list of `InputFeatures`
    """
    features = []
    feature_list = []
    context_params = []  # Initialize context_params list
    ending_params = []   # Initialize ending_params list
    for (ex_index, example) in tqdm.tqdm(enumerate(examples), desc="convert examples to features"):
        if ex_index % 10000 == 0:
            logger.info("Writing example %d of %d" % (ex_index, len(examples)))
        choices_inputs = []
        for ending_idx, ending in enumerate(example['endings']):
            context = example['context']
            inputs = tokenizer(
                context,
                ending,
                add_special_tokens=True,
                max_length=max_length,
                padding="max_length",
                truncation=True,
            )

            choices_inputs.append(inputs)
            context_params.append(context)  # Append context_params to list
            ending_params.append(ending)
        label = example['label']

        input_ids = [x["input_ids"] for x in choices_inputs]
        attention_mask = (
            [x["attention_mask"] for x in choices_inputs] if "attention_mask" in choices_inputs[0] else None
        )
        token_type_ids = (
            [x["token_type_ids"] for x in choices_inputs] if "token_type_ids" in choices_inputs[0] else None
        )


        feature_dict = {
                "input_ids": inputs['input_ids'],
                "attention_mask": inputs['attention_mask'],
                "token_type_ids": inputs['token_type_ids'],
                "label": label,
                "context_params": context_params,
                "ending_params": ending_params,
            }
        #feature_list.append(feature_dict)
        #features.append(feature_dict)

    for f in feature_dict[:2]:
        logger.info("*** Example ***")
        logger.info("feature: %s" % f)

    return feature_dict


In [15]:
#!/usr/bin/env python
# coding=utf-8
""" Finetuning models on CaseHOLD (e.g. Bert, RoBERTa, LEGAL-BERT)."""

import logging
import os
from dataclasses import dataclass, field
from typing import Optional
from sklearn.model_selection import ParameterGrid
import numpy as np
import random
import shutil
import glob
import os

import transformers
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
		AutoModelForMultipleChoice,
    DataCollatorWithPadding,
    EvalPrediction,
    HfArgumentParser,
    TrainingArguments,
    default_data_collator,
    set_seed,
    EarlyStoppingCallback,
    Trainer
)
from transformers.trainer_utils import is_main_process
from transformers import EarlyStoppingCallback
# from casehold_helpers import MultipleChoiceDataset, Split
from sklearn.metrics import f1_score
# from models.deberta import DebertaForMultipleChoice

logger = logging.getLogger(__name__)

param_grid = {
    'learning_rate': [1e-5, 2e-5],  # Learning rates to try
    'num_train_epochs': [1, 2],        # Number of training epochs to try
    'per_device_train_batch_size': [2, 4],  # Batch sizes for training
    'per_device_eval_batch_size': [2, 4],   # Batch sizes for evaluation
}


@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
    """

    model_name_or_path: str = field(
        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
    )
    config_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
    )
    tokenizer_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
    )


@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.
    """

    task_name: str = field(default="case_hold", metadata={"help": "The name of the task to train on"})
    max_seq_length: int = field(
        default=256,
        metadata={
            "help": "The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded."
        },
    )
    pad_to_max_length: bool = field(
        default=True,
        metadata={
            "help": "Whether to pad all samples to `max_seq_length`. "
            "If False, will pad the samples dynamically when batching to the maximum length in the batch."
        },
    )
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
            "value if set."
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
            "value if set."
        },
    )
    max_predict_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
            "value if set."
        },
    )
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )


def main(training_args):
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    data_args = DataTrainingArguments(
        max_seq_length=128,
        # max_segments=64,
        # max_seg_length=128,
        overwrite_cache=False,
        pad_to_max_length=True,
    )
    model_args = ModelArguments(
        model_name_or_path="bert-base-uncased",
        #"microsoft/deberta-base",
        # hierarchical=True,
        #do_lower_case=True,
        #use_fast_tokenizer=True,
    )
    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if training_args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        training_args.local_rank,
        training_args.device,
        training_args.n_gpu,
        bool(training_args.local_rank != -1),
        training_args.fp16,
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
        transformers.utils.logging.enable_default_handler()
        transformers.utils.logging.enable_explicit_format()
    logger.info("Training/evaluation parameters %s", training_args)

    # Set seed
    set_seed(training_args.seed)

    # Load pretrained model and tokenizer
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        num_labels=5,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
    )

    if config.model_type == 'big_bird':
        config.attention_type = 'original_full'
    elif config.model_type == 'longformer':
        config.attention_window = [data_args.max_seq_length] * config.num_hidden_layers

    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        # Default fast tokenizer is buggy on CaseHOLD task, switch to legacy tokenizer
        use_fast=True,
    )

    if config.model_type != 'deberta':
        model = AutoModelForMultipleChoice.from_pretrained(
            model_args.model_name_or_path,
            from_tf=bool(".ckpt" in model_args.model_name_or_path),
            config=config,
            cache_dir=model_args.cache_dir,
        )
    else:
        model = DebertaForMultipleChoice.from_pretrained(
            model_args.model_name_or_path,
            from_tf=bool(".ckpt" in model_args.model_name_or_path),
            config=config,
            cache_dir=model_args.cache_dir,
        )

    train_dataset = None
    eval_dataset = None

    # If do_train passed, train_dataset by default loads train split from file named train.csv in data directory
    if training_args.do_train:
        train_dataset = \
            MultipleChoiceDataset(
                tokenizer=tokenizer,
                task=data_args.task_name,
                max_seq_length=data_args.max_seq_length,
                overwrite_cache=data_args.overwrite_cache,
                mode=Split.train,
            )

    # If do_eval or do_predict passed, eval_dataset by default loads dev split from file named dev.csv in data directory
    if training_args.do_eval:
        eval_dataset = \
            MultipleChoiceDataset(
                tokenizer=tokenizer,
                task=data_args.task_name,
                max_seq_length=data_args.max_seq_length,
                overwrite_cache=data_args.overwrite_cache,
                mode=Split.dev,
            )

    if training_args.do_predict:
        predict_dataset = \
            MultipleChoiceDataset(
                tokenizer=tokenizer,
                task=data_args.task_name,
                max_seq_length=data_args.max_seq_length,
                overwrite_cache=data_args.overwrite_cache,
                mode=Split.test,
            )

    if training_args.do_train:
        if data_args.max_train_samples is not None:
            train_dataset = train_dataset[:data_args.max_train_samples]
        # Log a few random samples from the training set:
        for index in random.sample(range(len(train_dataset)), 3):
            logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")

    if training_args.do_eval:
        if data_args.max_eval_samples is not None:
            eval_dataset = eval_dataset[:data_args.max_eval_samples]

    if training_args.do_predict:
        if data_args.max_predict_samples is not None:
            predict_dataset = predict_dataset[:data_args.max_predict_samples]

    # Define custom compute_metrics function, returns macro F1 metric for CaseHOLD task
    def compute_metrics(p: EvalPrediction):
        preds = np.argmax(p.predictions, axis=1)
        # Compute macro and micro F1 for 5-class CaseHOLD task
        macro_f1 = f1_score(y_true=p.label_ids, y_pred=preds, average='macro', zero_division=0)
        micro_f1 = f1_score(y_true=p.label_ids, y_pred=preds, average='micro', zero_division=0)
        return {'macro-f1': macro_f1, 'micro-f1': micro_f1}

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
    )

    # Training
    if training_args.do_train:
        trainer.train(
            model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None
        )
        trainer.save_model()
        # Re-save the tokenizer for model sharing
        if trainer.is_world_process_zero():
            tokenizer.save_pretrained(training_args.output_dir)

    # Evaluation on eval_dataset
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
    metrics = trainer.evaluate(eval_dataset=eval_dataset)

    max_eval_samples = (
        data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
    )
    metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))

    # Save the evaluation metrics to a file
    with open("eval_metrics.txt", "w") as f:
        for key, value in metrics.items():
            f.write(f"{key}: {value}\n")

    for example in eval_dataset:
        print("###########")
        print(example.context_params)
        break
    # Get predictions from the model
    predictions, labels, contexts = trainer.predict(eval_dataset)

    correct_predictions = []
    incorrect_predictions = []
    incorrect_contexts = []
    incorrect_ending_lists =[]

    for pred, label, context in zip(predictions, labels, contexts):
        predicted_label = np.argmax(pred)
        if predicted_label == label:
            correct_predictions.append((pred, label))
        else:
            incorrect_predictions.append((pred, label, contexts))

    # Save correct predictions to a file
    with open("correct_predictions.txt", "w") as f:
        for pred, label in correct_predictions:
            f.write(f"Predicted: {pred}, Actual: {label}\n")

    # Save incorrect predictions to a file
    with open("incorrect_predictions.txt", "w") as f:
        for pred, label ,contexts in incorrect_predictions:
            f.write(f"Predicted: {pred}, Actual: {label}\n")
            f.write(f"Context: {contexts}\n")
    for pred, label, context, entry in zip(predictions, labels, contexts, eval_dataset):
        predicted_label = np.argmax(pred)
        context_params = entry.context_params
        ending_params = entry.ending_params

        incorrect_contexts.append((context_params, label))
        incorrect_ending_lists.append(ending_params)

        incorrect_predictions.append((pred, label, context_params,ending_params))
# Save incorrect contexts and ending_lists to a JSON file
    incorrect_entries = {
    "contexts": incorrect_contexts,
    "ending_lists": incorrect_ending_lists
}

    with open("incorrect_entries.json", "w") as json_file:
         json.dump(incorrect_entries, json_file, indent=4)

    # Predict on eval_dataset
    if training_args.do_predict:
        logger.info("*** Predict ***")

        predictions, labels, metrics = trainer.predict(predict_dataset, metric_key_prefix="predict")

        max_predict_samples = (
            data_args.max_predict_samples if data_args.max_predict_samples is not None else len(predict_dataset)
        )
        metrics["predict_samples"] = min(max_predict_samples, len(predict_dataset))

        trainer.log_metrics("predict", metrics)
        trainer.save_metrics("predict", metrics)

        output_predict_file = os.path.join(training_args.output_dir, "test_predictions.csv")
        if trainer.is_world_process_zero():
            with open(output_predict_file, "w") as writer:
                for index, pred_list in enumerate(predictions):
                    pred_line = '\t'.join([f'{pred:.5f}' for pred in pred_list])
                    writer.write(f"{index}\t{pred_line}\n")


# Print the best hyperparameters and its corresponding validation macro-f1 score

    # Clean up checkpoints
    checkpoints = [filepath for filepath in glob.glob(f'{training_args.output_dir}/*/') if '/checkpoint' in filepath]
    for checkpoint in checkpoints:
        shutil.rmtree(checkpoint)


# def _mp_fn(index):
# For xla_spawn (TPUs)
# main()


if __name__ == "__main__":
    # main()

    training_args = TrainingArguments(
        do_train=False,
        do_eval=True,
        do_predict=True,
        output_dir=os.getcwd(),
        overwrite_output_dir=True,
        num_train_epochs=10,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        save_steps=500,
        save_total_limit=2,
        fp16=False,
        logging_dir="./logs",
        logging_steps=200,
        evaluation_strategy="steps",
        eval_steps=500,
        logging_first_step=False,
        load_best_model_at_end=True,
        metric_for_best_model="micro-f1",
    )
    main(training_args)
		 # Train the model
    #train_result = trainer.train()
    #metrics = train_result.metrics

    # Evaluate the model




[INFO|training_args.py:1299] 2023-08-22 03:27:40,934 >> Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
[INFO|training_args.py:1713] 2023-08-22 03:27:40,935 >> PyTorch: setting up devices
[INFO|training_args.py:1439] 2023-08-22 03:27:40,936 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
[INFO|configuration_utils.py:712] 2023-08-22 03:27:41,200 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/config.json
[INFO|configuration

TypeError: ignored

In [None]:
if training_args.do_eval:
        logger.info("*** Evaluate ***")
    metrics = trainer.evaluate(eval_dataset=eval_dataset)

    max_eval_samples = (
        data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
    )
    metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))

    # Save the evaluation metrics to a file
    with open("eval_metrics.txt", "w") as f:
        for key, value in metrics.items():
            f.write(f"{key}: {value}\n")

    # Get predictions from the model
    predictions, labels, contexts = trainer.predict(eval_dataset)

    correct_predictions = []
    incorrect_predictions = []
    incorrect_contexts = []
    incorrect_ending_lists =[]

    for pred, label, context in zip(predictions, labels, contexts):
        predicted_label = np.argmax(pred)
        if predicted_label == label:
            correct_predictions.append((pred, label))
        else:
            incorrect_predictions.append((pred, label, contexts))

    # Save correct predictions to a file
    with open("correct_predictions.txt", "w") as f:
        for pred, label in correct_predictions:
            f.write(f"Predicted: {pred}, Actual: {label}\n")

    # Save incorrect predictions to a file
    with open("incorrect_predictions.txt", "w") as f:
        for pred, label ,contexts in incorrect_predictions:
            f.write(f"Predicted: {pred}, Actual: {label}\n")
            f.write(f"Context: {contexts}\n")
    for pred, label, context, entry in zip(predictions, labels, contexts, eval_dataset):
        predicted_label = np.argmax(pred)
    if predicted_label != label:
        context = entry['context_params']
        endings = entry['ending_params']
        token_type_ids = entry['token_type_ids']
        #context = entry['context']
        #endings = entry['endings']

        incorrect_contexts.append((context, label))
        incorrect_ending_lists.append(endings)

        incorrect_predictions.append((pred, label, context))

# Save incorrect contexts and ending_lists to a JSON file
    incorrect_entries = {
    "contexts": incorrect_contexts,
    "ending_lists": incorrect_ending_lists
}

    with open("incorrect_entries.json", "w") as json_file:
         json.dump(incorrect_entries, json_file, indent=4)



 training_args = TrainingArguments(
        do_train=True,
        do_eval=True,
        do_predict=True,
        output_dir=os.getcwd(),
        overwrite_output_dir=True,
        num_train_epochs=20,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        save_steps=500,
        save_total_limit=2,
        fp16=False,
        logging_dir="./logs",
        logging_steps=200,
        evaluation_strategy="steps",
        eval_steps=500,
        logging_first_step=False,
        load_best_model_at_end=True,
        metric_for_best_model="micro-f1",
    )
    main(training_args)

In [None]:
import re

# Read the "incorrect-predictions.txt" file
with open("/content/incorrect_predictions.txt", "r") as f:
    lines = f.readlines()

# Initialize variables
current_entry = {}
data_entries = []

# Process the lines and create data entries
for line in lines:
    line = line.strip()
    if line.startswith("Index: "):
        current_entry["Index"] = int(line.split(": ")[1])
    elif line.startswith("Input Text: "):
        current_entry["text"] = line.split(": ")[1]
    elif line.startswith("Input IDs: "):
        input_ids_str = line.split(": ")[1]
        input_ids = [int(id_str) for id_str in re.findall(r'\d+', input_ids_str)]
        current_entry["input_ids"] = input_ids
    elif line.startswith("Labels: "):  # Process labels
        labels_str = line.split(": ")[1]
        labels = [int(label_str) for label_str in re.findall(r'\d+', labels_str)]
        current_entry["labels"] = labels
    elif line.startswith("Original_Labels: "):  # Process original labels
        labels_str = line.split(": ")[1]
        original_labels = [int(label_str) for label_str in re.findall(r'\d+', labels_str)]
        current_entry["original_labels"] = original_labels
        data_entries.append(current_entry)
        current_entry = {}

# Extract the list of labels from data entries
label_lists = [entry.get("original_labels", []) for entry in data_entries]

print(label_lists)
print(len(label_lists))
filtered_list = [item for item in set(tuple(lst) for lst in label_lists) if item]
print(filtered_list)
# Print the list of labels for each entry
#for index, labels in enumerate(label_lists):
#    print(f"Entry {index}: Labels = {labels}")


In [None]:
import json
import re

# Read the "incorrect-predictions.txt" file
with open("/content/incorrect_predictions.txt", "r") as f:
    lines = f.readlines()

# Initialize variables
current_entry = {}
data_entries = []

# Process the lines and create data entries
for line in lines:
    line = line.strip()
    if line.startswith("Index: "):
        current_entry["Index"] = int(line.split(": ")[1])
    elif line.startswith("Input Text: "):
        current_entry["text"] = line.split(": ")[1]
    elif line.startswith("Input IDs: "):
        input_ids_str = line.split(": ")[1]
        input_ids = [int(id_str) for id_str in re.findall(r'\d+', input_ids_str)]
        current_entry["input_ids"] = input_ids
        current_entry["labels"] = []  # Assuming no labels for incorrect predictions
        data_entries.append(current_entry)
        current_entry = {}
    elif line.startswith("Labels: "):  # Process labels
        labels_str = line.split(": ")[1]
        labels = [int(label_str) for label_str in re.findall(r'\d+', labels_str)]
        current_entry["labels"] = labels
        data_entries.append(current_entry)
        current_entry = {}
    elif line.startswith("Original_Labels: "):  # Process original labels
        labels_str = line.split(": ")[1]
        labels = [int(label_str) for label_str in re.findall(r'\d+', labels_str)]
        current_entry["original_labels"] = labels
        data_entries.append(current_entry)
        current_entry = {}

# Save the data entries to a JSON file
output_filename = "incorrect_predictions_dataset.json"
with open(output_filename, "w") as json_file:
    json.dump(data_entries, json_file, indent=4)

print(f"Converted incorrect predictions saved to {output_filename}")


In [None]:
incorrect_predictions = []
with open("incorrect_predictions.txt", "r") as f:
    lines = f.readlines()
    for line in lines:
        parts = line.strip().split(", ")
        pred = eval(parts[0].replace("Predicted: ", ""))
        actual = int(parts[1].replace("Actual: ", ""))
        incorrect_predictions.append({"predicted": pred, "actual": actual})

# Save incorrect predictions to a JSON file
with open("incorrect_predictions_dataset.json", "w") as json_file:
    json.dump(incorrect_predictions, json_file, indent=4)

In [None]:
import json
from datasets import Dataset

import pandas as pd

# Load JSON data from file
file_path = '/content/incorrect_predictions_dataset.json'
with open(file_path, 'r') as json_file:
    data = json.load(json_file)

# Create a dictionary to store original_labels
labels_dict = {}

# Iterate through the data and extract original_labels
for entry in data:
    if "original_labels" in entry:
        original_labels = entry["original_labels"]
        if "Index" in entry:
            index = entry["Index"]
            labels_dict[index] = original_labels
        elif len(labels_dict) > 0:
            last_index = max(labels_dict.keys())
            labels_dict[last_index].extend(original_labels)

# Initialize lists to store texts and labels
texts = []
text= []
labels = label_lists[0:173]
print("#####")
print(len(label_lists))
# Iterate through the data and extract relevant information
for entry in data:
    if "text" in entry:
        text = entry["text"]
        if "Index" in entry:
            index = entry["Index"]
            #labels.append(labels_dict.get(index, []))
        else:
            # Use the last index available in labels_dict
            last_index = max(labels_dict.keys())
            #labels.append(labels_dict.get(last_index, []))
        texts.append(text)

print("Number of Texts:", len(texts))
print("Number of Labels:", len(labels))
from datasets import load_dataset
existing_dataset = load_dataset("lex_glue", 'unfair_tos')
#dataset=dataset_dict['train']
#print("Number of Texts:", len(existing_dataset['text']))
#print("Number of Labels:", len(existing_dataset['text']))
#aligned_data = [{"text": text, "labels": label} for text, label in zip(texts, labels)]

# Create a new Dataset from the aligned data
#new_dataset = Dataset.from_dict(aligned_data)

aligned_data = {"text": texts, "labels": labels}
df = pd.DataFrame(aligned_data)

# Create a new Dataset from the DataFrame
new_dataset = Dataset.from_pandas(df)

#merged_dataset = existing_dataset["train"].concatenate(new_dataset)
#text= existing_dataset["train"]["text"] + texts
#labels= existing_dataset["train"]["labels"] + labels
merged_dataset = Dataset.from_dict({
    "text": existing_dataset["train"]["text"] + texts,
    "labels": existing_dataset["train"]["labels"] + labels,
})
#existing_dataset["train"]["text"] += texts
#existing_dataset["train"]["labels"] += labels
print("Number of Texts:", len(merged_dataset['text']))
print("Number of Labels:", len(merged_dataset['labels']))


In [None]:
#!/usr/bin/env python
# coding=utf-8
""" Finetuning models on CaseHOLD (e.g. Bert, RoBERTa, LEGAL-BERT)."""

import logging
import os
from dataclasses import dataclass, field
from typing import Optional
from sklearn.model_selection import ParameterGrid
import numpy as np
import random
import shutil
import glob
import os

import transformers
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
		AutoModelForMultipleChoice,
    DataCollatorWithPadding,
    EvalPrediction,
    HfArgumentParser,
    TrainingArguments,
    default_data_collator,
    set_seed,
    EarlyStoppingCallback,
    Trainer
)
from transformers.trainer_utils import is_main_process
from transformers import EarlyStoppingCallback
# from casehold_helpers import MultipleChoiceDataset, Split
from sklearn.metrics import f1_score
# from models.deberta import DebertaForMultipleChoice

logger = logging.getLogger(__name__)

param_grid = {
    'learning_rate': [1e-5, 2e-5],  # Learning rates to try
    'num_train_epochs': [1, 2],        # Number of training epochs to try
    'per_device_train_batch_size': [2, 4],  # Batch sizes for training
    'per_device_eval_batch_size': [2, 4],   # Batch sizes for evaluation
}


@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
    """

    model_name_or_path: str = field(
        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
    )
    config_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
    )
    tokenizer_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
    )


@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.
    """

    task_name: str = field(default="case_hold", metadata={"help": "The name of the task to train on"})
    max_seq_length: int = field(
        default=256,
        metadata={
            "help": "The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded."
        },
    )
    pad_to_max_length: bool = field(
        default=True,
        metadata={
            "help": "Whether to pad all samples to `max_seq_length`. "
            "If False, will pad the samples dynamically when batching to the maximum length in the batch."
        },
    )
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
            "value if set."
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
            "value if set."
        },
    )
    max_predict_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
            "value if set."
        },
    )
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )


def main(training_args):
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    data_args = DataTrainingArguments(
        max_seq_length=128,
        # max_segments=64,
        # max_seg_length=128,
        overwrite_cache=False,
        pad_to_max_length=True,
    )
    model_args = ModelArguments(
        model_name_or_path="microsoft/deberta-base",
        # hierarchical=True,
        #do_lower_case=True,
        #use_fast_tokenizer=True,
    )
    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if training_args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        training_args.local_rank,
        training_args.device,
        training_args.n_gpu,
        bool(training_args.local_rank != -1),
        training_args.fp16,
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
        transformers.utils.logging.enable_default_handler()
        transformers.utils.logging.enable_explicit_format()
    logger.info("Training/evaluation parameters %s", training_args)

    # Set seed
    set_seed(training_args.seed)

    # Load pretrained model and tokenizer
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        num_labels=5,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
    )

    if config.model_type == 'big_bird':
        config.attention_type = 'original_full'
    elif config.model_type == 'longformer':
        config.attention_window = [data_args.max_seq_length] * config.num_hidden_layers

    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        # Default fast tokenizer is buggy on CaseHOLD task, switch to legacy tokenizer
        use_fast=True,
    )

    if config.model_type != 'deberta':
        model = AutoModelForMultipleChoice.from_pretrained(
            model_args.model_name_or_path,
            from_tf=bool(".ckpt" in model_args.model_name_or_path),
            config=config,
            cache_dir=model_args.cache_dir,
        )
    else:
        model = DebertaForMultipleChoice.from_pretrained(
            model_args.model_name_or_path,
            from_tf=bool(".ckpt" in model_args.model_name_or_path),
            config=config,
            cache_dir=model_args.cache_dir,
        )

    train_dataset = None
    eval_dataset = None

    # If do_train passed, train_dataset by default loads train split from file named train.csv in data directory
    if training_args.do_train:
        train_dataset = \
            MultipleChoiceDataset(
                tokenizer=tokenizer,
                task=data_args.task_name,
                max_seq_length=data_args.max_seq_length,
                overwrite_cache=data_args.overwrite_cache,
                mode=Split.train,
            )

    # If do_eval or do_predict passed, eval_dataset by default loads dev split from file named dev.csv in data directory
    if training_args.do_eval:
        eval_dataset = \
            MultipleChoiceDataset(
                tokenizer=tokenizer,
                task=data_args.task_name,
                max_seq_length=data_args.max_seq_length,
                overwrite_cache=data_args.overwrite_cache,
                mode=Split.dev,
            )

    if training_args.do_predict:
        predict_dataset = \
            MultipleChoiceDataset(
                tokenizer=tokenizer,
                task=data_args.task_name,
                max_seq_length=data_args.max_seq_length,
                overwrite_cache=data_args.overwrite_cache,
                mode=Split.test,
            )

    if training_args.do_train:
        if data_args.max_train_samples is not None:
            train_dataset = train_dataset[:data_args.max_train_samples]
        # Log a few random samples from the training set:
        for index in random.sample(range(len(train_dataset)), 3):
            logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")

    if training_args.do_eval:
        if data_args.max_eval_samples is not None:
            eval_dataset = eval_dataset[:data_args.max_eval_samples]

    if training_args.do_predict:
        if data_args.max_predict_samples is not None:
            predict_dataset = predict_dataset[:data_args.max_predict_samples]

    # Define custom compute_metrics function, returns macro F1 metric for CaseHOLD task
    def compute_metrics(p: EvalPrediction):
        preds = np.argmax(p.predictions, axis=1)
        # Compute macro and micro F1 for 5-class CaseHOLD task
        macro_f1 = f1_score(y_true=p.label_ids, y_pred=preds, average='macro', zero_division=0)
        micro_f1 = f1_score(y_true=p.label_ids, y_pred=preds, average='micro', zero_division=0)
        return {'macro-f1': macro_f1, 'micro-f1': micro_f1}

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
    )

    # Training
    if training_args.do_train:
        trainer.train(
            model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None
        )
        trainer.save_model()
        # Re-save the tokenizer for model sharing
        if trainer.is_world_process_zero():
            tokenizer.save_pretrained(training_args.output_dir)

    # Evaluation on eval_dataset
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
        metrics = trainer.evaluate(eval_dataset=eval_dataset)

        max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
        metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Predict on eval_dataset
    if training_args.do_predict:
        logger.info("*** Predict ***")

        predictions, labels, metrics = trainer.predict(predict_dataset, metric_key_prefix="predict")

        max_predict_samples = (
            data_args.max_predict_samples if data_args.max_predict_samples is not None else len(predict_dataset)
        )
        metrics["predict_samples"] = min(max_predict_samples, len(predict_dataset))

        trainer.log_metrics("predict", metrics)
        trainer.save_metrics("predict", metrics)

        output_predict_file = os.path.join(training_args.output_dir, "test_predictions.csv")
        if trainer.is_world_process_zero():
            with open(output_predict_file, "w") as writer:
                for index, pred_list in enumerate(predictions):
                    pred_line = '\t'.join([f'{pred:.5f}' for pred in pred_list])
                    writer.write(f"{index}\t{pred_line}\n")


# Print the best hyperparameters and its corresponding validation macro-f1 score

    # Clean up checkpoints
    checkpoints = [filepath for filepath in glob.glob(f'{training_args.output_dir}/*/') if '/checkpoint' in filepath]
    for checkpoint in checkpoints:
        shutil.rmtree(checkpoint)


# def _mp_fn(index):
# For xla_spawn (TPUs)
# main()


if __name__ == "__main__":
    # main()

    training_args = TrainingArguments(
        do_train=True,
        do_eval=True,
        do_predict=True,
        output_dir=os.getcwd(),
        overwrite_output_dir=True,
        num_train_epochs=10,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        save_steps=500,
        save_total_limit=2,
        fp16=False,
        logging_dir="./logs",
        logging_steps=200,
        evaluation_strategy="steps",
        eval_steps=500,
        logging_first_step=False,
        load_best_model_at_end=True,
        metric_for_best_model="micro-f1",
    )
    main(training_args)
		 # Train the model
    #train_result = trainer.train()
    #metrics = train_result.metrics

    # Evaluate the model




[INFO|training_args.py:1299] 2023-08-12 04:29:38,078 >> Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
[INFO|training_args.py:1713] 2023-08-12 04:29:38,079 >> PyTorch: setting up devices
[INFO|training_args.py:1439] 2023-08-12 04:29:38,081 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
[INFO|configuration_utils.py:712] 2023-08-12 04:29:38,302 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
[INFO|configu

Step,Training Loss,Validation Loss


KeyboardInterrupt: ignored