<a href="https://colab.research.google.com/github/optimopium/is-this-political/blob/main/Max_Entropy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Max Entropy Active Learning

## Preliminaries

In [None]:
# Mount the user's Google Drive inside the Colab VM at /content/drive.
# NOTE(review): no other cell visible in this notebook reads from /content/drive
# (the dataset is fetched with gdown further down) — confirm this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


The training code is adapted from the Hugging Face Transformers [`run_glue.py`](https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.py) example and the AdapterHub [`run_glue.py`](https://github.com/adapter-hub/adapter-transformers/blob/cffdf3974ea19f49e1febe6e3f5b74be4e2d496a/examples/pytorch/text-classification/run_glue.py) example.

In [None]:
! mkdir results
! mkdir data

In [None]:
# Install the third-party packages the notebook imports.
# NOTE(review): none of these installs is version-pinned, so a fresh run picks up whatever
# is newest. The recorded run further down reports transformers 4.26.1 — consider pinning
# versions so the notebook keeps reproducing.
!pip install --quiet --upgrade gdown
!pip install --quiet -U transformers
!pip install --quiet datasets
!pip install --quiet scikit-learn
!pip install --quiet evaluate
!pip install --quiet sentencepiece

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m30.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m93.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m462.8/462.8 KB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m213.0/213.0 KB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.0/132.0 KB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.6/140.6 KB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 KB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import logging
import random
import sys
import os

from scipy.stats import entropy
import torch
from datasets import load_dataset

# Seed Python's `random` module so the random.sample() draws made later are repeatable.
# NOTE(review): the seed is the string "42", not the integer 42 — both are accepted but
# they produce different sequences; confirm the string form is intended.
random.seed("42")

In [None]:
import gdown

# Google Drive folder that is mirrored into the current working directory.
# The recorded output of this cell lists the CSV files it produced under ./annotated/.
url = "https://drive.google.com/drive/folders/1_0qVo_iLOtjVcnybhBCOXMpguxCeUD1t"
gdown.download_folder(url, output="./", quiet=True, use_cookies=False)

['./annotated/annotator1.csv',
 './annotated/annotator2.csv',
 './annotated/dataset.csv',
 './annotated/full_dataset.csv',
 './annotated/test.csv',
 './annotated/train.csv',
 './annotated/validation.csv']

In [None]:
# Use the root logger (getLogger() called without a name); the functions below log through it.
logger = logging.getLogger()

# Set the logger threshold to INFO, so DEBUG records are dropped.
logger.setLevel(logging.INFO)

In [None]:
# Maps a task name to the pair of dataset columns holding its input text:
# (first_sentence_column, second_sentence_column). `None` as the second entry means the
# task is single-sentence (only the first column is tokenized).
task_to_keys = {
    "politics": ("sentence", None),
}

# Directory holding the train/validation/test CSV files.
base_dir = './annotated/'

In [None]:
# One CSV file per split, all located directly under `base_dir`.
data_files = {split: f"{base_dir}{split}.csv" for split in ("train", "validation", "test")}

In [None]:
# --- Active learning schedule -------------------------------------------------
BUDGET = 1125                 # labeled samples available once every iteration has run
INITIAL_DATASET_SIZE = 125    # randomly drawn samples used for the first training round
ACQUISITION_SIZE = 100        # samples added to the labeled set per iteration
# Number of acquisition rounds needed to grow from the initial set up to the budget.
ITERATIONS = (BUDGET - INITIAL_DATASET_SIZE) // ACQUISITION_SIZE
# Candidates scored per acquired sample (candidate pool = ratio * ACQUISITION_SIZE).
CANDIDATE_TO_SAMPLE_RATIO = 5

# --- Model / training settings ------------------------------------------------
BASE_MODEL = 'xlm-roberta-base'
MAX_SEQ_LEN = 128
TRAIN_BATCH_SIZE = 16
EVAL_BATCH_SIZE = 16
LEARNING_RATE = 2e-5
LOGGING_STEPS = 50
EVAL_STEPS = 50
# Rule of thumb from the original author: epoch * (budget / batch size).
MAX_STEPS = 500

# --- Echo the settings so they are recorded next to the results ---------------
print(f"Budget: {BUDGET}")
print(f"Initial Dataset Size: {INITIAL_DATASET_SIZE}")
print(f"Acquisition size: {ACQUISITION_SIZE}")
print(f"Iterations: {ITERATIONS}")
print(MAX_STEPS)

Budget: 1125
Initial Dataset Size: 125
Acquisition size: 100
Iterations: 10
500


## Train Model

In [None]:
from dataclasses import dataclass, field
from typing import Optional

In [None]:
@dataclass
class DataTrainingArguments:
    """
    Data-side options of a run: which task/dataset is used and how examples are
    tokenized, padded and optionally capped in number.

    Every field carries a `help` entry in its metadata so that `HfArgumentParser`
    can expose the class as command-line arguments.
    """

    task_name: Optional[str] = field(
        default=None,
        metadata=dict(help=f"The name of the task to train on: {', '.join(task_to_keys)}"),
    )
    dataset_name: Optional[str] = field(
        default=None,
        metadata=dict(help="The name of the dataset to use (via the datasets library)."),
    )
    dataset_config_name: Optional[str] = field(
        default=None,
        metadata=dict(help="The configuration name of the dataset to use (via the datasets library)."),
    )
    max_seq_length: int = field(
        default=128,
        metadata=dict(
            help="The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded."
        ),
    )
    overwrite_cache: bool = field(
        default=False,
        metadata=dict(help="Overwrite the cached preprocessed datasets or not."),
    )
    pad_to_max_length: bool = field(
        default=True,
        metadata=dict(
            help="Whether to pad all samples to `max_seq_length`. "
            "If False, will pad the samples dynamically when batching to the maximum length in the batch."
        ),
    )
    # The three caps below truncate the corresponding split to at most N examples.
    max_train_samples: Optional[int] = field(
        default=None,
        metadata=dict(
            help="For debugging purposes or quicker training, truncate the number of training examples to this "
            "value if set."
        ),
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata=dict(
            help="For debugging purposes or quicker training, truncate the number of evaluation examples to this "
            "value if set."
        ),
    )
    max_predict_samples: Optional[int] = field(
        default=None,
        metadata=dict(
            help="For debugging purposes or quicker training, truncate the number of prediction examples to this "
            "value if set."
        ),
    )

In [None]:
@dataclass
class ModelArguments:
    """
    Model-side options of a run: which pretrained model, config and tokenizer are
    loaded, and how they are fetched.
    """

    # Required: the only field without a default, so it must stay first.
    model_name_or_path: str = field(
        metadata=dict(help="Path to pretrained model or model identifier from huggingface.co/models"),
    )
    config_name: Optional[str] = field(
        default=None,
        metadata=dict(help="Pretrained config name or path if not the same as model_name"),
    )
    tokenizer_name: Optional[str] = field(
        default=None,
        metadata=dict(help="Pretrained tokenizer name or path if not the same as model_name"),
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata=dict(help="Where do you want to store the pretrained models downloaded from huggingface.co"),
    )
    use_fast_tokenizer: bool = field(
        default=True,
        metadata=dict(help="Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."),
    )
    model_revision: str = field(
        default="main",
        metadata=dict(help="The specific model version to use (can be a branch name, tag name or commit id)."),
    )
    use_auth_token: bool = field(
        default=False,
        metadata=dict(
            help="Will use the token generated when running `huggingface-cli login` (necessary to use this script "
            "with private models)."
        ),
    )
    ignore_mismatched_sizes: bool = field(
        default=False,
        metadata=dict(help="Will enable to load a pretrained model whose head dimensions are different."),
    )

In [None]:
import datasets
from datasets import concatenate_datasets, load_dataset, load_metric
import numpy as np
from datasets import load_dataset

import evaluate

import transformers
from transformers.trainer_utils import get_last_checkpoint
from transformers import (
    AutoModelForSequenceClassification,
    AutoConfig,
    AutoTokenizer,
    DataCollatorWithPadding,
    EvalPrediction,
    HfArgumentParser,
    PretrainedConfig,
    Trainer,
    TrainingArguments,
    default_data_collator,
    set_seed,
)

In [None]:
def experiment(raw_datasets, args_dict=None):
    """Fine-tune, evaluate and/or predict with a sequence-classification model.

    The steps that actually run are controlled by the `do_train`, `do_eval` and
    `do_predict` training arguments.

    Args:
        raw_datasets: Mapping from split name to dataset. A "train" split is always
            read (to derive the label set); "validation" is required when `do_eval`
            is set and "test" when `do_predict` is set. Every split is tokenized.
        args_dict: Optional dict parsed by `HfArgumentParser` into `ModelArguments`,
            `DataTrainingArguments` and `TrainingArguments`. When `None`, arguments
            are taken from `sys.argv` (a single `.json` path, or regular CLI flags).

    Returns:
        A tuple `(evaluation_metrics, test_predictions)`:
        `evaluation_metrics` is the metrics dict of the last `trainer.evaluate` call
        (an empty dict when `do_eval` is off); `test_predictions` is the
        `predictions` field returned by `trainer.predict` for the test split
        (`None` when `do_predict` is off).

    Raises:
        ValueError: if the output directory already holds non-checkpoint content and
            `overwrite_output_dir` is off, or if a split required by the requested
            steps is missing from `raw_datasets`.
    """
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    if args_dict is not None:
        model_args, data_args, training_args = parser.parse_dict(args_dict)
    elif len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    datasets.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
        + f", distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # Labels
    if data_args.task_name is not None:
        is_regression = data_args.task_name == "stsb"
        if not is_regression:
            label_list = raw_datasets["train"].features["label"].names
            num_labels = len(label_list)
        else:
            num_labels = 1
    else:
        # Trying to have good defaults here, don't hesitate to tweak to your needs.
        is_regression = raw_datasets["train"].features["label"].dtype in ["float32", "float64"]
        if is_regression:
            num_labels = 1
        else:
            # A useful fast method:
            # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique
            label_list = raw_datasets["train"].unique("label")
            label_list.sort()  # Let's sort it for determinism
            num_labels = len(label_list)

    # Load pretrained model and tokenizer
    #
    # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
        ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
    )

    # Preprocessing the raw_datasets
    if data_args.task_name is not None:
        sentence1_key, sentence2_key = task_to_keys[data_args.task_name]
    else:
        # Again, we try to have some nice defaults but don't hesitate to tweak to your use case.
        non_label_column_names = [name for name in raw_datasets["train"].column_names if name != "label"]
        if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names:
            sentence1_key, sentence2_key = "sentence1", "sentence2"
        else:
            if len(non_label_column_names) > 2:
                sentence1_key, sentence2_key = non_label_column_names[:2]
            else:
                sentence1_key, sentence2_key = "sentence1", None

    # Padding strategy
    if data_args.pad_to_max_length:
        padding = "max_length"
    else:
        # We will pad later, dynamically at batch creation, to the max sequence length in each batch
        padding = False

    # Some models have set the order of the labels to use, so let's make sure we do use it.
    label_to_id = None
    if (
        model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id
        and data_args.task_name is not None
        and not is_regression
    ):
        # Some have all caps in their config, some don't.
        label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
        if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
            label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
        else:
            logger.warning(
                f"""Your model seems to have been trained with labels, but they don't match the dataset:\n
                model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}.\n
                Ignoring the model labels as a result.""",
            )
    elif data_args.task_name is None and not is_regression:
        label_to_id = {v: i for i, v in enumerate(label_list)}

    if label_to_id is not None:
        model.config.label2id = label_to_id
        model.config.id2label = {id: label for label, id in config.label2id.items()}
    elif data_args.task_name is not None and not is_regression:
        model.config.label2id = {l: i for i, l in enumerate(label_list)}
        model.config.id2label = {id: label for label, id in config.label2id.items()}

    if data_args.max_seq_length > tokenizer.model_max_length:
        logger.warning(
            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
            f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
        )
    max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)

    def preprocess_function(examples):
        """Tokenize a batch of examples and, when needed, remap its labels to ids."""
        # Tokenize the texts
        args = (
            (examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key])
        )
        result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True)

        # Map labels to IDs (not necessary for GLUE tasks)
        if label_to_id is not None and "label" in examples:
            result["label"] = [(label_to_id[l] if l != -1 else -1) for l in examples["label"]]
        return result

    with training_args.main_process_first(desc="dataset map pre-processing"):
        raw_datasets = raw_datasets.map(
            preprocess_function,
            batched=True,
            load_from_cache_file=not data_args.overwrite_cache,
            desc="Running tokenizer on dataset",
        )
    if training_args.do_train:
        if "train" not in raw_datasets:
            raise ValueError("--do_train requires a train dataset")
        train_dataset = raw_datasets["train"]
        if data_args.max_train_samples is not None:
            max_train_samples = min(len(train_dataset), data_args.max_train_samples)
            train_dataset = train_dataset.select(range(max_train_samples))

    if training_args.do_eval:
        if "validation" not in raw_datasets and "validation_matched" not in raw_datasets:
            raise ValueError("--do_eval requires a validation dataset")
        eval_dataset = raw_datasets["validation_matched" if data_args.task_name == "mnli" else "validation"]
        if data_args.max_eval_samples is not None:
            max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
            eval_dataset = eval_dataset.select(range(max_eval_samples))

    # `DataTrainingArguments` in this notebook declares no `test_file` field, so a plain
    # attribute access raised AttributeError whenever the first two conditions were False.
    # getattr keeps the check working should such a field be added later.
    if (
        training_args.do_predict
        or data_args.task_name is not None
        or getattr(data_args, "test_file", None) is not None
    ):
        if "test" not in raw_datasets and "test_matched" not in raw_datasets:
            raise ValueError("--do_predict requires a test dataset")
        predict_dataset = raw_datasets["test_matched" if data_args.task_name == "mnli" else "test"]
        if data_args.max_predict_samples is not None:
            max_predict_samples = min(len(predict_dataset), data_args.max_predict_samples)
            predict_dataset = predict_dataset.select(range(max_predict_samples))

    # Log a few random samples from the training set:
    if training_args.do_train:
        for index in random.sample(range(len(train_dataset)), 3):
            logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")

    # Get the metric function
    if data_args.task_name is not None:
        metric = evaluate.load("glue", data_args.task_name)
    else:
        metric = evaluate.load("accuracy")

    # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
    # predictions and label_ids field) and has to return a dictionary string to float.
    def compute_metrics(p: EvalPrediction):
        """Turn raw model outputs into a metrics dict (task metric, mse or accuracy)."""
        preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
        preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1)
        if data_args.task_name is not None:
            result = metric.compute(predictions=preds, references=p.label_ids)
            if len(result) > 1:
                result["combined_score"] = np.mean(list(result.values())).item()
            return result
        elif is_regression:
            return {"mse": ((preds - p.label_ids) ** 2).mean().item()}
        else:
            return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()}

    # Data collator will default to DataCollatorWithPadding when the tokenizer is passed to Trainer, so we change it if
    # we already did the padding.
    if data_args.pad_to_max_length:
        data_collator = default_data_collator
    elif training_args.fp16:
        data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8)
    else:
        data_collator = None

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset if training_args.do_train else None,
        eval_dataset=eval_dataset if training_args.do_eval else None,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        metrics = train_result.metrics
        max_train_samples = (
            data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
        )
        metrics["train_samples"] = min(max_train_samples, len(train_dataset))

        trainer.save_model(training_args.output_dir)  # Saves the tokenizer too for easy upload

        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()

    # Evaluation
    evaluation_metrics = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        tasks = [data_args.task_name]
        eval_datasets = [eval_dataset]
        if data_args.task_name == "mnli":
            tasks.append("mnli-mm")
            eval_datasets.append(raw_datasets["validation_mismatched"])

        for eval_dataset, task in zip(eval_datasets, tasks):
            metrics = trainer.evaluate(eval_dataset=eval_dataset)

            max_eval_samples = (
                data_args.max_eval_samples
                if data_args.max_eval_samples is not None
                else len(eval_dataset)
            )
            metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))

            trainer.log_metrics("eval", metrics)
            trainer.save_metrics("eval", metrics)

            # Only the metrics of the last evaluated dataset are returned.
            evaluation_metrics = metrics

    test_predictions = None
    if training_args.do_predict:
        logger.info("*** Predict ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        tasks = [data_args.task_name]
        predict_datasets = [predict_dataset]
        if data_args.task_name == "mnli":
            tasks.append("mnli-mm")
            predict_datasets.append(raw_datasets["test_mismatched"])

        for predict_dataset, task in zip(predict_datasets, tasks):
            # Removing the `label` columns because it contains -1 and Trainer won't like that.
            predict_dataset = predict_dataset.remove_columns("label")
            test_predictions = trainer.predict(predict_dataset, metric_key_prefix="predict").predictions

    return evaluation_metrics, test_predictions

In [None]:
def annotate(unlabled_samples):
    """Return the given samples unchanged.

    This is the labeling step of the active-learning loop; as written it is an
    identity function and performs no annotation itself.
    NOTE(review): presumably the samples already carry gold labels from the CSV, so
    no human annotation is needed here — confirm.
    """
    return unlabled_samples

## Max Entropy Sampling

In [None]:
def calculate_entropy(logits):
    """Compute the predictive entropy of each row of classifier logits.

    Args:
        logits: 2-D NumPy array; softmax is applied along axis 1, so each row is
            treated as one sample's class scores.

    Returns:
        1-D torch tensor holding one entropy value (natural log) per row.
    """
    class_probabilities = torch.softmax(torch.from_numpy(logits), dim=1)
    # scipy's `entropy` reduces over axis 0, so hand it a (classes, samples) view.
    per_sample_entropy = entropy(class_probabilities.cpu().numpy().T)
    return torch.from_numpy(per_sample_entropy)

In [None]:
def entropy_based_active_learning(
        hf_args,
        raw_datasets,
        initial_labeled_dataset_size,
        iteration_count,
        iteration_sample_count,
        candidate_to_selected_samples_ratio=10
    ):
    """Run pool-based active learning with a max-entropy acquisition function.

    A random subset of the "train" split is used for a first training round. Each
    iteration then (1) draws a random candidate pool from the still-unlabeled rows,
    (2) scores the candidates with the most recently trained model, (3) moves the
    highest-entropy candidates into the labeled set and (4) retrains from the base
    model on the enlarged labeled set.

    Rows are tracked through their "idx" column.
    NOTE(review): this assumes "idx" values are unique per row — confirm for the data.

    Args:
        hf_args: Dict of arguments forwarded to `experiment`. It must contain
            "model_name_or_path" and "output_dir"; the caller's dict is not modified.
        raw_datasets: Mapping with at least a "train" split (plus whatever splits
            `experiment` needs for evaluation/prediction); not modified.
        initial_labeled_dataset_size: Number of rows randomly drawn for the first round.
        iteration_count: Maximum number of acquisition iterations.
        iteration_sample_count: Number of rows acquired per iteration.
        candidate_to_selected_samples_ratio: Size of the scored candidate pool as a
            multiple of `iteration_sample_count`.

    Returns:
        A list with the evaluation-metrics dict returned by `experiment` for the
        initial round followed by one entry per completed iteration.
    """
    # Work on shallow copies so neither argument is changed for the caller.
    hf_args = dict(hf_args)
    raw_datasets = datasets.DatasetDict(raw_datasets)

    original_train_dataset = raw_datasets["train"]
    base_model_name_or_path = hf_args["model_name_or_path"]
    trained_model_dir = hf_args["output_dir"]

    # select initial train dataset from raw dataset
    train_dataset = original_train_dataset.select(
        random.sample(
            range(original_train_dataset.num_rows),
            initial_labeled_dataset_size,
        )
    )

    # Materialize the ids once; rebuilding the list inside the lambda costs O(rows * labeled).
    labeled_idx = set(train_dataset["idx"])
    unlabeled_dataset = original_train_dataset.filter(
        lambda s: s["idx"] not in labeled_idx
    )

    raw_datasets["train"] = train_dataset
    # Train Initial Model
    logger.info(f'Initial Training with {raw_datasets["train"].num_rows} samples.')
    evaluation_metrics, _ = experiment(raw_datasets, args_dict=hf_args)
    metrics_history = [evaluation_metrics]
    # `experiment` saves the model to `output_dir` only when it actually trained.
    has_trained_model = bool(hf_args.get("do_train"))

    # Arguments for the retraining rounds: always train and evaluate, starting from the base model.
    retrain_args = dict(
        hf_args,
        model_name_or_path=base_model_name_or_path,
        do_train=True,
        do_eval=True,
        evaluation_strategy="steps",
        load_best_model_at_end=True,
    )
    # Arguments for the scoring pass: prediction only, no evaluation during training.
    predict_args = dict(
        hf_args,
        do_train=False,
        do_eval=False,
        do_predict=True,
        evaluation_strategy="no",
    )
    predict_args.pop("load_best_model_at_end", None)

    for current_iteration in range(1, iteration_count + 1):
        print(f'Current Active Learning Iteration: {current_iteration}')

        if unlabeled_dataset.num_rows <= 0:
            logger.info(f'Not enough unlabeled data to continue. Stopped at iteration {current_iteration}')
            break

        # Sample candidate_to_selected_samples_ratio larger than iteration_sample_count sample for acquisition
        # function, but never more than what is left in the pool (random.sample would raise otherwise).
        candidate_count = min(
            int(candidate_to_selected_samples_ratio * iteration_sample_count),
            unlabeled_dataset.num_rows,
        )
        acquisition_count = min(iteration_sample_count, candidate_count)
        if acquisition_count <= 0:
            logger.info(f'Nothing to acquire. Stopped at iteration {current_iteration}')
            break
        logger.info(f"Candidate samples count for active learning : {candidate_count}")
        candidate_samples = unlabeled_dataset.select(
            random.sample(
                range(unlabeled_dataset.num_rows),
                candidate_count,
            )
        )

        # Acquisition Function - Max Entropy Strategy
        # Score the candidates with the model saved by the previous training round; loading the
        # base checkpoint here would score them with a randomly initialised classification head.
        predict_args["model_name_or_path"] = (
            trained_model_dir if has_trained_model else base_model_name_or_path
        )
        # "train" is the current labeled set so that `experiment` derives the same label set
        # the model was trained with; only the "test" split is predicted on.
        active_learning_data = datasets.DatasetDict(
            {"train": raw_datasets["train"], "test": candidate_samples}
        )
        _, candidate_test_predictions = experiment(active_learning_data, args_dict=predict_args)
        samples_entropy = calculate_entropy(candidate_test_predictions)
        chosen_samples = torch.topk(samples_entropy, acquisition_count)

        # Annotate new samples. The top-k indices are positions within `candidate_samples`
        # (the rows that were scored), so that is the dataset to select from.
        new_train_samples = candidate_samples.select(chosen_samples.indices.tolist())
        new_train_samples = annotate(new_train_samples)

        # Add new samples to labeled dataset
        extended_train_dataset = concatenate_datasets(
            [raw_datasets["train"], new_train_samples],
            info=original_train_dataset.info,
        )

        # Remove selected samples from the *current* unlabeled pool; rebuilding it from the
        # original train split would put previously labeled rows back into the pool.
        newly_labeled_idx = set(new_train_samples["idx"])
        unlabeled_dataset = unlabeled_dataset.filter(
            lambda s: s["idx"] not in newly_labeled_idx
        )

        # Train new model with new dataset
        raw_datasets["train"] = extended_train_dataset
        evaluation_metrics, _ = experiment(raw_datasets, args_dict=retrain_args)
        metrics_history.append(evaluation_metrics)
        has_trained_model = True

    return metrics_history

In [None]:
def run_max_entropy_sampling():
    """Load the CSV splits and run the max-entropy active-learning loop.

    All settings come from the notebook-level configuration constants; results are
    written under ./results/max_entropy/.
    """
    dataset_splits = load_dataset("csv", data_files=data_files)

    # Arguments understood by `experiment` (model, data and Trainer settings).
    trainer_settings = dict(
        model_name_or_path=BASE_MODEL,
        do_train=True,
        do_eval=True,
        do_predict=True,
        max_seq_length=MAX_SEQ_LEN,
        per_device_train_batch_size=TRAIN_BATCH_SIZE,
        per_device_eval_batch_size=EVAL_BATCH_SIZE,
        learning_rate=LEARNING_RATE,
        overwrite_output_dir=True,
        output_dir="./results/max_entropy/",
        logging_strategy="steps",
        logging_steps=LOGGING_STEPS,
        evaluation_strategy="steps",
        eval_steps=EVAL_STEPS,
        seed=12,
        max_steps=MAX_STEPS,
        load_best_model_at_end=True,
    )

    entropy_based_active_learning(
        trainer_settings,
        dataset_splits,
        initial_labeled_dataset_size=INITIAL_DATASET_SIZE,
        iteration_count=ITERATIONS,
        iteration_sample_count=ACQUISITION_SIZE,
        candidate_to_selected_samples_ratio=CANDIDATE_TO_SAMPLE_RATIO,
    )

In [None]:
# Run the whole experiment: one initial training round plus one retraining round per
# active-learning iteration. Expect this cell to take a long time.
run_max_entropy_sampling()



Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?ba/s]

INFO:root:Initial Training with 125 samples.
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=True,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
evaluation_strategy=steps,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=False,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ign

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-af15029ffe585d58.arrow


Downloading (…)lve/main/config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

[INFO|configuration_utils.py:660] 2023-02-21 15:15:11,625 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:15:11,639 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.26.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

[INFO|tokenization_utils_base.py:1802] 2023-02-21 15:15:13,933 >> loading file sentencepiece.bpe.model from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/sentencepiece.bpe.model
[INFO|tokenization_utils_base.py:1802] 2023-02-21 15:15:13,935 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/tokenizer.json
[INFO|tokenization_utils_base.py:1802] 2023-02-21 15:15:13,939 >> loading file added_tokens.json from cache at None
[INFO|tokenization_utils_base.py:1802] 2023-02-21 15:15:13,945 >> loading file special_tokens_map.json from cache at None
[INFO|tokenization_utils_base.py:1802] 2023-02-21 15:15:13,947 >> loading file tokenizer_config.json from cache at None
[INFO|configuration_utils.py:660] 2023-02-21 15:15:13,950 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

[INFO|modeling_utils.py:2275] 2023-02-21 15:15:24,901 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/pytorch_model.bin
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-e742c93c16d6a5c7.arrow


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-02c97c351f963a81.arrow


Running tokenizer on dataset:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-57b4e977697e145a.arrow
INFO:root:Sample 60 of the training set: {'idx': 1716, 'sentence1': 'تجمع اعتراضی مردم #شمال_بلوچستان در اعتراض به عدم تکمیل پروژه #بزرگراه_زابل_به_زاهدان  #ایران#اعتراضات_سراسری#خیزش_دی۹۷', 'label': 1, 'input_ids': [0, 111359, 44343, 140, 7684, 468, 202072, 454, 7815, 431, 4025, 10691, 175, 44343, 178, 14158, 77953, 28334, 468, 1325, 37227, 163503, 176, 454, 172037, 593, 454, 6963, 454, 17113, 176, 13759, 468, 135491, 4904, 258, 11429, 54897, 396, 454, 99186, 9980, 140, 4904, 3341, 21879, 870, 454, 8598, 220339, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

[INFO|trainer.py:511] 2023-02-21 15:15:37,514 >> max_steps is given, it will override any value given in num_train_epochs
[INFO|trainer.py:710] 2023-02-21 15:15:37,516 >> The following columns in the training set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:1650] 2023-02-21 15:15:37,536 >> ***** Running training *****
[INFO|trainer.py:1651] 2023-02-21 15:15:37,538 >>   Num examples = 125
[INFO|trainer.py:1652] 2023-02-21 15:15:37,539 >>   Num Epochs = 63
[INFO|trainer.py:1653] 2023-02-21 15:15:37,541 >>   Instantaneous batch size per device = 16
[INFO|trainer.py:1654] 2023-02-21 15:15:37,542 >>   Total train batch size (w. parallel, distributed & accumulation) = 16
[INFO|trainer.py:1655] 2023-02-21 15:15:37,543 >>   Gradient Accumulation steps = 1
[INFO|trainer.py:1656] 2

Step,Training Loss,Validation Loss,Accuracy
50,0.5382,0.315805,0.875
100,0.0916,0.689839,0.894
150,0.0182,0.763971,0.9
200,0.0111,0.837621,0.89
250,0.0336,0.950676,0.878
300,0.0303,0.819127,0.893
350,0.0002,0.91109,0.885
400,0.0002,0.921493,0.886
450,0.0002,0.929829,0.886
500,0.0002,0.931692,0.886


[INFO|trainer.py:710] 2023-02-21 15:16:01,286 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:16:01,289 >> ***** Running Evaluation *****
[INFO|trainer.py:2966] 2023-02-21 15:16:01,291 >>   Num examples = 1000
[INFO|trainer.py:2969] 2023-02-21 15:16:01,293 >>   Batch size = 16
[INFO|trainer.py:710] 2023-02-21 15:16:30,822 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:16:30,826 >> ***** Running Evaluation *****
[INFO|trainer.py:296

***** train metrics *****
  epoch                    =       62.5
  total_flos               =   478688GF
  train_loss               =     0.0724
  train_runtime            = 0:05:10.05
  train_samples            =        125
  train_samples_per_second =     25.802
  train_steps_per_second   =      1.613


INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:21:00,310 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:21:00,317 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:21:00,320 >>   Num examples = 1500
[INFO|trainer.py:2969] 2023-02-21 15:21:00,322 >>   Batch size = 16


***** eval metrics *****
  epoch                   =       62.5
  eval_accuracy           =      0.886
  eval_loss               =     0.9317
  eval_runtime            = 0:00:07.37
  eval_samples            =       1000
  eval_samples_per_second =    135.661
  eval_steps_per_second   =      8.547


INFO:root:Candidate samples count for active learning : 500
[INFO|training_args.py:1402] 2023-02-21 15:21:11,908 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:21:11,913 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
eva

Current Active Learning Iteration: 1


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-7d0649081fd79da0.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:21:12,189 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:21:12,193 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-c2cc1a69d7c30509.arrow


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-cc486be08316473a.arrow
[INFO|trainer.py:511] 2023-02-21 15:21:22,224 >> max_steps is given, it will override any value given in num_train_epochs
INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:21:22,234 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:21:22,240 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:21:22,242 >>   Num examples = 500
[INFO|trainer.py:2969] 2023-02-21 15:21:22,243 >>   Batch size = 16


  0%|          | 0/8 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-b3f7b84251980cca.arrow
[INFO|training_args.py:1402] 2023-02-21 15:21:32,327 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:21:32,330 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=18

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-08a7a77278dd114a.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:21:32,525 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:21:32,527 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-b685fabec9ed0849.arrow
INFO:root:Sample 121 of the training set: {'idx': 6586, 'sentence1': '🔴به هر کجا که روی، روی من همان سوی است☀️ استادم☀️ #Taheri_Movement', 'label': 0, 'input_ids': [0, 6, 244361, 6963, 2255, 80729, 403, 5852, 50, 5852, 230, 25244, 22676, 477, 231873, 15755, 28342, 376, 231873, 15755, 468, 9874, 86151, 454, 9083, 272, 674, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

Step,Training Loss,Validation Loss,Accuracy
50,0.503,0.422624,0.87
100,0.3324,0.29755,0.89
150,0.1169,0.414849,0.908
200,0.0583,0.530542,0.909
250,0.0314,0.801202,0.882
300,0.0121,0.673991,0.9
350,0.0141,0.767564,0.892
400,0.0006,0.741677,0.899
450,0.0005,0.776936,0.896
500,0.0005,0.779241,0.895


[INFO|trainer.py:710] 2023-02-21 15:21:59,049 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:21:59,053 >> ***** Running Evaluation *****
[INFO|trainer.py:2966] 2023-02-21 15:21:59,056 >>   Num examples = 1000
[INFO|trainer.py:2969] 2023-02-21 15:21:59,060 >>   Batch size = 16
[INFO|trainer.py:710] 2023-02-21 15:22:27,719 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:22:27,724 >> ***** Running Evaluation *****
[INFO|trainer.py:296

***** train metrics *****
  epoch                    =      33.33
  total_flos               =   459758GF
  train_loss               =      0.107
  train_runtime            = 0:05:02.40
  train_samples            =        225
  train_samples_per_second =     26.454
  train_steps_per_second   =      1.653


INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:26:52,628 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:26:52,633 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:26:52,635 >>   Num examples = 1500
[INFO|trainer.py:2969] 2023-02-21 15:26:52,638 >>   Batch size = 16


***** eval metrics *****
  epoch                   =      33.33
  eval_accuracy           =      0.895
  eval_loss               =     0.7792
  eval_runtime            = 0:00:07.27
  eval_samples            =       1000
  eval_samples_per_second =    137.541
  eval_steps_per_second   =      8.665


INFO:root:Candidate samples count for active learning : 500
[INFO|training_args.py:1402] 2023-02-21 15:27:04,165 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:27:04,168 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
eva

Current Active Learning Iteration: 2


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-ca1caa87e5fe4361.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:27:04,387 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:27:04,389 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-5a003ea3d777593c.arrow


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-afdfc1d4fe406e91.arrow
[INFO|trainer.py:511] 2023-02-21 15:27:10,423 >> max_steps is given, it will override any value given in num_train_epochs
INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:27:10,436 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:27:10,445 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:27:10,447 >>   Num examples = 500
[INFO|trainer.py:2969] 2023-02-21 15:27:10,452 >>   Batch size = 16


  0%|          | 0/8 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-14641961a57906a5.arrow
[INFO|training_args.py:1402] 2023-02-21 15:27:18,476 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:27:18,477 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=18

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-544f946e559e0708.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:27:18,702 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:27:18,704 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-30e35c8aef52e46c.arrow
INFO:root:Sample 242 of the training set: {'idx': 520, 'sentence1': 'این منشن وزیر خارجه بحرین به ظریف عالیه، دو سه بار جر خوردم از خنده :)))))))))))))))', 'label': 1, 'input_ids': [0, 498, 230, 10769, 6697, 89575, 52342, 2727, 178, 164963, 33231, 176, 50, 2254, 8052, 3697, 21542, 88186, 376, 270, 158507, 83999, 167089, 167089, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

Step,Training Loss,Validation Loss,Accuracy
50,0.5797,0.385208,0.856
100,0.2659,0.336184,0.904
150,0.2022,0.630298,0.864
200,0.1348,0.438637,0.915
250,0.0724,0.467109,0.918
300,0.0507,0.692912,0.891
350,0.0441,0.543046,0.914
400,0.0369,0.489143,0.921
450,0.0194,0.566675,0.91
500,0.0223,0.556186,0.914


[INFO|trainer.py:710] 2023-02-21 15:27:46,132 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:27:46,136 >> ***** Running Evaluation *****
[INFO|trainer.py:2966] 2023-02-21 15:27:46,139 >>   Num examples = 1000
[INFO|trainer.py:2969] 2023-02-21 15:27:46,141 >>   Batch size = 16
[INFO|trainer.py:710] 2023-02-21 15:28:15,170 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:28:15,174 >> ***** Running Evaluation *****
[INFO|trainer.py:296

***** train metrics *****
  epoch                    =      23.81
  total_flos               =   474583GF
  train_loss               =     0.1428
  train_runtime            = 0:05:06.80
  train_samples            =        325
  train_samples_per_second =     26.075
  train_steps_per_second   =       1.63


INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:32:43,903 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:32:43,908 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:32:43,918 >>   Num examples = 1500
[INFO|trainer.py:2969] 2023-02-21 15:32:43,919 >>   Batch size = 16


***** eval metrics *****
  epoch                   =      23.81
  eval_accuracy           =      0.914
  eval_loss               =     0.5562
  eval_runtime            = 0:00:07.28
  eval_samples            =       1000
  eval_samples_per_second =    137.245
  eval_steps_per_second   =      8.646


INFO:root:Candidate samples count for active learning : 500
[INFO|training_args.py:1402] 2023-02-21 15:32:55,504 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:32:55,506 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
eva

Current Active Learning Iteration: 3


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-9bc55971db26ab0d.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:32:55,760 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:32:55,762 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-c9a6d9023b0bf3b2.arrow


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-2e694f5ca9e2f67f.arrow
[INFO|trainer.py:511] 2023-02-21 15:33:02,093 >> max_steps is given, it will override any value given in num_train_epochs
INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:33:02,107 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:33:02,117 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:33:02,122 >>   Num examples = 500
[INFO|trainer.py:2969] 2023-02-21 15:33:02,124 >>   Batch size = 16


  0%|          | 0/8 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-fc0227d87324ae7d.arrow
[INFO|training_args.py:1402] 2023-02-21 15:33:10,109 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:33:10,110 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=18

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-76d75319d52daf34.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:33:10,319 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:33:10,321 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-1bacb8a711cfd34d.arrow
INFO:root:Sample 242 of the training set: {'idx': 520, 'sentence1': 'این منشن وزیر خارجه بحرین به ظریف عالیه، دو سه بار جر خوردم از خنده :)))))))))))))))', 'label': 1, 'input_ids': [0, 498, 230, 10769, 6697, 89575, 52342, 2727, 178, 164963, 33231, 176, 50, 2254, 8052, 3697, 21542, 88186, 376, 270, 158507, 83999, 167089, 167089, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

Step,Training Loss,Validation Loss,Accuracy
50,0.5352,0.321198,0.865
100,0.2624,0.315943,0.905
150,0.1232,0.476763,0.907
200,0.0539,0.513318,0.914
250,0.0876,0.513383,0.919
300,0.0331,0.525371,0.916
350,0.0434,0.70234,0.901
400,0.0111,0.707605,0.9
450,0.0009,0.74223,0.9
500,0.0007,0.722919,0.904


[INFO|trainer.py:710] 2023-02-21 15:33:38,124 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:33:38,129 >> ***** Running Evaluation *****
[INFO|trainer.py:2966] 2023-02-21 15:33:38,136 >>   Num examples = 1000
[INFO|trainer.py:2969] 2023-02-21 15:33:38,138 >>   Batch size = 16
[INFO|trainer.py:710] 2023-02-21 15:34:07,315 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:34:07,319 >> ***** Running Evaluation *****
[INFO|trainer.py:296

***** train metrics *****
  epoch                    =      18.52
  total_flos               =   482363GF
  train_loss               =     0.1152
  train_runtime            = 0:05:09.22
  train_samples            =        425
  train_samples_per_second =     25.871
  train_steps_per_second   =      1.617


INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:38:37,751 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:38:37,755 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:38:37,757 >>   Num examples = 1500
[INFO|trainer.py:2969] 2023-02-21 15:38:37,759 >>   Batch size = 16


***** eval metrics *****
  epoch                   =      18.52
  eval_accuracy           =      0.904
  eval_loss               =     0.7229
  eval_runtime            = 0:00:07.26
  eval_samples            =       1000
  eval_samples_per_second =    137.639
  eval_steps_per_second   =      8.671


INFO:root:Candidate samples count for active learning : 500
[INFO|training_args.py:1402] 2023-02-21 15:38:49,229 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:38:49,230 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
eva

Current Active Learning Iteration: 4


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-6df8f393621c2368.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:38:49,553 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:38:49,558 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-839da528d083b408.arrow


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-661d72e2147a5b7e.arrow
[INFO|trainer.py:511] 2023-02-21 15:38:56,346 >> max_steps is given, it will override any value given in num_train_epochs
INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:38:56,358 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:38:56,365 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:38:56,369 >>   Num examples = 500
[INFO|trainer.py:2969] 2023-02-21 15:38:56,371 >>   Batch size = 16


  0%|          | 0/8 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-f981ddf654314fbd.arrow
[INFO|training_args.py:1402] 2023-02-21 15:39:04,977 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:39:04,984 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=18

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-6a39d174dce8b7d2.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:39:15,195 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:39:15,197 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-0bc4b12dc196d487.arrow
INFO:root:Sample 485 of the training set: {'idx': 63, 'sentence1': 'جملات الهام بخش میخونم سرمو تکون میدمچشمامو تنگ میکنمو به افق خیره میشمکه سرزنشگر درونم ملتفت بشه', 'label': 0, 'input_ids': [0, 97415, 396, 139926, 6771, 383, 47577, 28918, 2900, 15617, 4675, 900, 383, 16994, 4025, 870, 56898, 431, 92845, 107697, 431, 178, 878, 18809, 41500, 176, 383, 30771, 19654, 2900, 26760, 870, 11277, 73756, 376, 23433, 10488, 117774, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

Step,Training Loss,Validation Loss,Accuracy
50,0.5654,0.451039,0.732
100,0.4035,0.337484,0.889
150,0.2481,0.265187,0.904
200,0.179,0.406298,0.915
250,0.174,0.416112,0.916
300,0.1157,0.38362,0.922
350,0.0802,0.418301,0.92
400,0.0691,0.515517,0.91
450,0.0802,0.529411,0.909
500,0.0483,0.501089,0.917


[INFO|trainer.py:710] 2023-02-21 15:40:10,259 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:40:10,263 >> ***** Running Evaluation *****
[INFO|trainer.py:2966] 2023-02-21 15:40:10,265 >>   Num examples = 1000
[INFO|trainer.py:2969] 2023-02-21 15:40:10,267 >>   Batch size = 16
[INFO|trainer.py:710] 2023-02-21 15:40:39,918 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:40:39,924 >> ***** Running Evaluation *****
[INFO|trainer.py:296

***** train metrics *****
  epoch                    =      15.15
  total_flos               =   487325GF
  train_loss               =     0.1963
  train_runtime            = 0:05:09.49
  train_samples            =        525
  train_samples_per_second =     25.848
  train_steps_per_second   =      1.616


INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:45:10,008 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:45:10,015 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:45:10,017 >>   Num examples = 1500
[INFO|trainer.py:2969] 2023-02-21 15:45:10,022 >>   Batch size = 16


***** eval metrics *****
  epoch                   =      15.15
  eval_accuracy           =      0.917
  eval_loss               =     0.5011
  eval_runtime            = 0:00:07.29
  eval_samples            =       1000
  eval_samples_per_second =    137.095
  eval_steps_per_second   =      8.637


INFO:root:Candidate samples count for active learning : 500
[INFO|training_args.py:1402] 2023-02-21 15:45:21,560 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:45:21,562 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
eva

Current Active Learning Iteration: 5


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-f28a0afaf18fa38c.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:45:21,802 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:45:21,806 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-49aa274a6b7fd1e3.arrow


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-3153f65bc4b32e59.arrow
[INFO|trainer.py:511] 2023-02-21 15:45:27,988 >> max_steps is given, it will override any value given in num_train_epochs
INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:45:28,004 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:45:28,012 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:45:28,012 >>   Num examples = 500
[INFO|trainer.py:2969] 2023-02-21 15:45:28,017 >>   Batch size = 16


  0%|          | 0/8 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-9b9b8718ee4681f1.arrow
[INFO|training_args.py:1402] 2023-02-21 15:45:36,343 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:45:36,345 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=18

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-5bfd7dee67cd415c.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:45:36,587 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:45:36,589 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-9837d8234df9ccc2.arrow
INFO:root:Sample 485 of the training set: {'idx': 63, 'sentence1': 'جملات الهام بخش میخونم سرمو تکون میدمچشمامو تنگ میکنمو به افق خیره میشمکه سرزنشگر درونم ملتفت بشه', 'label': 0, 'input_ids': [0, 97415, 396, 139926, 6771, 383, 47577, 28918, 2900, 15617, 4675, 900, 383, 16994, 4025, 870, 56898, 431, 92845, 107697, 431, 178, 878, 18809, 41500, 176, 383, 30771, 19654, 2900, 26760, 870, 11277, 73756, 376, 23433, 10488, 117774, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

Step,Training Loss,Validation Loss,Accuracy
50,0.5897,0.511763,0.732
100,0.3832,0.347798,0.866
150,0.3501,0.319248,0.891
200,0.2829,0.418762,0.904
250,0.1184,0.527164,0.912
300,0.1367,0.35025,0.918
350,0.0847,0.483057,0.916
400,0.0656,0.486232,0.919
450,0.0733,0.518786,0.912
500,0.0613,0.526412,0.912


[INFO|trainer.py:710] 2023-02-21 15:46:04,603 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:46:04,608 >> ***** Running Evaluation *****
[INFO|trainer.py:2966] 2023-02-21 15:46:04,611 >>   Num examples = 1000
[INFO|trainer.py:2969] 2023-02-21 15:46:04,612 >>   Batch size = 16
[INFO|trainer.py:710] 2023-02-21 15:46:33,761 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:46:33,766 >> ***** Running Evaluation *****
[INFO|trainer.py:296

***** train metrics *****
  epoch                    =       12.5
  total_flos               =   479055GF
  train_loss               =     0.2146
  train_runtime            = 0:05:08.32
  train_samples            =        625
  train_samples_per_second =     25.947
  train_steps_per_second   =      1.622


INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:51:04,067 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:51:04,071 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:51:04,076 >>   Num examples = 1500
[INFO|trainer.py:2969] 2023-02-21 15:51:04,078 >>   Batch size = 16


***** eval metrics *****
  epoch                   =       12.5
  eval_accuracy           =      0.912
  eval_loss               =     0.5264
  eval_runtime            = 0:00:07.34
  eval_samples            =       1000
  eval_samples_per_second =    136.182
  eval_steps_per_second   =      8.579


INFO:root:Candidate samples count for active learning : 500
[INFO|training_args.py:1402] 2023-02-21 15:51:15,703 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:51:15,704 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
eva

Current Active Learning Iteration: 6


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-2de26d148d62298e.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:51:15,948 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:51:15,950 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-de8782c7c3e2a6fc.arrow


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-5917ba611e27d7e4.arrow
[INFO|trainer.py:511] 2023-02-21 15:51:22,728 >> max_steps is given, it will override any value given in num_train_epochs
INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:51:22,742 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:51:22,753 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:51:22,757 >>   Num examples = 500
[INFO|trainer.py:2969] 2023-02-21 15:51:22,761 >>   Batch size = 16


  0%|          | 0/8 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-c8b526df911f9947.arrow
[INFO|training_args.py:1402] 2023-02-21 15:51:30,879 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:51:30,882 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=18

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-9e6deb7bb9fe4599.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:51:31,108 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:51:31,110 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-2cc59ff20a3449c6.arrow
INFO:root:Sample 485 of the training set: {'idx': 63, 'sentence1': 'جملات الهام بخش میخونم سرمو تکون میدمچشمامو تنگ میکنمو به افق خیره میشمکه سرزنشگر درونم ملتفت بشه', 'label': 0, 'input_ids': [0, 97415, 396, 139926, 6771, 383, 47577, 28918, 2900, 15617, 4675, 900, 383, 16994, 4025, 870, 56898, 431, 92845, 107697, 431, 178, 878, 18809, 41500, 176, 383, 30771, 19654, 2900, 26760, 870, 11277, 73756, 376, 23433, 10488, 117774, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

Step,Training Loss,Validation Loss,Accuracy
50,0.5686,0.425097,0.732
100,0.3337,0.428839,0.875
150,0.2426,0.416137,0.884
200,0.1878,0.38785,0.915
250,0.1091,0.67215,0.881
300,0.0874,0.468614,0.915
350,0.0946,0.483304,0.914
400,0.0705,0.518212,0.914
450,0.0567,0.475582,0.917
500,0.0409,0.482256,0.916


[INFO|trainer.py:710] 2023-02-21 15:51:59,552 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:51:59,556 >> ***** Running Evaluation *****
[INFO|trainer.py:2966] 2023-02-21 15:51:59,557 >>   Num examples = 1000
[INFO|trainer.py:2969] 2023-02-21 15:51:59,561 >>   Batch size = 16
[INFO|trainer.py:710] 2023-02-21 15:52:28,806 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:52:28,812 >> ***** Running Evaluation *****
[INFO|trainer.py:296

***** train metrics *****
  epoch                    =      10.87
  total_flos               =   483343GF
  train_loss               =     0.1792
  train_runtime            = 0:05:09.44
  train_samples            =        725
  train_samples_per_second =     25.853
  train_steps_per_second   =      1.616


INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:56:59,763 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:56:59,769 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:56:59,776 >>   Num examples = 1500
[INFO|trainer.py:2969] 2023-02-21 15:56:59,781 >>   Batch size = 16


***** eval metrics *****
  epoch                   =      10.87
  eval_accuracy           =      0.916
  eval_loss               =     0.4823
  eval_runtime            = 0:00:07.31
  eval_samples            =       1000
  eval_samples_per_second =    136.754
  eval_steps_per_second   =      8.616


INFO:root:Candidate samples count for active learning : 500
[INFO|training_args.py:1402] 2023-02-21 15:57:11,352 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:57:11,354 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
eva

Current Active Learning Iteration: 7


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-19850fc3a9afa22c.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:57:11,579 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:57:11,581 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-280b47efca5502d6.arrow


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-e5fb6a0a54ad2258.arrow
[INFO|trainer.py:511] 2023-02-21 15:57:17,882 >> max_steps is given, it will override any value given in num_train_epochs
INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 15:57:17,893 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:57:17,899 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 15:57:17,901 >>   Num examples = 500
[INFO|trainer.py:2969] 2023-02-21 15:57:17,904 >>   Batch size = 16


  0%|          | 0/8 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-151806b784a5d71f.arrow
[INFO|training_args.py:1402] 2023-02-21 15:57:26,100 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 15:57:26,103 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=18

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-fd1645f29a4623d8.arrow
[INFO|configuration_utils.py:660] 2023-02-21 15:57:26,341 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 15:57:26,344 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-e3b1ba9e3a2e20df.arrow
INFO:root:Sample 485 of the training set: {'idx': 63, 'sentence1': 'جملات الهام بخش میخونم سرمو تکون میدمچشمامو تنگ میکنمو به افق خیره میشمکه سرزنشگر درونم ملتفت بشه', 'label': 0, 'input_ids': [0, 97415, 396, 139926, 6771, 383, 47577, 28918, 2900, 15617, 4675, 900, 383, 16994, 4025, 870, 56898, 431, 92845, 107697, 431, 178, 878, 18809, 41500, 176, 383, 30771, 19654, 2900, 26760, 870, 11277, 73756, 376, 23433, 10488, 117774, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

Step,Training Loss,Validation Loss,Accuracy
50,0.531,0.458896,0.841
100,0.3514,0.312962,0.904
150,0.221,0.299079,0.902
200,0.1315,0.399625,0.912
250,0.0835,0.435024,0.915
300,0.1069,0.415196,0.918
350,0.0815,0.432295,0.919
400,0.0723,0.472336,0.914
450,0.065,0.522392,0.909
500,0.0515,0.470605,0.915


[INFO|trainer.py:710] 2023-02-21 15:57:54,748 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:57:54,757 >> ***** Running Evaluation *****
[INFO|trainer.py:2966] 2023-02-21 15:57:54,760 >>   Num examples = 1000
[INFO|trainer.py:2969] 2023-02-21 15:57:54,762 >>   Batch size = 16
[INFO|trainer.py:710] 2023-02-21 15:58:24,048 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 15:58:24,055 >> ***** Running Evaluation *****
[INFO|trainer.py:296

***** train metrics *****
  epoch                    =       9.62
  total_flos               =   486223GF
  train_loss               =     0.1696
  train_runtime            = 0:05:10.90
  train_samples            =        825
  train_samples_per_second =     25.731
  train_steps_per_second   =      1.608


INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 16:02:56,023 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:02:56,033 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 16:02:56,035 >>   Num examples = 1500
[INFO|trainer.py:2969] 2023-02-21 16:02:56,040 >>   Batch size = 16


***** eval metrics *****
  epoch                   =       9.62
  eval_accuracy           =      0.915
  eval_loss               =     0.4706
  eval_runtime            = 0:00:07.31
  eval_samples            =       1000
  eval_samples_per_second =     136.76
  eval_steps_per_second   =      8.616


INFO:root:Candidate samples count for active learning : 500
[INFO|training_args.py:1402] 2023-02-21 16:03:07,617 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 16:03:07,618 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
eva

Current Active Learning Iteration: 8


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-9107485866323697.arrow
[INFO|configuration_utils.py:660] 2023-02-21 16:03:07,887 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 16:03:07,889 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-c280c8c925e04276.arrow


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-59bf544376464166.arrow
[INFO|trainer.py:511] 2023-02-21 16:03:14,611 >> max_steps is given, it will override any value given in num_train_epochs
INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 16:03:14,620 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:03:14,626 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 16:03:14,628 >>   Num examples = 500
[INFO|trainer.py:2969] 2023-02-21 16:03:14,630 >>   Batch size = 16


  0%|          | 0/8 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-923802991924db91.arrow
[INFO|training_args.py:1402] 2023-02-21 16:03:22,777 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 16:03:22,779 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=18

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-232f5252b23f6328.arrow
[INFO|configuration_utils.py:660] 2023-02-21 16:03:23,039 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 16:03:23,041 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-21637dadc5956ad7.arrow
INFO:root:Sample 485 of the training set: {'idx': 63, 'sentence1': 'جملات الهام بخش میخونم سرمو تکون میدمچشمامو تنگ میکنمو به افق خیره میشمکه سرزنشگر درونم ملتفت بشه', 'label': 0, 'input_ids': [0, 97415, 396, 139926, 6771, 383, 47577, 28918, 2900, 15617, 4675, 900, 383, 16994, 4025, 870, 56898, 431, 92845, 107697, 431, 178, 878, 18809, 41500, 176, 383, 30771, 19654, 2900, 26760, 870, 11277, 73756, 376, 23433, 10488, 117774, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

Step,Training Loss,Validation Loss,Accuracy
50,0.5506,0.507333,0.732
100,0.3274,0.348004,0.878
150,0.2918,0.37203,0.895
200,0.178,0.363367,0.915
250,0.1117,0.581776,0.903
300,0.1631,0.468988,0.915
350,0.0681,0.522556,0.916
400,0.073,0.522734,0.916
450,0.0728,0.496918,0.917
500,0.0456,0.507195,0.916


[INFO|trainer.py:710] 2023-02-21 16:03:51,604 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:03:51,609 >> ***** Running Evaluation *****
[INFO|trainer.py:2966] 2023-02-21 16:03:51,611 >>   Num examples = 1000
[INFO|trainer.py:2969] 2023-02-21 16:03:51,613 >>   Batch size = 16
[INFO|trainer.py:710] 2023-02-21 16:04:21,154 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:04:21,159 >> ***** Running Evaluation *****
[INFO|trainer.py:296

***** train metrics *****
  epoch                    =       8.62
  total_flos               =   488612GF
  train_loss               =     0.1882
  train_runtime            = 0:05:11.69
  train_samples            =        925
  train_samples_per_second =     25.666
  train_steps_per_second   =      1.604


INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 16:08:53,926 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:08:53,932 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 16:08:53,935 >>   Num examples = 1500
[INFO|trainer.py:2969] 2023-02-21 16:08:53,941 >>   Batch size = 16


***** eval metrics *****
  epoch                   =       8.62
  eval_accuracy           =      0.916
  eval_loss               =     0.5072
  eval_runtime            = 0:00:07.31
  eval_samples            =       1000
  eval_samples_per_second =    136.688
  eval_steps_per_second   =      8.611


INFO:root:Candidate samples count for active learning : 500
[INFO|training_args.py:1402] 2023-02-21 16:09:05,478 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 16:09:05,481 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
eva

Current Active Learning Iteration: 9


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-0f57f2ad954c8b12.arrow
[INFO|configuration_utils.py:660] 2023-02-21 16:09:05,708 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 16:09:05,712 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-fd4f1e99e52b8718.arrow


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-c149d45597c2b4d1.arrow
[INFO|trainer.py:511] 2023-02-21 16:09:12,253 >> max_steps is given, it will override any value given in num_train_epochs
INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 16:09:12,266 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:09:12,274 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 16:09:12,278 >>   Num examples = 500
[INFO|trainer.py:2969] 2023-02-21 16:09:12,281 >>   Batch size = 16


  0%|          | 0/8 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-d48397162cfa5641.arrow
[INFO|training_args.py:1402] 2023-02-21 16:09:21,001 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 16:09:21,004 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=18

Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-82a28885fadb0c4e.arrow
[INFO|configuration_utils.py:660] 2023-02-21 16:09:21,279 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 16:09:21,282 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-4a4f01f319526efe.arrow
INFO:root:Sample 971 of the training set: {'idx': 495, 'sentence1': ' حمایت از زن مسلمان...در ضمن گزینه 1و2اولی با گزینه 1و2دومیدومی در تضادند چند چندی مغلطه؟', 'label': 1, 'input_ids': [0, 23348, 270, 17847, 27265, 27, 5963, 17162, 83076, 106, 431, 304, 258, 63413, 412, 83076, 106, 431, 304, 147299, 2929, 114167, 175, 238368, 1787, 7060, 7060, 140, 126192, 75026, 1245, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

Step,Training Loss,Validation Loss,Accuracy
50,0.6062,0.506423,0.732
100,0.3661,0.310393,0.894
150,0.2051,0.341128,0.911
200,0.1264,0.440833,0.901
250,0.1023,0.51393,0.903
300,0.1282,0.616559,0.899
350,0.068,0.526783,0.916
400,0.0563,0.528044,0.914
450,0.0531,0.528567,0.914
500,0.0521,0.537803,0.914


[INFO|trainer.py:710] 2023-02-21 16:09:49,511 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:09:49,515 >> ***** Running Evaluation *****
[INFO|trainer.py:2966] 2023-02-21 16:09:49,521 >>   Num examples = 1000
[INFO|trainer.py:2969] 2023-02-21 16:09:49,523 >>   Batch size = 16
[INFO|trainer.py:710] 2023-02-21 16:10:18,719 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:10:18,723 >> ***** Running Evaluation *****
[INFO|trainer.py:296

***** train metrics *****
  epoch                    =       7.69
  total_flos               =   483650GF
  train_loss               =     0.1764
  train_runtime            = 0:05:08.93
  train_samples            =       1025
  train_samples_per_second =     25.895
  train_steps_per_second   =      1.618


INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 16:14:48,635 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:14:48,639 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 16:14:48,642 >>   Num examples = 1500
[INFO|trainer.py:2969] 2023-02-21 16:14:48,644 >>   Batch size = 16


***** eval metrics *****
  epoch                   =       7.69
  eval_accuracy           =      0.914
  eval_loss               =     0.5378
  eval_runtime            = 0:00:07.31
  eval_samples            =       1000
  eval_samples_per_second =    136.683
  eval_steps_per_second   =      8.611


INFO:root:Candidate samples count for active learning : 500
[INFO|training_args.py:1402] 2023-02-21 16:15:00,245 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 16:15:00,247 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
eva

Current Active Learning Iteration: 10


Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-793767aa9a88a60b.arrow
[INFO|configuration_utils.py:660] 2023-02-21 16:15:00,496 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 16:15:00,498 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-45e798a9bfcb5135.arrow


Running tokenizer on dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-257ede92eaa6ab08.arrow
[INFO|trainer.py:511] 2023-02-21 16:15:06,664 >> max_steps is given, it will override any value given in num_train_epochs
INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 16:15:06,679 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:15:06,688 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 16:15:06,692 >>   Num examples = 500
[INFO|trainer.py:2969] 2023-02-21 16:15:06,695 >>   Batch size = 16


  0%|          | 0/8 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-95843bd4dfcebeb5.arrow
[INFO|training_args.py:1402] 2023-02-21 16:15:15,408 >> PyTorch: setting up devices
[INFO|training_args.py:1230] 2023-02-21 16:15:15,414 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
INFO:root:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=18

Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-e5719437b33ddd24.arrow
[INFO|configuration_utils.py:660] 2023-02-21 16:15:15,712 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
[INFO|configuration_utils.py:712] 2023-02-21 16:15:15,717 >> Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 1

Running tokenizer on dataset:   0%|          | 0/2 [00:00<?, ?ba/s]

INFO:datasets.arrow_dataset:Caching processed dataset at /root/.cache/huggingface/datasets/csv/default-86b48f70a638e299/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-53d526199a9828d6.arrow
INFO:root:Sample 971 of the training set: {'idx': 495, 'sentence1': ' حمایت از زن مسلمان...در ضمن گزینه 1و2اولی با گزینه 1و2دومیدومی در تضادند چند چندی مغلطه؟', 'label': 1, 'input_ids': [0, 23348, 270, 17847, 27265, 27, 5963, 17162, 83076, 106, 431, 304, 258, 63413, 412, 83076, 106, 431, 304, 147299, 2929, 114167, 175, 238368, 1787, 7060, 7060, 140, 126192, 75026, 1245, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

Step,Training Loss,Validation Loss,Accuracy
50,0.5119,0.493852,0.81
100,0.3989,0.443801,0.88
150,0.2124,0.376345,0.907
200,0.1453,0.375972,0.913
250,0.0932,0.435411,0.914
300,0.0848,0.546865,0.899
350,0.0424,0.567234,0.9
400,0.0305,0.516659,0.914
450,0.0231,0.534204,0.915
500,0.0315,0.503279,0.92


[INFO|trainer.py:710] 2023-02-21 16:15:43,938 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:15:43,943 >> ***** Running Evaluation *****
[INFO|trainer.py:2966] 2023-02-21 16:15:43,947 >>   Num examples = 1000
[INFO|trainer.py:2969] 2023-02-21 16:15:43,949 >>   Batch size = 16
[INFO|trainer.py:710] 2023-02-21 16:16:13,222 >> The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:16:13,229 >> ***** Running Evaluation *****
[INFO|trainer.py:296

***** train metrics *****
  epoch                    =       7.04
  total_flos               =   485365GF
  train_loss               =     0.1574
  train_runtime            = 0:05:09.75
  train_samples            =       1125
  train_samples_per_second =     25.827
  train_steps_per_second   =      1.614


INFO:root:*** Predict ***
[INFO|trainer.py:710] 2023-02-21 16:20:43,798 >> The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: sentence1, idx. If sentence1, idx are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2964] 2023-02-21 16:20:43,802 >> ***** Running Prediction *****
[INFO|trainer.py:2966] 2023-02-21 16:20:43,804 >>   Num examples = 1500
[INFO|trainer.py:2969] 2023-02-21 16:20:43,806 >>   Batch size = 16


***** eval metrics *****
  epoch                   =       7.04
  eval_accuracy           =       0.92
  eval_loss               =     0.5033
  eval_runtime            = 0:00:07.33
  eval_samples            =       1000
  eval_samples_per_second =     136.26
  eval_steps_per_second   =      8.584


In [None]:
! mv ./results/max_entropy/ ./drive/MyDrive/Thesis/Data/experiments/