# Loading data using Hugging Face datasets methods

https://huggingface.co/docs/datasets/v1.11.0/loading_datasets.html

https://huggingface.co/learn/nlp-course/chapter5/5?fw=tf

In [1]:
import glob
import json
import logging
import os
import sys

import numpy as np
import pandas as pd
import tensorflow as tf
import toml
from datasets import load_dataset, Dataset, DatasetDict

from transformers import (AutoTokenizer,
                         TFAutoModelForSequenceClassification,
                          TFBertForSequenceClassification,
                         DataCollatorWithPadding,
                         TFPreTrainedModel,
              TFGPT2ForSequenceClassification,)

# keras for training
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers.schedules import PolynomialDecay
from tensorflow.keras.optimizers import Adam
import sklearn.metrics as metrics
from lxml import etree

currentdir = os.path.abspath(os.path.curdir)
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir) 
sys.path.insert(0,parentdir+'/embed') 
from classifier_trainer.trainer import stream_arxiv_paragraphs
import parsing_xml as px
import peep_tar as peep

from train_lstm import gen_cfg, find_best_cutoff
%load_ext autoreload
%autoreload 2
from extract import Definiendum
args = []
#xml_lst, cfg = gen_cfg(config_path='../config.toml')

In [2]:
# Show the GPUs TensorFlow can see; an empty list means no GPU is visible to it.
tf.config.list_physical_devices("GPU")

2023-10-23 20:29:13.491447: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-23 20:29:13.492612: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-23 20:29:13.492816: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Read the project configuration and keep only its 'finetuning' table.
cfg = toml.load('../config.toml')['finetuning']

In [4]:
# Collect the gzipped XML training files.
# glob.glob() returns paths in a filesystem-dependent order, so sort them:
# a later cell keeps only the leading part of the data, and without a stable
# file order that subset would differ from one machine/run to the next.
xml_lst = sorted(glob.glob('/media/hd1/training_defs/math18/*.xml.gz'))
# Uncomment to work with only the first quarter of the files:
#xml_lst = xml_lst[:len(xml_lst)//4]

In [5]:
# Drain the paragraph stream into two parallel lists (texts and labels) and
# wrap them in a Hugging Face Dataset with 'text' and 'label' columns.
logger = logging.getLogger(__name__)

# NOTE(review): `samples` is presumably the number of paragraphs per yielded
# chunk -- confirm against stream_arxiv_paragraphs.
stream = stream_arxiv_paragraphs(xml_lst, samples=cfg['batch_size'])

all_data = []  # only referenced by a commented-out line; kept so later cells can still use the name
all_labels = []
all_texts = []
for s in stream:
    # Read BOTH fields before touching the accumulators: if the chunk is
    # malformed, neither list is extended, so texts and labels stay aligned.
    try:
        batch_texts, batch_labels = s[0], s[1]
    except IndexError:
        logger.warning('Index error in the data stream.')
        continue
    #all_data += list(zip(batch_texts, batch_labels))
    all_texts += batch_texts
    all_labels += batch_labels

data_dict = {
    'text': all_texts,
    'label': all_labels
}
ds = Dataset.from_dict(data_dict)

In [6]:
# Peek at the first three collected paragraphs to sanity-check the stream.
all_texts[:3]

[' We define ',
 ' If _inline_math_, take coordinates as _inline_math_ such that _inline_math_. Then, nonlinear connection coefficients _inline_math_ are given by the derivatives of the functions _inline_math_ defined below with respect to _inline_math_: _inline_math_, (II.7) Equation II.7 _inline_math_ where the matrices _inline_math_ are the inverse of the matrices _inline_math_, and _inline_math_ are defined by (II.8) Equation II.8 _inline_math_ and _inline_math_ are arbtrary homogeneity-two functions and _inline_math_ are such 0-eigenvalued homogeneity-zero functions that _inline_math_. ',
 ' An _inline_math_-ordered _inline_math_-vector space is a _inline_math_-vector space _inline_math_ equipped with an _inline_math_-ordering _inline_math_ such that _inline_math_ is an ordered _inline_math_-vector space with respect to each _inline_math_. In other words, _inline_math_ is a vector space over _inline_math_, and for every _inline_math_, _inline_math_ and _inline_math_: _display_math

In [7]:
# Smoke-test that the chosen checkpoint loads as a 2-class sequence classifier.
# Other checkpoints tried here: 'bert-base-uncased', 'distilgpt2', 'openai-gpt'.
checkpoint = 'gpt2'

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)

# Some tokenizers (GPT-2 among them) define no padding token. Reuse the
# end-of-sequence token for padding and tell the model which id that is.
# (Alternative: tokenizer.add_special_tokens({'pad_token': '[PAD]'}).)
needs_pad_token = tokenizer.pad_token is None
if needs_pad_token:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

#tok2 = AutoTokenizer.from_pretrained('roberta-base')

#sequences = all_texts[:10]
#batch = dict(
#    tokenizer(sequences, return_tensors='tf',
#             padding=True, truncation=True)
#)
#model.compile(
#    optimizer='adam',
#    loss='binary_crossentropy'
#)
#labels = tf.convert_to_tensor(all_labels[:10])
#model.train_on_batch(batch, labels)

2023-10-23 20:29:15.180970: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-23 20:29:15.182056: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-23 20:29:15.182295: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-23 20:29:15.182468: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [8]:
# Sanity check: tokenize two short sentences and print the raw encoding.
demo_sentences = ['hi, this is exciting', 'this is getting boring']
print(tokenizer(demo_sentences))

{'input_ids': [[5303, 11, 428, 318, 7895], [5661, 318, 1972, 14262]], 'attention_mask': [[1, 1, 1, 1, 1], [1, 1, 1, 1]]}


In [9]:
def tok_function(example):
    """Tokenize the 'text' field of ``example`` with the notebook-level tokenizer.

    Intended as the callable passed to ``Dataset.map()``. Truncation is
    switched on; no padding is applied here.

    Returns whatever the tokenizer call returns for ``example['text']``.
    """
    texts = example['text']
    encoded = tokenizer(texts, truncation=True)
    return encoded

def add_missing_token_type(ex):
    """Attach an all-zero 'token_type_ids' entry to a batch, in place.

    For every sequence under ``ex['attention_mask']`` a list of zeros of the
    same length is created. The same ``ex`` object is returned after the new
    key has been set.
    """
    zero_rows = []
    for mask in ex['attention_mask']:
        zero_rows.append([0] * len(mask))
    ex['token_type_ids'] = zero_rows
    return ex

# Tokenize the whole dataset; batched=True hands tok_function many rows per call.
tkn_data = ds.map(tok_function, batched=True)
# Print the resulting Dataset summary (column names and row count).
print(tkn_data)

  0%|          | 0/140 [00:00<?, ?ba/s]

Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 139787
})


In [10]:
# Shrink the data, then split it into train, test and validation sets.
#
# NOTE: this cell rebinds `tkn_data` (first to a smaller Dataset, finally to a
# DatasetDict), so re-run the tokenization cell before running it again.
SHRINK_FRACTION = 0.1  # keep only the leading 10% of the tokenized rows
SPLIT_SEED = 42        # fixed seed so the shuffled splits are identical on every run

tkn_data = tkn_data.select(range(int(SHRINK_FRACTION * len(tkn_data))))

# First carve off 10% as the test set, then 10% of the remainder as validation.
temp1_dd = tkn_data.train_test_split(test_size=0.1, shuffle=True, seed=SPLIT_SEED)
temp2_dd = temp1_dd['train'].train_test_split(test_size=0.1, shuffle=True, seed=SPLIT_SEED)

tkn_data = DatasetDict({
    'train': temp2_dd['train'],
    'test': temp1_dd['test'],
    'valid': temp2_dd['test'],
})
# The intermediate split objects are no longer needed.
del temp1_dd
del temp2_dd
tkn_data

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 11322
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 1398
    })
    valid: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 1258
    })
})

In [11]:
# Collator that pads the examples of each batch and returns TensorFlow tensors.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors='tf')

# Model inputs to feed; 'token_type_ids' is deliberately left out.
column_lst = ['attention_mask', 'input_ids']

# The tokenizer needs a padding token before the collator can pad
# (see the checkpoint-loading cell, which falls back to the EOS token).


def _as_tf_dataset(split_name, shuffle):
    """Convert one split of ``tkn_data`` into a tf.data pipeline.

    All splits share the same columns, label column, collator and a batch
    size of 1; only the split name and the shuffle flag vary.
    """
    return tkn_data[split_name].to_tf_dataset(
        columns=column_lst,
        label_cols=['label'],
        shuffle=shuffle,
        collate_fn=data_collator,
        batch_size=1,
    )


# `to_tf_dataset()` takes care of batching, collation and label extraction.
tf_train_data = _as_tf_dataset('train', shuffle=True)
tf_valid_data = _as_tf_dataset('valid', shuffle=True)
tf_test_data = _as_tf_dataset('test', shuffle=False)

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [12]:
# Pull a single batch from the training pipeline, show its inputs and labels,
# and push the inputs through the model once as a forward-pass smoke test.
ii = next(iter(tf_train_data))
sample_features, sample_labels = ii
print(sample_features)
print(sample_labels)
M = model(**sample_features)

{'input_ids': <tf.Tensor: shape=(1, 24), dtype=int64, numpy=
array([[  775,   910,   326,   262, 18032,   414, 14078,  6622,   329,
          281, 24986, 16578,   283,  2214,   611,   357,  3682,     8,
         4808, 13812,    62, 11018,    62,   220]])>, 'attention_mask': <tf.Tensor: shape=(1, 24), dtype=int64, numpy=
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1]])>}
tf.Tensor([1.], shape=(1,), dtype=float32)


In [13]:
#%%script echo This is the training cell
# Fine-tune the classifier with Adam and a learning rate that decays
# polynomially from 5e-5 down to 0 over the whole run.

# NOTE(review): batch_size is not read by anything in this cell; the tf.data
# pipelines built earlier in the notebook use batch_size=1.
batch_size = 8
num_epochs = 1

# One optimizer step per dataset batch, for every epoch.
num_train_steps = num_epochs * len(tf_train_data)
lr_scheduler = PolynomialDecay(
    initial_learning_rate=5e-5,
    end_learning_rate=0.0,
    decay_steps=num_train_steps,
)
opt = Adam(learning_rate=lr_scheduler)

# To start from fresh weights, reload the model before compiling, e.g.:
#model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
#model = TFGPT2ForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
#model.resize_token_embeddings(new_num_tokens=50257 )

# from_logits=True: the loss is computed on raw logits, not probabilities.
loss = SparseCategoricalCrossentropy(from_logits=True)
#loss = tf.keras.losses.BinaryCrossentropy()

model.compile(
    optimizer=opt,
    loss=loss,
    metrics=['accuracy'],
)

# Training
model.fit(tf_train_data, validation_data=tf_valid_data, epochs=num_epochs)



<keras.callbacks.History at 0x7fa6600bfb20>

In [14]:
#model.fit( tf_train_data,  epochs=3)

In [15]:
%%script echo no hacer
# Disabled cell: the %%script magic hands this body to `echo no hacer`
# ("no hacer" is Spanish for "do not do") instead of running it as Python,
# so the only effect is printing "no hacer".
# If enabled, it would load the saved config dict of an earlier fine-tuning
# run, then reload that run's BERT classifier and the matching tokenizer.
#model.fit( tf_train_data, validation_data=tf_valid_data, epochs=num_epochs)
    
model_path = '/media/hd1/TransformersFineTuned/class-2023-06-29_1436/'
with open(model_path+'/cfg_dict.json', 'r') as fobj:
    cfg = json.loads(fobj.read())
    
#model = TFAutoModelForSequenceClassification.from_pretrained(model_path+'model')
model = TFBertForSequenceClassification.from_pretrained(model_path+'model')
# The tokenizer comes from the original checkpoint name stored in the config.
tokenizer = AutoTokenizer.from_pretrained(cfg['checkpoint'])

no hacer


In [16]:
%%script echo no hacer
# Disabled cell (the body is passed to `echo no hacer`, not executed).
# Tests the workflow of opening a promath tar.gz, parsing the first XML
# member, and extracting with the HF transformers model.
tarpath = '/media/hd1/promath/math19/1906_002.tar.gz'
# Iterate over the '.xml' members of the archive and take the first one.
tar_iter = peep.tar_iter(tarpath, '.xml')
fname, tobj = next(tar_iter)
parsing_obj = px.DefinitionsXML(tobj)
# NOTE(review): the two None arguments are positional placeholders whose
# meaning is defined by Definiendum -- confirm against the extract module.
dd = Definiendum(parsing_obj, model, None, None, tokenizer)

no hacer


In [18]:
# Run inference over the whole test pipeline.
# NOTE(review): the recorded run of this cell ended in a GPU
# ResourceExhaustedError, and `prepreds` is not used by the later visible cells.
prepreds = model.predict(tf_test_data)

ERROR:asyncio:Future exception was never retrieved
future: <Future finished exception=BrokenPipeError(32, 'Broken pipe')>
Traceback (most recent call last):
  File "/usr/lib/python3.10/asyncio/unix_events.py", line 676, in write
    n = os.write(self._fileno, data)
BrokenPipeError: [Errno 32] Broken pipe
2023-10-23 20:40:04.895955: W tensorflow/core/common_runtime/bfc_allocator.cc:462] Allocator (GPU_0_bfc) ran out of memory trying to allocate 756.0KiB (rounded to 774144)requested by op tfgpt2_for_sequence_classification/transformer/h_._7/mlp/Gelu/Pow
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2023-10-23 20:40:04.896430: I tensorflow/core/common_runtime/bfc_allocator.cc:1010] BFCAllocator dump for GPU_0_bfc
2023-10-23 20:40:04.896447: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (256): 	Total Chunks: 1197, Chun

ResourceExhaustedError: Graph execution error:

Detected at node 'tfgpt2_for_sequence_classification/transformer/h_._7/mlp/Gelu/Pow' defined at (most recent call last):
    File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/home/luis/.local/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/luis/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 1041, in launch_instance
      app.start()
    File "/home/luis/.local/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 724, in start
      self.io_loop.start()
    File "/home/luis/.local/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
      self._run_once()
    File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
      handle._run()
    File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/luis/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 512, in dispatch_queue
      await self.process_one()
    File "/home/luis/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 501, in process_one
      await dispatch(*args)
    File "/home/luis/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 408, in dispatch_shell
      await result
    File "/home/luis/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 731, in execute_request
      reply_content = await reply_content
    File "/home/luis/.local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 417, in do_execute
      res = shell.run_cell(
    File "/home/luis/.local/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2945, in run_cell
      result = self._run_cell(
    File "/home/luis/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3000, in _run_cell
      return runner(coro)
    File "/home/luis/.local/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/luis/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3203, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/luis/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3382, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/luis/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3442, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_217841/208822099.py", line 1, in <module>
      prepreds = model.predict(tf_test_data)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1982, in predict
      tmp_batch_outputs = self.predict_function(iterator)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1801, in predict_function
      return step_function(self, iterator)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1790, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/luis/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1783, in run_step
      outputs = model.predict_step(data)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1751, in predict_step
      return self(x, training=False)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/transformers/modeling_tf_utils.py", line 1148, in run_call_with_unpacked_inputs
      return cls._from_config(config, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/transformers/models/gpt2/modeling_tf_gpt2.py", line 1160, in call
      transformer_outputs = self.transformer(
    File "/home/luis/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/transformers/modeling_tf_utils.py", line 1148, in run_call_with_unpacked_inputs
      return cls._from_config(config, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/transformers/models/gpt2/modeling_tf_gpt2.py", line 476, in call
      for i, (block, layer_past) in enumerate(zip(self.h, past_key_values)):
    File "/home/luis/.local/lib/python3.10/site-packages/transformers/models/gpt2/modeling_tf_gpt2.py", line 480, in call
      outputs = block(
    File "/home/luis/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/transformers/models/gpt2/modeling_tf_gpt2.py", line 296, in call
      m = self.mlp(m, training=training)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/luis/.local/lib/python3.10/site-packages/transformers/models/gpt2/modeling_tf_gpt2.py", line 218, in call
      h = self.act(self.c_fc(x))
    File "/home/luis/.local/lib/python3.10/site-packages/transformers/activations_tf.py", line 105, in approximate_gelu_wrap
      return tf.keras.activations.gelu(x, approximate=True)
    File "/home/luis/.local/lib/python3.10/site-packages/keras/activations.py", line 351, in gelu
      return tf.nn.gelu(x, approximate)
Node: 'tfgpt2_for_sequence_classification/transformer/h_._7/mlp/Gelu/Pow'
failed to allocate memory
	 [[{{node tfgpt2_for_sequence_classification/transformer/h_._7/mlp/Gelu/Pow}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_predict_function_123179]

In [None]:
preds

In [None]:
preds = model.predict(tf_test_data)['logits']
class_preds = np.argmax(preds, axis=1)

In [None]:
%%script echo no hacer
targets = []
for b in tf_test_data.as_numpy_iterator():
    targets.extend(list(b[1])) 

In [None]:
%%script echo no hacer
metric_str = metrics.classification_report((class_preds > 0.5).astype(int), targets)
print(metric_str)

In [None]:
%%script echo uncomment if want to save
#model.save_pretrained(save_directory='/media/hd1/TransformersFineTuned/BertHF1/')

In [None]:
#dataset = load_dataset("rotten_tomatoes", split="train")

In [None]:
#dataset._info.features

## class-2023-08-14_1829
```
List of XLA GPUs: []
opt_prob=0.1 and f1_max=0.9500941619585688
              precision    recall  f1-score   support

           0       0.95      0.95      0.95     55519
           1       0.95      0.95      0.95     57465

    accuracy                           0.95    112984
   macro avg       0.95      0.95      0.95    112984
weighted avg       0.95      0.95      0.95    112984

              precision    recall  f1-score   support

           0       0.95      0.95      0.95     55519
           1       0.95      0.95      0.95     57465

    accuracy                           0.95    112984
   macro avg       0.95      0.95      0.95    112984
weighted avg       0.95      0.95      0.95    112984

{'shrink_data_factor': 1.0, 'checkpoint': 'bert-large-cased', 'glob_data_source': 'training_defs/math1*/*.xml.gz', 'data_stream_batch_size': 5000, 'num_epochs': 2, 'batch_size': 32, 'initial_lr': 2e-06, 'end_lr': 0, 'savedir': '/opt/data_dir/finetune/class-2023-08-14_1829/model', 'configpath': '/opt/arxivDownload/config.toml', 'base_dir': '/opt/data_dir', 'local_dir': '/tmp/trainer', 'timestamp': 'Aug-14_18-29', 'save_path_dir': '/opt/data_dir/trained_models/finetuning/HFTransformers_Aug-14_18-29', 'num_train_steps': 228792}
```

## class-2023-08-07_1327
```
List of XLA GPUs: []
opt_prob=0.1 and f1_max=0.9479913667061198
              precision    recall  f1-score   support

           0       0.94      0.95      0.95      6788
           1       0.95      0.95      0.95      7191

    accuracy                           0.95     13979
   macro avg       0.95      0.95      0.95     13979
weighted avg       0.95      0.95      0.95     13979

              precision    recall  f1-score   support

           0       0.94      0.95      0.95      6788
           1       0.95      0.95      0.95      7191

    accuracy                           0.95     13979
   macro avg       0.95      0.95      0.95     13979
weighted avg       0.95      0.95      0.95     13979

{'shrink_data_factor': 1.0, 'checkpoint': 'bert-large-cased', 'glob_data_source': 'training_defs/math18/*.xml.gz', 'data_stream_batch_size': 5000, 'num_epochs': 3, 'batch_size': 32, 'initial_lr': 6e-06, 'end_lr': 0, 'savedir': '/opt/data_dir/finetune/class-2023-08-07_1327/model', 'configpath': '/opt/arxivDownload/config.toml', 'base_dir': '/opt/data_dir', 'local_dir': '/tmp/trainer', 'timestamp': 'Aug-07_13-28', 'save_path_dir': '/opt/data_dir/trained_models/finetuning/HFTransformers_Aug-07_13-28', 'num_train_steps': 42462}
```

## class-2023-08-05_1512
```
List of XLA GPUs: []
opt_prob=0.1 and f1_max=0.944743935309973
              precision    recall  f1-score   support

           0       0.93      0.94      0.94       653
           1       0.95      0.94      0.94       745

    accuracy                           0.94      1398
   macro avg       0.94      0.94      0.94      1398
weighted avg       0.94      0.94      0.94      1398

              precision    recall  f1-score   support

           0       0.93      0.94      0.94       653
           1       0.95      0.94      0.94       745

    accuracy                           0.94      1398
   macro avg       0.94      0.94      0.94      1398
weighted avg       0.94      0.94      0.94      1398

{'shrink_data_factor': 0.1, 'checkpoint': 'bert-large-cased', 'glob_data_source': 'training_defs/math18/*.xml.gz', 'data_stream_batch_size': 5000, 'num_epochs': 3, 'batch_size': 32, 'initial_lr': 2e-05, 'end_lr': 0, 'savedir': '/opt/data_dir/finetune/class-2023-08-05_1512/model', 'configpath': '/opt/arxivDownload/config.toml', 'base_dir': '/opt/data_dir', 'local_dir': '/tmp/trainer', 'timestamp': 'Aug-05_15-12', 'save_path_dir': '/opt/data_dir/trained_models/finetuning/HFTransformers_Aug-05_15-12', 'num_train_steps': 4248}
```

## class-2023-10-20_1733
```
opt_prob=0.1 and f1_max=0.9485215543995035
              precision    recall  f1-score   support

           0       0.95      0.95      0.95     77142
           1       0.95      0.95      0.95     78085

    accuracy                           0.95    155227
   macro avg       0.95      0.95      0.95    155227
weighted avg       0.95      0.95      0.95    155227

              precision    recall  f1-score   support

           0       0.95      0.95      0.95     77142
           1       0.95      0.95      0.95     78085

    accuracy                           0.95    155227
   macro avg       0.95      0.95      0.95    155227
weighted avg       0.95      0.95      0.95    155227

{'shrink_data_factor': 1.0, 'checkpoint': 'roberta-large', 'glob_data_source': 'training_defs/math*/*.xml.gz', 'data_stream_batch_size': 5000, 'num_epochs': 3, 'batch_size': 32, 'initial_lr': 5e-06, 'end_lr': 0.0, 'savedir': '/opt/data_dir/finetune/class-2023-10-20_1733/model', 'configpath': '/opt/arxivDownload/rmme_config.toml', 'base_dir': '/opt/data_dir', 'local_dir': '/tmp/trainer', 'timestamp': 'Oct-20_17-33', 'save_path_dir': '/opt/data_dir/trained_models/finetuning/HFTransformers_Oct-20_17-33', 'num_train_steps': 117876, 'opt_thresh': 0.1, 'f1_max': 0.9485215543995035}
```