In [None]:
!pip install simpletransformers > /dev/null

In [None]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import pandas as pd
import logging

# Logging: root logger at INFO, but only WARNING and above from "transformers"
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)


# Locations of the training and test CSV files
training_data_path = "/HF Approach/training_new.csv"
test_data_path = "/HF Approach/test_new.csv"

# Training split: discard the 'number_of_words' column, expose 'label' as 'labels'
train_df = (
    pd.read_csv(training_data_path)
    .drop(columns=['number_of_words'])
    .rename(columns={'label': 'labels'})
)

# Test split: prepared exactly like the training split
test_df = (
    pd.read_csv(test_data_path)
    .drop(columns=['number_of_words'])
    .rename(columns={'label': 'labels'})
)

# Preview the first rows of the prepared training frame
train_df.head()

Unnamed: 0,labels,text
0,0,Ein Frisör schneidet den Menschen die Haare. D...
1,0,Der Name Miley kommt aus Miley Cyrus Kindheit....
2,0,Der Vogel hat sich in der Plastik-Folie verfan...
3,0,Das passt nicht zu einer Prinzessin und zu Abe...
4,0,Und die anderen Menschen verstehen die Gebärde...


In [None]:
from zipfile import ZipFile
import shutil
import os.path

def save_model(name, src_path, dest_path, temp_zip_path='/content/temp-model.zip'):
  """Zip the model files found in ``src_path`` and copy the archive to ``dest_path``.

  The archive is first written to ``temp_zip_path`` and then copied to
  ``<dest_path>/<name>.zip``. Only the files named in ``model_files`` below are
  considered; any of them that does not exist in ``src_path`` is skipped.

  Args:
    name: File name of the resulting archive, without the ".zip" suffix.
    src_path: Directory containing the model files. A trailing path
      separator is optional.
    dest_path: Existing directory that receives the archive. A trailing path
      separator is optional.
    temp_zip_path: Scratch location where the archive is assembled before it
      is copied. Defaults to the path this function has always used.

  Returns:
    None.

  Raises:
    OSError: If the scratch archive cannot be written or the copy to
      ``dest_path`` fails (for example because the directory does not exist).
  """
  # Candidate files, in the order they are added to the archive
  model_files = (
    "config.json",
    "eval_results.txt",
    "model_args.json",
    "pytorch_model.bin",
    "special_tokens_map.json",
    "tokenizer.json",
    "tokenizer_config.json",
    "training_args.bin",
    "vocab.txt",
  )

  # The context manager closes the archive even when adding a file raises
  with ZipFile(temp_zip_path, 'w') as zip_obj:
    for file_name in model_files:
      # os.path.join works with and without a trailing separator on src_path
      file = os.path.join(src_path, file_name)
      if os.path.isfile(file):
        # No arcname is given, so the entry keeps the source path (minus the
        # leading separator) inside the archive, as before
        zip_obj.write(file)

  # Copy the finished archive to its final location
  shutil.copyfile(temp_zip_path, os.path.join(dest_path, name + ".zip"))

Alle 10 Trainingsdurchläufe sind praktisch identisch mit dem folgenden Code. Entsprechend dem jeweils trainierten Modell wurden nur die Pfade und Namen angepasst. Entsprechend den Hyperparameter-Sets wurden die Parameter geändert oder auskommentiert. Da die Google-Colab-Infrastruktur die parallele Ausführung von bis zu 3 Jupyter-Notebooks zulässt, habe ich den Trainingscode auf mehrere Dateien aufgeteilt und zusätzlich den Code in den Notebooks nach einem erfolgreichen Training für die erneute Ausführung eines anderen Trainingsdurchlaufs angepasst. Somit konnte die finale Trainingszeit erheblich reduziert werden.

In [None]:
##### Standard Training #####

# Optional model configuration
# (commented-out entries are alternative hyperparameter values that are not
# active in this run)
model_args = ClassificationArgs(
    overwrite_output_dir = True,
    #learning_rate=2e-5,
    #num_train_epochs=5,
    num_train_epochs=3,
    train_batch_size=24,
    eval_batch_size=16,
    #weight_decay=0.01,
    # Set explicitly so that training writes to the same absolute directory
    # that save_model() reads from below, independent of the current working
    # directory (presumably the library default is "outputs/" relative to the
    # working directory -- confirm against the simpletransformers docs).
    output_dir="/content/outputs/",
    best_model_dir="/content/best_model",
    # NOTE(review): train_model() below is called without evaluation data and
    # evaluate_during_training is not enabled; early stopping and
    # best_model_dir presumably have no effect in that setup -- confirm.
    use_early_stopping=True
    )

# Create a ClassificationModel: German ELECTRA base checkpoint, 4 target classes
model = ClassificationModel(
    'electra',
    'deepset/gelectra-base',
    num_labels=4,
    use_cuda=True,
    args=model_args
)

# Train the model
model.train_model(train_df)

# Evaluate the model on the test split and show the metrics and raw outputs
result, model_outputs, wrong_predictions = model.eval_model(test_df)
print(result)
print(model_outputs)

# Archive the files from the training output directory
save_model("simple-transformer-gelectra-12.10.2022", "/content/outputs/", "/HF Approach/output/")

Downloading:   0%|          | 0.00/440 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at deepset/gelectra-base were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at deepset/gelectra-base and are newly initialized: ['classifier.dense.weight', 'classifie

Downloading:   0%|          | 0.00/83.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/240k [00:00<?, ?B/s]

  0%|          | 0/260097 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/10838 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/10838 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/10838 [00:00<?, ?it/s]

  0%|          | 0/65026 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4065 [00:00<?, ?it/s]

{'mcc': 0.945861331910856, 'eval_loss': 0.17495257162373298}
[[ 9.984375   -1.56835938 -3.74804688 -4.046875  ]
 [ 9.9453125  -1.42773438 -3.7578125  -4.11328125]
 [ 9.9921875  -1.51953125 -3.77734375 -4.0546875 ]
 ...
 [-6.0390625  -4.703125   -0.4934082   7.78515625]
 [-5.125      -4.05078125 -2.13476562  8.25      ]
 [-5.98828125 -4.640625   -0.70654297  7.8984375 ]]


In [None]:
##### Standard Training: German Bert #####

# Settings for this run, collected in a dict and unpacked into ClassificationArgs.
# Values that are NOT active in this run (alternative hyperparameter sets):
#   learning_rate=2e-5, num_train_epochs=5, weight_decay=0.01
gbert_settings = {
    "overwrite_output_dir": True,
    "num_train_epochs": 3,
    "train_batch_size": 24,
    "eval_batch_size": 16,
    "best_model_dir": "/content/gbert/best_model",
    "output_dir": "/content/gbert-2",
    "use_early_stopping": True,
}
model_args = ClassificationArgs(**gbert_settings)

# German BERT base checkpoint with a 4-class classification head, on the GPU
model = ClassificationModel(
    'bert',
    'deepset/gbert-base',
    args=model_args,
    use_cuda=True,
    num_labels=4,
)

# Fine-tune on the training split
model.train_model(train_df)

# Score the test split, then print the metrics followed by the raw model outputs
result, model_outputs, wrong_predictions = model.eval_model(test_df)
for shown in (result, model_outputs):
    print(shown)

# Archive the files from this run's output directory
save_model("simple-transformer-gbert-12.10.2022", "/content/gbert-2/", "/HF Approach/output/")

Downloading:   0%|          | 0.00/362 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/442M [00:00<?, ?B/s]

Some weights of the model checkpoint at deepset/gbert-base were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint a

Downloading:   0%|          | 0.00/83.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/240k [00:00<?, ?B/s]

  0%|          | 0/260097 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/10838 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/10838 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/10838 [00:00<?, ?it/s]

  0%|          | 0/65026 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4065 [00:00<?, ?it/s]

{'mcc': 0.9466195028147807, 'eval_loss': 0.1951923787337241}
[[ 9.6640625  -2.17578125 -4.0078125  -2.83203125]
 [ 9.671875   -2.4296875  -3.7890625  -2.80078125]
 [ 9.6171875  -2.59375    -3.9375     -2.52539062]
 ...
 [-4.38671875 -3.66601562 -0.703125    8.34375   ]
 [-3.40234375 -3.3515625  -2.00976562  8.265625  ]
 [-3.87304688 -3.65429688 -1.390625    8.4296875 ]]


In [None]:
##### Standard Training: XLNet #####

# NOTE(review): the output recorded below this cell reports an xlm-roberta-base
# checkpoint and 5 epochs, which does not match this configuration -- it looks
# like it stems from a different run; re-run the cell before citing results.

# Settings for this run, collected in a dict and unpacked into ClassificationArgs.
# Values that are NOT active in this run (alternative hyperparameter sets):
#   learning_rate=2e-5, num_train_epochs=5, weight_decay=0.01
xlnet_settings = {
    "overwrite_output_dir": True,
    "num_train_epochs": 3,
    "train_batch_size": 24,
    "eval_batch_size": 16,
    "best_model_dir": "/content/best_model",
    "use_early_stopping": True,
    "output_dir": "/content/xlnet-2",
}
model_args = ClassificationArgs(**xlnet_settings)

# XLNet base (cased) checkpoint with a 4-class classification head, on the GPU
model = ClassificationModel(
    'xlnet',
    'xlnet-base-cased',
    args=model_args,
    use_cuda=True,
    num_labels=4,
)

# Fine-tune on the training split
model.train_model(train_df)

# Score the test split, then print the metrics followed by the raw model outputs
result, model_outputs, wrong_predictions = model.eval_model(test_df)
for shown in (result, model_outputs):
    print(shown)

# Archive the files from this run's output directory
save_model("simple-transformer-xlnet-12.10.2022", "/content/xlnet-2/", "/HF Approach/output/")

Downloading:   0%|          | 0.00/615 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Downloading:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

  f"use_multiprocessing automatically disabled as {model_type}"


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/10838 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/10838 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/10838 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/10838 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/10838 [00:00<?, ?it/s]

  0%|          | 0/65026 [00:00<?, ?it/s]