# Walkthrough of the Hugging Face Framework
Using CoNLL-2003 and GermEval 2014 as datasets and fine-tuning different language models

## Install and imports

In [None]:
# Install the notebook's dependencies into the environment of the running kernel.
# The explicit %pip magic does not depend on IPython's automagic setting.
%pip install transformers tensorboardX tensorboard scikit-learn seqeval psutil sacrebleu rouge-score tensorflow_datasets

# Preprocessing step
#### Change the settings (environment variables) to match your experimental setup


In [None]:
# Set the preprocessing settings as environment variables of the kernel process.
# `!export ...` would only affect a short-lived subshell, so later `!` commands
# could not see the values; %env makes them visible to every subsequent shell call.
%env MAX_LENGTH=128
%env BERT_MODEL=bert-base-multilingual-cased

In [None]:
# Build the label list: take the second space-separated column of the train and
# dev files, drop empty lines, and keep each distinct value once (sorted).
!cat complaints/train.txt complaints/dev.txt | cut -d " " -f 2 | grep -v "^$" | sort -u > complaints/labels.txt

In [None]:
# Run preprocess.py once per split (train, dev, test) and redirect its stdout
# into train.txt / dev.txt / test.txt in the current working directory.
# $BERT_MODEL and $MAX_LENGTH are not defined in this cell: they must already be
# available (as environment variables of the kernel, or as Python variables that
# IPython interpolates) before this cell runs, otherwise they expand to nothing.
# NOTE(review): the inputs are named train.train / dev.dev / test.test — confirm
# these are the actual file names of the raw splits.
!python3 preprocess.py train.train $BERT_MODEL $MAX_LENGTH  > train.txt
!python3 preprocess.py dev.dev $BERT_MODEL $MAX_LENGTH > dev.txt
!python3 preprocess.py test.test $BERT_MODEL $MAX_LENGTH > test.txt

# Start fine-tuning

### BERT base cased

In [None]:
# Fine-tune bert-base-cased on the data in the current directory;
# the output directory is /content/bert_base_cased.
!python3 run_ner.py \
    --data_dir ./ \
    --labels ./labels.txt \
    --model_name_or_path bert-base-cased \
    --output_dir /content/bert_base_cased \
    --max_seq_length 128 \
    --num_train_epochs 3 \
    --per_gpu_train_batch_size 32 \
    --save_steps 750 \
    --seed 1 \
    --do_train --do_eval --do_predict

### BERT base uncased

In [None]:
# Fine-tune bert-base-uncased (the checkpoint this section is about) on the data
# in the current directory. The run gets its own output directory so it does not
# share one with any other fine-tuning run in this notebook.
!python3 run_ner.py \
    --data_dir ./ \
    --labels ./labels.txt \
    --model_name_or_path bert-base-uncased \
    --output_dir /content/bert_base_uncased \
    --max_seq_length 128 \
    --num_train_epochs 3 \
    --per_gpu_train_batch_size 32 \
    --save_steps 750 \
    --seed 1 \
    --do_train --do_eval --do_predict

### BERT multilingual cased

In [None]:
# Fine-tune bert-base-multilingual-cased on the data in the current directory;
# the output directory is /content/bert-multi-cased.
!python3 run_ner.py \
    --data_dir ./ \
    --labels ./labels.txt \
    --model_name_or_path bert-base-multilingual-cased \
    --output_dir /content/bert-multi-cased \
    --max_seq_length 128 \
    --num_train_epochs 3 \
    --per_gpu_train_batch_size 32 \
    --save_steps 750 \
    --seed 1 \
    --do_train --do_eval --do_predict

### BERT multilingual uncased

In [None]:
# Fine-tune bert-base-multilingual-uncased on the data in the current directory;
# the output directory is /content/bert-multi-uncased.
!python3 run_ner.py \
    --data_dir ./ \
    --labels ./labels.txt \
    --model_name_or_path bert-base-multilingual-uncased \
    --output_dir /content/bert-multi-uncased \
    --max_seq_length 128 \
    --num_train_epochs 3 \
    --per_gpu_train_batch_size 32 \
    --save_steps 750 \
    --seed 1 \
    --do_train --do_eval --do_predict

### BERT german cased

In [None]:
# Fine-tune bert-base-german-cased on the data in the current directory;
# the output directory is /content/bert-base-german-cased.
!python3 run_ner.py \
    --data_dir ./ \
    --labels ./labels.txt \
    --model_name_or_path bert-base-german-cased \
    --output_dir /content/bert-base-german-cased \
    --max_seq_length 128 \
    --num_train_epochs 3 \
    --per_gpu_train_batch_size 32 \
    --save_steps 750 \
    --seed 1 \
    --do_train --do_eval --do_predict

### BERT german uncased

In [None]:
# NOTE(review): the section heading says "BERT german uncased", but this command
# fine-tunes bert-base-multilingual-uncased and writes to germeval-model-uncased.
# Confirm which German uncased checkpoint was intended and give the run its own
# output directory before relying on the results.
!python3 run_ner.py --data_dir ./ --model_type bert --labels ./labels.txt --model_name_or_path bert-base-multilingual-uncased --output_dir germeval-model-uncased --max_seq_length 128 --num_train_epochs 3 --per_gpu_train_batch_size 32 --save_steps 750 --seed 1 --do_train --do_eval --do_predict

### dbmdz/bert-base-german-cased

In [None]:
# Fine-tune dbmdz/bert-base-german-cased on the data in /content/complaints/;
# the output directory is /content/complaints-dbmdz-bert-base-german-cased.
!python3 run_ner.py \
    --data_dir /content/complaints/ \
    --labels /content/complaints/labels.txt \
    --model_name_or_path dbmdz/bert-base-german-cased \
    --output_dir /content/complaints-dbmdz-bert-base-german-cased \
    --max_seq_length 128 \
    --num_train_epochs 3 \
    --per_gpu_train_batch_size 32 \
    --save_steps 750 \
    --seed 1 \
    --do_train --do_eval --do_predict

### dbmdz-bert-base-german-uncased

In [None]:
# Fine-tune dbmdz/bert-base-german-uncased (the checkpoint this section is about)
# on the data in the current directory. The run gets its own output directory so
# it does not share one with any other fine-tuning run in this notebook.
# NOTE(review): data is read from ./ as in the original command — confirm whether
# /content/complaints/ was intended instead.
!python3 run_ner.py \
    --data_dir ./ \
    --labels ./labels.txt \
    --model_name_or_path dbmdz/bert-base-german-uncased \
    --output_dir /content/dbmdz-bert-base-german-uncased \
    --max_seq_length 128 \
    --num_train_epochs 3 \
    --per_gpu_train_batch_size 32 \
    --save_steps 750 \
    --seed 1 \
    --do_train --do_eval --do_predict

### distilbert-base-german-cased

In [None]:
# Fine-tune distilbert-base-german-cased on the data in /content/complaints/.
# The output directory is named after the cased checkpoint that is actually trained.
# NOTE(review): --max_seq_length is 256 here but 128 in the preprocessing settings
# and in the other fine-tuning runs — confirm this is intentional.
!python3 run_ner.py \
    --data_dir /content/complaints/ \
    --labels /content/complaints/labels.txt \
    --model_name_or_path distilbert-base-german-cased \
    --output_dir /content/complaints-distilbert-base-german-cased \
    --max_seq_length 256 \
    --num_train_epochs 3 \
    --per_gpu_train_batch_size 32 \
    --save_steps 750 \
    --seed 1 \
    --do_train --do_eval --do_predict

### distilbert-base-german-uncased

In [None]:
# NOTE(review): the section heading says "distilbert-base-german-uncased", but this
# command fine-tunes bert-base-multilingual-uncased and writes to
# germeval-model-uncased. Confirm that the intended checkpoint exists and give the
# run its own output directory before relying on the results.
!python3 run_ner.py --data_dir ./ --model_type bert --labels ./labels.txt --model_name_or_path bert-base-multilingual-uncased --output_dir germeval-model-uncased --max_seq_length 128 --num_train_epochs 3 --per_gpu_train_batch_size 32 --save_steps 750 --seed 1 --do_train --do_eval --do_predict