In [None]:
import os
# Work from the Colab notebooks folder on Google Drive so that the cloned data
# and generated files are created there.
# NOTE(review): presumably Drive is already mounted at /content/drive — confirm on a fresh runtime.
os.chdir("/content/drive/MyDrive/ColabNotebooks")
# For Colab: install FARM together with the cu101 build of torch 1.6.0.
!pip install torch==1.6.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
!pip install farm==0.5.0
# Text-normalisation dependencies of clean(); unpinned (-U), so the newest releases are installed.
!pip install -U -q emoji soynlp
# Fetch the NSMC dataset repository into ./nsmc.
!git clone https://github.com/e9t/nsmc

In [16]:
import os
import re
import emoji
import pandas as pd
from pathlib import Path
from soynlp.normalizer import repeat_normalize

def read_data(path:str, header=None):
    """Load a tab-separated file into a DataFrame.

    Args:
        path: Location of the TSV file.
        header: Row number to use for the column names, or ``None`` when the
            file has no header row (forwarded unchanged to ``pd.read_csv``).

    Returns:
        The parsed ``pd.DataFrame``.
    """
    frame = pd.read_csv(path, header=header, delimiter='\t')
    return frame

_CLEAN_PATTERNS = None  # lazily-built (disallowed-character regex, URL regex) pair


def _emoji_characters():
    """Return every emoji known to the installed ``emoji`` package as one string."""
    table = getattr(emoji, 'EMOJI_DATA', None)  # emoji >= 2.0
    if table is None:
        table = emoji.UNICODE_EMOJI  # older releases
        # Some 1.x releases nest the table by language code ({'en': {...}, ...});
        # the emoji characters are then the keys one level down.
        if isinstance(table.get('en'), dict):
            table = table['en']
    return ''.join(table.keys())


def _clean_patterns():
    """Compile the two regexes used by ``clean`` once and reuse them afterwards."""
    global _CLEAN_PATTERNS
    if _CLEAN_PATTERNS is None:
        emojis = _emoji_characters()
        # Anything NOT in this class (ASCII, listed punctuation, Hangul ㄱ-힣, emoji) is replaced.
        pattern = re.compile(f'[^ .,?!/@$%~％·∼()\x00-\x7Fㄱ-힣{emojis}]+')
        url_pattern = re.compile(
            r'https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)')
        _CLEAN_PATTERNS = (pattern, url_pattern)
    return _CLEAN_PATTERNS


def clean(x):
    """Normalise one raw review string.

    Steps, in order:
      1. Replace each run of characters outside the allowed set (ASCII,
         the listed punctuation, Hangul ``ㄱ-힣`` and emoji) with one space.
      2. Remove ``http://`` / ``https://`` URLs.
      3. Strip leading and trailing whitespace.
      4. Shorten repeated characters with soynlp's ``repeat_normalize``
         (``num_repeats=2``).

    The regexes are compiled on the first call only; this function is applied
    to every row of the corpus, so rebuilding them per call is wasteful.

    Args:
        x: The text to clean (must be a ``str``).

    Returns:
        The cleaned string.
    """
    pattern, url_pattern = _clean_patterns()

    x = pattern.sub(' ', x)
    x = url_pattern.sub('', x)
    x = x.strip()
    x = repeat_normalize(x, num_repeats=2)
    return x

def preprocess_dataframe(df:pd.DataFrame):
    r"""
    Clean the review text and turn numeric labels into label names, in place.

    The ``document`` column is converted to ``str`` and passed through
    ``clean``; the ``label`` column is mapped 0 -> "bad" and 1 -> "good"
    (any other value becomes ``None``). The same DataFrame object that was
    passed in is modified and returned.

    Adapted from: https://colab.research.google.com/drive/1IPkZo1Wd-DghIOK6gJpcb0Dv4_Gv2kXB
    """

    names_by_id = {0:"bad", 1:"good"}

    def _clean_text(raw):
        # str() first: non-string cells (e.g. missing values) must not reach the regexes.
        return clean(str(raw))

    def _label_name(value):
        return names_by_id.get(value)

    df['document'] = df['document'].apply(_clean_text)
    df['label'] = df['label'].apply(_label_name)
    return df

# Clean both NSMC splits, then export only the (label, document) columns as
# header-bearing TSV files next to the raw data.
df_train = preprocess_dataframe(read_data("./nsmc/ratings_train.txt", header=0))
df_test = preprocess_dataframe(read_data("./nsmc/ratings_test.txt", header=0))
for _frame, _target in ((df_train, "./nsmc/train.tsv"), (df_test, "./nsmc/test.tsv")):
    _frame[["label", "document"]].to_csv(_target, sep="\t", index=False)

In [17]:
# List the dataset folder to check that train.tsv and test.tsv were written.
!ls nsmc

code		  ratings_train.txt  raw	synopses.json  train.tsv
ratings_test.txt  ratings.txt	     README.md	test.tsv


In [4]:
import sys
import torch
from pathlib import Path
from farm.modeling.tokenization import Tokenizer
from farm.data_handler.processor import TextClassificationProcessor
from farm.data_handler.data_silo import DataSilo
from farm.modeling.language_model import LanguageModel
from farm.modeling.prediction_head import TextClassificationHead
from farm.modeling.adaptive_model import AdaptiveModel
from farm.modeling.optimization import initialize_optimizer
from farm.train import Trainer
from farm.utils import MLFlowLogger

# Treat the current working directory as the project root and make it importable.
repo_path = Path() # Path().absolute().parent
sys.path.append(str(repo_path))

# Send experiment metrics for this run to the public MLflow tracking server.
ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
ml_logger.init_experiment(experiment_name="FARM_tutorial", run_name="NSMC")

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Devices available: {device}")

03/29/2021 09:45:11 - INFO - farm.modeling.prediction_head -   Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .



 __          __  _                            _        
 \ \        / / | |                          | |       
  \ \  /\  / /__| | ___ ___  _ __ ___   ___  | |_ ___  
   \ \/  \/ / _ \ |/ __/ _ \| '_ ` _ \ / _ \ | __/ _ \ 
    \  /\  /  __/ | (_| (_) | | | | | |  __/ | || (_) |
     \/  \/ \___|_|\___\___/|_| |_| |_|\___|  \__\___/ 
  ______      _____  __  __  
 |  ____/\   |  __ \|  \/  |              _.-^-._    .--.
 | |__ /  \  | |__) | \  / |           .-'   _   '-. |__|
 |  __/ /\ \ |  _  /| |\/| |          /     |_|     \|  |
 | | / ____ \| | \ \| |  | |         /               \  |
 |_|/_/    \_\_|  \_\_|  |_|        /|     _____     |\ |
                                     |    |==|==|    |  |
|---||---|---|---|---|---|---|---|---|    |--|--|    |  |
|---||---|---|---|---|---|---|---|---|    |==|==|    |  |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
Devices available: cuda


<center><img src="https://drive.google.com/uc?id=1hbtUClFoXg45IbViZoFRLnnDGVlr9Dlb" alt="Fine-tuning" width="30%" height="30%"></center>

# FARM

> Framework for Adapting Representation Models

이 패키지를 한 마디로 요약하면 Fine-tuning에 최적화된 도구다.

## Core Features

- **Easy fine-tuning of language models** to your task and domain language
- **Speed**: AMP(Automatic Mixed Precision) optimizers (~35% faster) and parallel preprocessing (16 CPU cores => ~16x faster)
- **Modular design** of language models and prediction heads
- Switch between heads or combine them for **multitask learning**
- **Full Compatibility** with HuggingFace Transformers' models and model hub
- **Smooth upgrading** to newer language models
- Integration of **custom datasets** via Processor class
- Powerful **experiment tracking** & execution
- **Checkpointing & Caching** to resume training and reduce costs with spot instances
- Simple **deployment** and **visualization** to showcase your model

<details>
<summary> AMP </summary>

**Reference**
- https://github.com/NVIDIA/apex
- https://forums.fast.ai/t/mixed-precision-training/20720

**mixed precision training이란**
- 처리 속도를 높이기 위한 FP16(16bit floating point)연산과 정확도 유지를 위한 FP32 연산을 섞어 학습하는 방법
- Tensor Core를 활용한 FP16연산을 이용하면 FP32연산 대비 절반의 메모리 사용량과 8배의 연산 처리량 & 2배의 메모리 처리량 효과가 있다
</details>


# NSMC 데이터 세트로 알아보기

## Fine-tuning Process

<center><img src="https://drive.google.com/uc?id=1j9pn8Lpg7sy6S8Ubvq3E7JLWf28KvRt4" alt="Fine-tuning" width="50%" height="50%" align="center"></center>

Fine-tuning은 위 그림과 같은 과정으로 진행된다.

* Load Data: 데이터를 알맞은 형식(json, csv 등)으로 불러온다.
* Create Dataset: 데이터세트(Dataset) 만들기
    * Tokenization: 텍스트를 토큰으로 나누고, 단어장(vocab)을 생성한다.
    * ToTensor: vocab에 해당하는 단어를 수치화하는 과정 (`input_ids` in transformers)
    * Attention Mask: 패딩계산을 피하기 위해 Attention 해야할 토큰만 masking(`attention_mask` in transformers)
* Create Dataloader: 훈련, 평가시 배치크기 단위로 데이터를 불러오는 객체
* Create Model:
    * Pretrained Language Model: 대량의 텍스트 데이터로 사전에 훈련된 모델 
$$P(x_t \vert x_{1:t-1})$$
    * Fine-tuning Layer: Downstream Task에 맞춰서 학습        
$$P(y\vert x_{1:t})$$
* Train Model
* Eval Model
* Inference

In [5]:
# Read back the preprocessed training split from <repo_path>/nsmc and preview its first rows.
DATA_PATH = repo_path.joinpath("nsmc")
df = read_data(DATA_PATH.joinpath("train.tsv"), header=0)
df.head()

Unnamed: 0,label,document
0,bad,아 더빙.. 진짜 짜증나네요 목소리
1,good,흠...포스터보고 초딩영화줄....오버연기조차 가볍지 않구나
2,bad,너무재밓었다그래서보는것을추천한다
3,bad,교도소 이야기구먼 ..솔직히 재미는 없다..평점 조정
4,good,사이몬페그의 익살스런 연기가 돋보였던 영화!스파이더맨에서 늙어보이기만 했던 커스틴 ...


## Processor & Data Silo

<center><img src="https://drive.google.com/uc?id=1XCc0AJpPBMFcC81NW0A6w0mpswZ2KU7h" alt="Fine-tuning" width="60%" height="50%" align="center"></center>

* **Processor**는 file 혹은 request를 PyTorch Dataset으로 만들어 주는 역할을 한다.
* **Data Silo**는 train, dev, test sets를 관리하고, Processor의 function들을 이용해 각 set를 DataLoader로 변환한다.
    * **Samples**, **SampleBasket**은 raw document를 관리하는 객체이며 tokenized, features등 데이터를 저장하고 있다. 이렇게 하는 이유는 하나의 소스 텍스트(raw text)에서 여러개의 샘플을 생성할 수도 있기 때문이다(e.g. QA task)

In [6]:
# --- Experiment configuration -------------------------------------------------
PRETRAINED_MODEL_NAME_OR_PATH = "beomi/kcbert-base"  # Reference: https://github.com/Beomi/KcBERT
TASK_TYPE = "text_classification"
LABEL_LIST = ["bad", "good"]
MAX_LENGTH = 150
TRAIN_FILE, TEST_FILE = "train.tsv", "test.tsv"

# --- Tokenizer: loaded from the same checkpoint as the language model, case preserved
tokenizer = Tokenizer.load(
    do_lower_case=False,
    pretrained_model_name_or_path=PRETRAINED_MODEL_NAME_OR_PATH,
)

# --- Processor: reads the TSV files under DATA_PATH and builds the datasets
processor = TextClassificationProcessor(
    tokenizer=tokenizer,
    max_seq_len=MAX_LENGTH,
    # where the data lives and how the files are laid out
    data_dir=str(DATA_PATH),
    train_filename=TRAIN_FILE,
    test_filename=TEST_FILE,
    delimiter="\t",
    header=0,
    label_column_name="label",
    text_column_name="document",
    # No dev file is passed; presumably FARM splits 10% off the training data — confirm in the FARM docs.
    dev_split=0.1,
    # task definition
    label_list=LABEL_LIST,
    metric="acc",
)

# --- Data silo: holds the splits and exposes them as batched loaders
data_silo = DataSilo(
    processor=processor,
    caching=True,
    batch_size=8,
    eval_batch_size=8,
)

03/29/2021 09:45:12 - INFO - farm.modeling.tokenization -   Loading tokenizer of type 'BertTokenizer'
03/29/2021 09:45:14 - INFO - farm.data_handler.data_silo -   
Loading data into the data silo ... 
              ______
               |o  |   !
   __          |:`_|---'-.
  |__|______.-/ _ \-----.|       
 (o)(o)------'\ _ /     ( )      
 
03/29/2021 09:45:14 - INFO - farm.data_handler.data_silo -   Loading train set from: nsmc/train.tsv 
03/29/2021 09:45:15 - INFO - farm.data_handler.data_silo -   Got ya 1 parallel workers to convert 149539 dictionaries to pytorch datasets (chunksize = 2000)...
03/29/2021 09:45:15 - INFO - farm.data_handler.data_silo -    0 
03/29/2021 09:45:15 - INFO - farm.data_handler.data_silo -   /|\
03/29/2021 09:45:15 - INFO - farm.data_handler.data_silo -   /'\
03/29/2021 09:45:15 - INFO - farm.data_handler.data_silo -   
03/29/2021 09:45:17 - INFO - farm.data_handler.processor -   *** Show 2 random examples ***
03/29/2021 09:45:17 - INFO - farm.data_handler

<center><img src="https://drive.google.com/uc?id=1DVPT_Rjv_SI4ggJZzqfPh0MgsMa1Q9El" alt="Fine-tuning" width="100%" height="50%" align="center"></center>

```plaintext
03/28/2021 22:12:15 - INFO - farm.data_handler.processor -   

      .--.        _____                       _      
    .'_\/_'.     / ____|                     | |     
    '. /\ .'    | (___   __ _ _ __ ___  _ __ | | ___ 
      "||"       \___ \ / _` | '_ ` _ \| '_ \| |/ _ \ 
       || /\     ____) | (_| | | | | | | |_) | |  __/
    /\ ||//\)   |_____/ \__,_|_| |_| |_| .__/|_|\___|
   (/\||/                             |_|           
______\||/___________________________________________                     

ID: 437-0
Clear Text: 
 	text_classification_label: good
 	text: 이 영화를 보고 두통이 나았습니다. ㅠ ㅠ
Tokenized: 
 	tokens: ['이', '영화를', '보고', '두', '##통이', '나', '##았습니다', '.', '[UNK]', '[UNK]']
 	offsets: [0, 2, 6, 9, 10, 13, 14, 18, 20, 22]
 	start_of_word: [True, True, True, True, False, True, False, False, True, True]
Features: 
 	input_ids: [2, 2451, 25833, 8198, 917, 11765, 587, 21809, 17, 1,
      1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
 	padding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
 	segment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
 	text_classification_label_ids: [1]
```

## Modeling Layers: AdaptiveModel = LanguageModel + PredictionHead

<center><img src="https://drive.google.com/uc?id=1OLWdr8rh7ucpF9t55gzVeMawMBJbRiEC" alt="Fine-tuning" width="60%" height="50%" align="center"></center>

* **LanguageModel**은 pretrained language models(BERT, XLNet ...)의 표준 클래스 
* **PredictionHead**는 모든 down-stream tasks(NER, Text classification, QA ...)를 위한 표준 클래스
* **AdaptiveModel**은 위 두 가지 모듈의 결합이며, 하나의 LanguageModel과 여러 개의 PredictionHead를 결합할 수 있다.

In [9]:
# LanguageModel: Build pretrained language model
EMBEDS_DROPOUT_PROB = 0.1
TASK_NAME = "text_classification"

# LanguageModel: load the pretrained encoder
language_model = LanguageModel.load(PRETRAINED_MODEL_NAME_OR_PATH, language="korean")

# PredictionHead: classification layer sized to the label set, with class
# weights computed by the data silo for this task
class_weights = data_silo.calculate_class_weights(task_name=TASK_NAME)
prediction_head = TextClassificationHead(
    class_weights=class_weights,
    num_labels=len(LABEL_LIST),
)

# AdaptiveModel: the language model plus its (single) prediction head
model = AdaptiveModel(
    language_model=language_model,
    prediction_heads=[prediction_head],
    lm_output_types=["per_sequence"],
    embeds_dropout_prob=EMBEDS_DROPOUT_PROB,
    device=device,
)

03/29/2021 10:22:19 - INFO - farm.modeling.prediction_head -   Prediction head initialized with size [768, 2]
03/29/2021 10:22:19 - INFO - farm.modeling.prediction_head -   Using class weights for task 'text_classification': [0.9967563 1.0032649]


In [41]:
# Walk three levels into the model (module -> wrapped network -> layer) and
# print every layer found at the third level.
RULE = "----------------------------"*2

print(f"Model: {type(model)}")
for module_name, module in model.named_children():
    for _, wrapped in module.named_children():
        for layer_name, layer in wrapped.named_children():
            print(RULE)
            print(f"Module: {module_name} | Layer: {layer_name}")
            print(RULE)
            if layer_name != "encoder":
                print(layer)
                continue
            # For the encoder only its final block is shown, and the layers
            # that follow it inside this wrapped network are skipped.
            print("Showing last layer")
            print(list(layer.children())[0][-1])
            break

print("Last Dropout Layer")
print(model.dropout)

Model: <class 'farm.modeling.adaptive_model.AdaptiveModel'>
--------------------------------------------------------
Module: language_model | Layer: embeddings
--------------------------------------------------------
BertEmbeddings(
  (word_embeddings): Embedding(30000, 768, padding_idx=0)
  (position_embeddings): Embedding(300, 768)
  (token_type_embeddings): Embedding(2, 768)
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
  (dropout): Dropout(p=0.1, inplace=False)
)
--------------------------------------------------------
Module: language_model | Layer: encoder
--------------------------------------------------------
Showing last layer
BertLayer(
  (attention): BertAttention(
    (self): BertSelfAttention(
      (query): Linear(in_features=768, out_features=768, bias=True)
      (key): Linear(in_features=768, out_features=768, bias=True)
      (value): Linear(in_features=768, out_features=768, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (

In [44]:
from transformers import BertForSequenceClassification
# Load the same checkpoint through transformers and print its classifier and
# dropout modules, for comparison with the FARM model printed above.
bert = BertForSequenceClassification.from_pretrained(PRETRAINED_MODEL_NAME_OR_PATH)
divider = "----------------------------" * 2
for title, module in (("classifer", bert.classifier), ("dropout", bert.dropout)):
    print(divider)
    print(f"Module: {title}")
    print(divider)
    print(module)

Some weights of the model checkpoint at beomi/kcbert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initiali

--------------------------------------------------------
Module: classifer
--------------------------------------------------------
Linear(in_features=768, out_features=2, bias=True)
--------------------------------------------------------
Module: dropout
--------------------------------------------------------
Dropout(p=0.1, inplace=False)


## Train & Eval & Inference

<center><img src="https://drive.google.com/uc?id=1bD54igqAn7T96gDCFZ2uxzFHpZIL5GOh" alt="Fine-tuning" width="60%" height="50%" align="center"></center>

### Train & Eval

In [46]:
LEARNING_RATE = 2e-5
N_EPOCHS = 1
N_GPU = 1
# Output directory for the fine-tuned model. It was previously undefined
# (NameError on a fresh kernel); the Inference section loads from "./ckpt/NSMC".
checkpoint_path = Path("./ckpt/NSMC")

# Initialize Optimizer: the schedule length is derived from the number of
# training batches and epochs.
model, optimizer, lr_schedule = initialize_optimizer(
    model=model,
    device=device,
    learning_rate=LEARNING_RATE,
    n_batches=len(data_silo.loaders["train"]),
    n_epochs=N_EPOCHS
)
# Trainer
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    lr_schedule=lr_schedule,
    data_silo=data_silo,
    epochs=N_EPOCHS,
    n_gpu=N_GPU,
    device=device, 
)
# now train!
model = trainer.train()
# Save the model AND the processor: Inferencer.load() on a directory also
# restores the tokenizer/processor from it, so both must be written there.
model.save(checkpoint_path)
processor.save(checkpoint_path)

03/29/2021 10:52:16 - INFO - farm.modeling.optimization -   Loading optimizer `TransformersAdamW`: '{'correct_bias': False, 'weight_decay': 0.01, 'lr': 2e-05}'
03/29/2021 10:52:16 - INFO - farm.modeling.optimization -   Using scheduler 'get_linear_schedule_with_warmup'
03/29/2021 10:52:16 - INFO - farm.modeling.optimization -   Loading schedule `get_linear_schedule_with_warmup`: '{'num_warmup_steps': 1674.7, 'num_training_steps': 16747}'
03/29/2021 10:52:16 - INFO - farm.train -   
 

          &&& &&  & &&             _____                   _             
      && &\/&\|& ()|/ @, &&       / ____|                 (_)            
      &\/(/&/&||/& /_/)_&/_&     | |  __ _ __ _____      ___ _ __   __ _ 
   &() &\/&|()|/&\/ '%" & ()     | | |_ | '__/ _ \ \ /\ / / | '_ \ / _` |
  &_\_&&_\ |& |&&/&__%_/_& &&    | |__| | | | (_) \ V  V /| | | | | (_| |
&&   && & &| &| /& & % ()& /&&    \_____|_|  \___/ \_/\_/ |_|_| |_|\__, |
 ()&_---()&\&\|&&-&&--%---()~                                     

KeyboardInterrupt: ignored

### Inference

In [None]:
import os
# Switch to the Colab notebooks folder on Drive; the relative checkpoint path
# "./ckpt/NSMC" used in the next cell is resolved from here.
os.chdir("/content/drive/MyDrive/ColabNotebooks")

In [23]:
from farm.infer import Inferencer
from pprint import PrettyPrinter

# Raw review sentences to classify; each input dict carries its sentence under "text".
basic_texts = [
    {"text": sentence}
    for sentence in (
        "기생충,,, 이 영화 정말 재밌네요.",
        "황정민 나오는 영화는 다 볼만한듯.",
    )
]

# Restore the fine-tuned classifier from the checkpoint directory and run it.
infer_model = Inferencer.load(
    task_type="text_classification",
    model_name_or_path="./ckpt/NSMC",
)
result = infer_model.inference_from_dicts(dicts=basic_texts)
print()

03/29/2021 11:39:09 - INFO - farm.utils -   device: cpu n_gpu: 0, distributed training: False, automatic mixed precision training: None
03/29/2021 11:39:11 - INFO - farm.modeling.adaptive_model -   Found files for loading 1 prediction heads
03/29/2021 11:39:11 - INFO - farm.modeling.prediction_head -   Prediction head initialized with size [768, 2]
03/29/2021 11:39:11 - INFO - farm.modeling.prediction_head -   Using class weights for task 'text_classification': [0.9966925978660583, 1.0033293962478638]
03/29/2021 11:39:11 - INFO - farm.modeling.prediction_head -   Loading prediction head from ckpt/NSMC/prediction_head_0.bin
03/29/2021 11:39:12 - INFO - farm.modeling.tokenization -   Loading tokenizer of type 'BertTokenizer'
03/29/2021 11:39:12 - INFO - farm.data_handler.processor -   Initialized processor without tasks. Supply `metric` and `label_list` to the constructor for using the default task or add a custom task later via processor.add_task()
03/29/2021 11:39:12 - INFO - farm.util







In [24]:
# Pretty-print the nested prediction structure returned by the inferencer.
print(PrettyPrinter().pformat(result))

[{'predictions': [{'context': '기생충,,, 이 영화 정말 재밌네요.',
                   'end': None,
                   'label': 'good',
                   'probability': 0.83329886,
                   'start': None},
                  {'context': '황정민 나오는 영화는 다 볼만한듯.',
                   'end': None,
                   'label': 'good',
                   'probability': 0.7448745,
                   'start': None}],
  'task': 'text_classification'}]


# MLflow

public mlflow: https://public-mlflow.deepset.ai/#/experiments/313/runs/f7f5999c30194f1d964d0693e683be62

<center><img src="https://drive.google.com/uc?id=1ip_--tgyo0M3V0mpGnONw0jTP2fv8sOE" alt="Fine-tuning" width="30%" height="50%" align="center"></center>

<center><img src="https://drive.google.com/uc?id=18PmyeDB4xsgrFVOmttsq0eiyFPzDm9IA" alt="Fine-tuning" width="90%" height="50%" align="center"></center>

# TASK Supported

|Task|BERT|RoBERTa*|XLNet|ALBERT|DistilBERT|XLMRoBERTa|ELECTRA|MiniLM|
|---|---|---|---|---|---|---|---|---|
|Text classification|x|x|x|x|x|x|x|x|
|NER|x|x|x|x|x|x|x|x|
|Question Answering|x|x|x|x|x|x|x|x|
|Language Model Fine-tuning|x||||||||
|Text Regression|x|x|x|x|x|x|x|x|
|Multilabel Text classif.|x|x|x|x|x|x|x|x|
|Extracting embeddings|x|x|x|x|x|x|x|x|
|LM from scratch|x||||||||
|Text Pair Classification|x|x|x|x|x|x|x|x|
|Passage Ranking|x|x|x|x|x|x|x|x|
|Document retrieval (DPR)|x|x||x|x|x|x|x|

# Compare to others

<center><img src="https://drive.google.com/uc?id=1TZoRpza8-o4wSTr0s16f8hHQRroLQg30" alt="Fine-tuning" width="90%" height="50%" align="center"></center>

## FARM 장단점

장점:

* 데이터 세트만 준비되어 있으면, 다른 패키지에 비해 상대적으로 설정 할 것이 적음
* 훈련 속도가 빠르고, 실험 기록 및 관리가 편리해서 빠르게 실험해 볼 수 있다.

단점: 

* customization이 상대적으로 힘듦