Main resource: https://github.com/flairNLP/flair/blob/master/resources/docs/TUTORIAL_7_TRAINING_A_MODEL.md

In [1]:
import flair
import numpy as np
import pandas as pd
import torch

# Seed the NumPy and PyTorch random number generators before any model or data
# pipeline is built, so that a fresh "Restart & Run All" starts from the same
# RNG state every time.
# NOTE(review): this narrows run-to-run variance but does not by itself promise
# bit-identical GPU results, and Python's own `random` module is not seeded
# here — confirm against the PyTorch reproducibility notes if exact
# repeatability is required.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU, and
# hand the same device object to flair through its module-level `device`
# attribute.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
flair.device = device

print(flair.device)

cuda


In [2]:
# Record the installed PyTorch build next to the results it produced.
print(f"{torch.__version__}")

1.7.1+cu110


In [3]:
from flair.data import Corpus
from flair.datasets import CSVClassificationCorpus
from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings, DocumentRNNEmbeddings
from flair.models import TextClassifier
from flair.trainers import ModelTrainer

# Folder that holds the CSV splits for this experiment.
data_folder = '../data/corpuslow_13042021'

# Which CSV column index carries the text and which carries the label.
column_name_map = {
    1: "text",
    2: "label_topic",
}

# 1. get the corpus (the first row of each CSV is a header and is skipped)
corpus: Corpus = CSVClassificationCorpus(
    data_folder=data_folder,
    column_name_map=column_name_map,
    skip_header=True,
)

# 2. create the label dictionary from the labels found in the corpus
label_dict = corpus.make_label_dictionary()

print(label_dict)

2021-04-17 14:06:04,922 Reading data from ..\data\corpuslow_13042021
2021-04-17 14:06:04,968 Train: ..\data\corpuslow_13042021\train.csv
2021-04-17 14:06:04,969 Dev: ..\data\corpuslow_13042021\dev.csv
2021-04-17 14:06:04,969 Test: ..\data\corpuslow_13042021\test.csv
2021-04-17 14:06:05,001 Computing label dictionary. Progress:


100%|███████████████████████████████████████| 976/976 [00:01<00:00, 887.29it/s]

2021-04-17 14:06:23,247 [b'0', b'1']
Dictionary with 2 tags: 0, 1





In [4]:
# 3. build the word-level embeddings: GloVe plus the forward and backward
#    flair language-model embeddings, created in that order
glove_embedding = WordEmbeddings('glove')
flair_forward_embedding = FlairEmbeddings('news-forward')
flair_backward_embedding = FlairEmbeddings('news-backward')
word_embeddings = [
    glove_embedding,
    flair_forward_embedding,
    flair_backward_embedding,
]

# 4. wrap the word embeddings in a bidirectional LSTM document embedding
document_embeddings = DocumentRNNEmbeddings(
    word_embeddings,
    hidden_size=256,
    bidirectional=True,
    rnn_type="LSTM",
)

# 5. create the text classifier on top of the document embedding
classifier = TextClassifier(
    document_embeddings,
    label_dictionary=label_dict,
)

# 6. initialize the trainer with the classifier and the corpus
trainer = ModelTrainer(classifier, corpus)

# 7. start the training; the call stays the last expression of the cell so the
#    dictionary it returns is still shown as the cell output.
# NOTE(review): patience equals max_epochs here, so plateau-based annealing
# presumably never triggers within this run — confirm whether a longer run or a
# smaller patience was intended.
training_options = dict(
    learning_rate=0.1,
    mini_batch_size=32,
    anneal_factor=0.5,
    patience=5,
    max_epochs=5,
)
trainer.train('./flair/bilstm_low', **training_options)

2021-04-17 14:06:34,611 ----------------------------------------------------------------------------------------------------
2021-04-17 14:06:34,612 Model: "TextClassifier(
  (document_embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings('glove')
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
          (decoder): Linear(in_features=2048, out_features=300, bias=True)
        )
      )
      (list_embedding_2): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
          (decoder): Linear(in_features=2048, out_features=300, bias=True)
        )
      )
    )
    (word_reprojection_map): Linear(in_features=4196, out_features=4196, bias=True)
    (rnn): LSTM(4196, 256,

{'test_score': 0.6789,
 'dev_score_history': [0.6759, 0.6759, 0.6759, 0.6759, 0.6759],
 'train_loss_history': [0.7041722525443349,
  0.662722786622388,
  0.6482572640691485,
  0.6370561399630138,
  0.6294849312731198],
 'dev_loss_history': [0.8965303897857666,
  0.6289856433868408,
  0.6325303316116333,
  0.667435348033905,
  0.7137273550033569]}