In [None]:
# Install flair - an NLP package
#!pip install flair

In [None]:
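## NOTE: Before running this cell on Google Colab, convert the csv files
## produced by 'Pre-Processing Flair' into .txt files and upload them to Colab
## using the left pane. The loader below reads flair_train2.txt,
## flair_test2.txt and flair_dev2.txt from /content/.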

#  Build a corpus using flair library
from flair.data import Corpus
from flair.datasets import ColumnCorpus

# Column layout of the input files: column 0 is the text, column 1 the NER tag
columns = {
    0: 'text',
    1: 'ner',
}

# Folder that holds the corpus files
data_folder = "/content/"

# Load the training, testing and validation splits into one corpus
corpus: Corpus = ColumnCorpus(
    data_folder,
    columns,
    train_file='flair_train2.txt',
    test_file='flair_test2.txt',
    dev_file='flair_dev2.txt',
)

In [None]:
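# Optional sanity check: print a tagged sentence from the training split.
# NOTE(review): `columns` only declares 'text' and 'ner', so the 'pos' variant
# presumably has nothing to show - confirm before enabling it.
#print(corpus.train[55].to_tagged_string('ner'))
#print(corpus.train[521].to_tagged_string('pos'))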

In [None]:
# Tag type the model is asked to predict
tag_type = 'ner'

# Build the tag dictionary for that tag type from the corpus and print it
# so the available labels can be inspected
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary)

In [None]:
## Only run this if you intend to use ELMo embeddings

#!pip install allennlp==0.9.0

In [None]:
## Build the individual embeddings that are stacked in the next cell

import importlib.util

from flair.embeddings import WordEmbeddings, FlairEmbeddings, TransformerWordEmbeddings, ELMoEmbeddings

# Init standard GloVe embedding
glove_embedding = WordEmbeddings('glove')

# Init BERT embedding
bert_embedding = TransformerWordEmbeddings('bert-base-uncased')

# Init ELMo embedding.
# ELMo is optional in this notebook: its allennlp dependency is only present if
# the (commented-out) install cell was run. Only construct it when allennlp is
# importable, so a plain "Run All" does not stop here; elmo_embedding is left
# as None otherwise.
if importlib.util.find_spec('allennlp') is not None:
    elmo_embedding = ELMoEmbeddings()
else:
    elmo_embedding = None
    print('allennlp is not installed - skipping ELMo embeddings. '
          'Keep elmo_embedding out of the StackedEmbeddings list, or install '
          'allennlp==0.9.0 and re-run this cell.')

# Init Flair forward and backwards embeddings
flair_embedding_forward = FlairEmbeddings('news-forward')
flair_embedding_backward = FlairEmbeddings('news-backward')

In [None]:
from flair.embeddings import StackedEmbeddings

# Combine the chosen embeddings into one StackedEmbeddings object.
# Comment / uncomment an entry in the list to disable / enable that embedding.
# Recommended embeddings: GloVe, ELMo, flair forward & backward

embeddings: StackedEmbeddings = StackedEmbeddings([
    glove_embedding,
    # bert_embedding,
    # elmo_embedding,
    flair_embedding_forward,
    flair_embedding_backward,
])

In [None]:
## Create a sequence tagging model

%%time
from flair.models import SequenceTagger
tagger : SequenceTagger = SequenceTagger(hidden_size=256,
                                       embeddings=embeddings,
                                       tag_dictionary=tag_dictionary,
                                       tag_type=tag_type,
                                       use_crf=True)
print(tagger)

# Train the model until loss is minimized
# Recommended learning rate = 0.1
# Use smaller mini_batch_size if training is too slow

from flair.trainers import ModelTrainer
trainer : ModelTrainer = ModelTrainer(tagger, corpus)
trainer.train('resources/taggers/example-ner',
              learning_rate=0.1,
              mini_batch_size=32,
              max_epochs=150)

In [None]:
# Load and test model

from flair.data import Sentence
from flair.models import SequenceTagger

# Load the trained model from the same relative output directory that the
# training cell hands to trainer.train(), so both cells agree whatever the
# notebook's working directory is (on Colab this resolves below /content)
model = SequenceTagger.load('resources/taggers/example-ner/final-model.pt')

# Create example sentence for qualitative evaluation
sentence = Sentence('The boy kicked a blue ball at me out of the blue')

# Run the tagger on the sentence and print it together with the predicted tags
model.predict(sentence)
print(sentence.to_tagged_string())



In [None]:
# For longer sentences/articles to be displayed within the width of the window
from IPython.display import HTML

# Example passage whose tags should be predicted
sentence = Sentence(
    '"Orange. I think that because I’m really into colors, '
    'I’m attracted to the ones that make you feel happy. '
    'And orange is such a happy and warm color. '
    'I love every palette around the orange color and '
    'I love mixing it with green—that I’ve been wearing a lot for a while."'
)

# Run the tagger over the passage
model.predict(sentence)

# Inject a style rule so <pre> output wraps instead of overflowing sideways,
# then print the tagged passage
display(HTML('''
<style>
  pre {
      white-space: normal;
  }
</style>
'''))
print(sentence.to_tagged_string())
