In [None]:
from flair.data import Corpus
from flair.datasets import ColumnCorpus
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, Sentence, FlairEmbeddings
from typing import List
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
from flair.visual.training_curves import Plotter
from hyperopt import hp
from flair.hyperparameter.param_selection import SearchSpace, Parameter, SequenceTaggerParamSelector, OptimizationValue
from flair.training_utils import EvaluationMetric

In [None]:
# 1. Load the corpus.

# Folder in which the train, test and dev files reside.
data_folder = 'corpus_data/'

# Column layout of the data files: column index -> name of the annotation
# stored in that column.
columns = dict(enumerate(['text', 'key_phrase', 'kp_type']))

# Build the column-format corpus from the data folder and the column mapping.
# NOTE(review): no explicit train/dev/test file names are passed here, so the
# split files are presumably located by ColumnCorpus itself -- confirm that the
# file names inside `data_folder` are the ones it expects.
corpus: Corpus = ColumnCorpus(data_folder, columns)

In [None]:
# Print the size of each corpus split, one number per line, in the order
# train, dev, test. Each split is looked up and printed before moving on to
# the next one.
for split_name in ('train', 'dev', 'test'):
    print(len(getattr(corpus, split_name)))

In [None]:
# 2. Name of the tag the sequence tagger should predict. It is the same name
# that the corpus column mapping assigns to column 1.
tag_type: str = 'key_phrase'

In [None]:
# Initialize the token embeddings that get stacked into one embedding.

# Start with the GloVe word embeddings.
embedding_types: List[TokenEmbeddings] = [WordEmbeddings('glove')]

# Uncomment the next line to also use character embeddings.
# NOTE(review): CharacterEmbeddings is not among the names imported in the
# import cell, so it would have to be imported first.
# embedding_types.append(CharacterEmbeddings())

# Add the forward and backward flair embeddings, in that order; drop this
# statement to run without flair embeddings.
embedding_types.extend(
    FlairEmbeddings(model_name)
    for model_name in ('news-forward', 'news-backward')
)

# Combine all selected embeddings into a single stacked embedding.
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

In [None]:
# 3. Define the search space.
# Every parameter is registered as an `hp.choice` over the listed options.
# Only DROPOUT lists more than one option, so it is the only value that can
# differ between evaluations; the stacked `embeddings` object is the sole
# embedding candidate.
search_space = SearchSpace()
for parameter, options in (
    (Parameter.EMBEDDINGS, [embeddings]),
    (Parameter.HIDDEN_SIZE, [256]),
    (Parameter.DROPOUT, [0.08, 0.12]),
    (Parameter.LEARNING_RATE, [0.17]),
    (Parameter.MINI_BATCH_SIZE, [16]),
):
    search_space.add(parameter, hp.choice, options=options)

In [None]:
# 4. Create the parameter selector.
# The settings are gathered in one mapping and passed as keyword arguments.
selector_settings = dict(
    corpus=corpus,                                      # corpus built in the loading cell
    tag_type=tag_type,                                  # tag to predict
    base_path='resources/opt_results2',                 # path handed to the selector as base_path
    max_epochs=60,
    evaluation_metric=EvaluationMetric.MICRO_F1_SCORE,
    training_runs=1,
    optimization_value=OptimizationValue.DEV_SCORE,
)
param_selector = SequenceTaggerParamSelector(**selector_settings)

In [None]:
# 5. Start the optimization.
# MAX_EVALS is forwarded as `max_evals`, the evaluation budget of the search.
MAX_EVALS = 2
param_selector.optimize(search_space, max_evals=MAX_EVALS)