In [1]:
from denver.data import DenverDataSource
from denver.embeddings import Embeddings

In [2]:
# Configure pooled Flair embeddings backed by the 'multi-forward' pretrained model.
embeddings = Embeddings(
    embedding_types='pooled_flair_embeddings',
    pretrain='multi-forward',
)

# Materialise the embedding object that is handed to the learner further down.
embedding = embeddings.embed()

**NOTE:**

You can also fine-tune a language model on another corpus and use it as the embedding. The corpus folder must be structured as follows:

```text
corpus/
    train/
        train_split_1
        train_split_2
        ...
        train_split_X

    test.txt
    valid.txt
```
Usage code:
```py
embedding = embeddings.fine_tuning(corpus_dir='./data/corpus', 
                                   model_dir='./models', 
                                   batch_size=32, 
                                   max_epoch=1)
```

In [4]:
# Locations of the CSV files holding the training and test splits.
train_path = './data/cometv3/train.csv'
test_path = './data/cometv3/test.csv'

# Build the data source from both CSVs: sentences are read from the 'text'
# column, tags from the 'tag' column, and lowercasing is enabled.
data_source = DenverDataSource.from_csv(
    train_path=train_path,
    test_path=test_path,
    text_col='text',
    label_col='tag',
    lowercase=True,
)

In [5]:
from denver.learners import FlairSequenceTaggerLearner

# Sequence-tagger learner in training mode, fed by the data source and the
# embedding created in the earlier cells. A CRF layer is enabled (use_crf=True).
learn = FlairSequenceTaggerLearner(
    mode='training',
    data_source=data_source,
    tag_type='ner',
    embeddings=embedding,
    # Network shape
    hidden_size=1024,
    rnn_layers=1,
    reproject_embeddings=2048,
    # Regularisation
    dropout=0.0,
    word_dropout=0.05,
    locked_dropout=0.5,
    use_crf=True,
)

2020-12-17 03:35:23,916 Reading data from /tmp/tmpq789_l31
2020-12-17 03:35:23,916 Train: /tmp/tmpq789_l31/train.txt
2020-12-17 03:35:23,917 Dev: None
2020-12-17 03:35:23,917 Test: /tmp/tmpq789_l31/test.txt


In [6]:
# `ModelTrainer`, `model_dir` and `save_file` were never imported/defined in the
# notebook, so this cell raised NameError on a fresh kernel (Restart & Run All).
# NOTE(review): import path assumed from the denver package layout - confirm.
from denver.trainers.trainer import ModelTrainer

# Where the trained tagger is written.
# NOTE(review): presumably `save_best_model` is a file name created inside
# `model_dir`; the values are chosen so that they join to
# './models/ner/vi_nerr.pt', the path loaded by the prediction cell - confirm.
model_dir = './models/ner/'
save_file = 'vi_nerr.pt'

# Train the learner built in the previous cell.
trainer = ModelTrainer(learn=learn)
trainer.train(model_dir=model_dir,
              save_best_model=save_file,
              learning_rate=0.1,
              batch_size=32,
              max_epochs=300)

### Evaluate

In [7]:
from pprint import pprint

# Score the learner on the test set held by its data source and show the result.
metrics = learn.evaluate()
pprint(metrics)

**NOTE:**

- You can also evaluate on any other test dataset stored in a .csv file.

```py
test_path = './data/test.csv'

metrics = learn.evaluate(data=test_path, 
                         text_cols='sentence', 
                         label_cols='ner', 
                         lowercase=True)
```

- Additionally, you can load a trained model from a file and evaluate it.

```py
test_path = './data/test.csv'
model_path = './models/denver-ner-salebot.pt'

learn = FlairSequenceTaggerLearner(mode='inference', model_path=model_path)

metrics = learn.evaluate(data=test_path, 
                         text_cols='sentence', 
                         label_cols='ner', 
                         lowercase=True)
```

### Get the prediction

In [8]:
from pprint import pprint

# Load a saved tagger in inference mode.
model_path = './models/ner/vi_nerr.pt'
learn = FlairSequenceTaggerLearner(mode='inference', model_path=model_path)

# Tag a single raw sentence and pretty-print the result.
# NOTE(review): the training data was loaded with lowercase=True while this call
# passes lowercase=False - confirm the mismatch is intended.
text = 'shop có ghế ăn ko , xe đẩy còn k sh'
output = learn.process(sample=text, lowercase=False)
pprint(output)