In [2]:
from denver.data.data_source import DenverDataSource

# CSV files holding the train and test splits.
train_path = './data/salebot/train.csv'
test_path = './data/salebot/test.csv'

# Build the data source from both CSVs. `text_col` / `label_col` presumably
# name the columns holding the input text and the intent label.
# NOTE(review): the four boolean flags look like text-cleaning switches
# (lowercasing; stripping special tokens, URLs and emoji) -- confirm the
# exact semantics against the denver documentation.
data_source = DenverDataSource.from_csv(
    train_path=train_path,
    test_path=test_path,
    text_col='text',
    label_col='intent',
    lowercase=True,
    rm_special_token=True,
    rm_url=True,
    rm_emoji=True,
)

In [3]:
from denver.trainers.language_model_trainer import LanguageModelTrainer

# Fine-tune the language model on the training split of `data_source`.
# NOTE(review): 'babe' is the pretrained-model identifier handed to denver;
# confirm which weights it resolves to.
lm_trainer = LanguageModelTrainer(pretrain='babe')
lm_trainer.fine_tuning_from_df(
    data_df=data_source.train.data,
    batch_size=128,
    num_epochs=10,
    learning_rate=1e-3,
    moms=[0.8, 0.7],
    drop_mult=0.5,
)


➖➖➖➖➖➖➖➖➖➖ FINE-TUNING ➖➖➖➖➖➖➖➖➖➖



epoch,train_loss,valid_loss,accuracy,time
0,4.500978,3.869695,0.272573,00:01
1,4.133433,3.566526,0.28245,00:00


epoch,train_loss,valid_loss,accuracy,time
0,3.668547,3.46162,0.286133,00:00
1,3.538336,3.034924,0.334263,00:00
2,3.32478,2.707021,0.383901,00:00
3,3.153852,2.608732,0.400251,00:00
4,3.030699,2.541165,0.410631,00:00
5,2.937804,2.503082,0.417997,00:00
6,2.866714,2.486787,0.420703,00:00
7,2.812361,2.484737,0.421289,00:00


In [4]:
from denver.learners import ULMFITClassificationLearner
from denver.trainers.trainer import ModelTrainer

# Classification learner in training mode, fed by `data_source`.
learn = ULMFITClassificationLearner(mode='training', data_source=data_source)

# Train the classifier and save it; the checkpoint ends up at
# base_path + model_file, i.e. ./models/intent/denver.pkl.
trainer = ModelTrainer(learn=learn)
trainer.train(
    base_path='./models/intent/',
    model_file='denver.pkl',
    learning_rate=2e-2,
    batch_size=128,
    num_epochs=14,
)


➖➖➖➖➖➖➖➖➖➖ TRAINING ➖➖➖➖➖➖➖➖➖➖



epoch,train_loss,valid_loss,accuracy,f_beta,precision,recall,time
0,0.825038,0.800286,0.818898,0.811269,0.857467,0.818898,00:01
1,0.501912,0.283712,0.912073,0.911795,0.913461,0.912073,00:00
2,0.330435,0.229726,0.929134,0.929342,0.931849,0.929134,00:00


epoch,train_loss,valid_loss,accuracy,f_beta,precision,recall,time
0,0.151974,0.351507,0.909449,0.909621,0.911782,0.909449,00:00
1,0.224414,0.342895,0.918635,0.918469,0.92129,0.918635,00:00
2,0.190828,0.291064,0.931759,0.931732,0.933447,0.931759,00:00


epoch,train_loss,valid_loss,accuracy,f_beta,precision,recall,time
0,0.247263,0.360245,0.91601,0.916126,0.918462,0.916011,00:00


epoch,train_loss,valid_loss,accuracy,f_beta,precision,recall,time
0,0.160332,0.250997,0.930446,0.930649,0.932393,0.930446,00:00


epoch,train_loss,valid_loss,accuracy,f_beta,precision,recall,time
0,0.089256,0.244898,0.933071,0.932944,0.9346,0.933071,00:00
1,0.074915,0.224993,0.939633,0.939482,0.940544,0.939633,00:00
2,0.059476,0.237574,0.944882,0.944872,0.946305,0.944882,00:00
3,0.04934,0.23811,0.94357,0.943495,0.944467,0.94357,00:00
4,0.042082,0.247935,0.940945,0.940791,0.942367,0.940945,00:00
5,0.040215,0.241235,0.946194,0.945983,0.946875,0.946194,00:00


2020-12-17 03:16:12,141 INFO  denver.learners.ulmfit_cls_learner:197 - Save the model...
2020-12-17 03:16:19,777 INFO  denver.learners.ulmfit_cls_learner:699 - Path to the saved model: /workspace/nlp_training/phucpx/tutorial/denver/models/intent/denver.pkl



⏰  The trained time: 0:00:30.659655


░▒▓█  Evaluated Valid: 
+--------------------+--------------------+--------------------+--------------------+-------------------+
|        loss        |        acc         |         f1         |     precision      |      recall       |
+--------------------+--------------------+--------------------+--------------------+-------------------+
| 0.2990902364253998 | 0.9225721955299377 | 0.9226223826408386 | 0.9255156517028809 | 0.922572135925293 |
+--------------------+--------------------+--------------------+--------------------+-------------------+


In [5]:
from pprint import pprint

# Called without arguments, evaluate() scores the test set held by the
# learner's data source.
metrics = learn.evaluate()
pprint(metrics)

2020-12-17 03:16:19,978 INFO  denver.learners.ulmfit_cls_learner:353 - Evaluating...


{'acc': [0.9266247379454927],
 'cls_detailed_results': '                      precision    recall  '
                         'f1-score   support\n'
                         '\n'
                         'ask_about_age_of_use       0.98      0.92      '
                         '0.95        66\n'
                         ' ask_about_available       0.89      0.86      '
                         '0.88        37\n'
                         '     ask_about_brand       0.93      0.86      '
                         '0.90        80\n'
                         '     ask_about_color       0.98      0.94      '
                         '0.96        66\n'
                         ' ask_about_guarantee       0.97      0.93      '
                         '0.95        60\n'
                         '     ask_about_image       0.91      1.00      '
                         '0.95        69\n'
                         '      ask_about_link       1.00      0.88      '
                         '0.93  

In [6]:
from pprint import pprint

text = "Sữa tắm arun bán ở link nào shop"

# Run one utterance through the learner. The result is a Rasa-format dict
# with the top `intent` and an `intent_ranking` list of per-class confidences.
output = learn.process(
    sample=text,
    lowercase=True,
    rm_special_token=True,
    rm_url=True,
    rm_emoji=True,
)
pprint(output)

{'intent': {'confidence': 0.5957598686218262, 'name': 'ask_about_origin'},
 'intent_ranking': [{'confidence': 4.442536737769842e-05,
                     'name': 'ask_about_age_of_use'},
                    {'confidence': 9.711595339467749e-05,
                     'name': 'ask_about_available'},
                    {'confidence': 0.2590196132659912,
                     'name': 'ask_about_brand'},
                    {'confidence': 1.65079709404381e-05,
                     'name': 'ask_about_color'},
                    {'confidence': 0.0019619252998381853,
                     'name': 'ask_about_guarantee'},
                    {'confidence': 0.00027482578298076987,
                     'name': 'ask_about_image'},
                    {'confidence': 0.04370839521288872,
                     'name': 'ask_about_link'},
                    {'confidence': 0.0004990660236217082,
                     'name': 'ask_about_material'},
                    {'confidence': 0.5957598686218262,
    

In [9]:
# Evaluate a saved model on a dataset read directly from a CSV file.
# NOTE(review): this path is the same file as `test_path` used to build
# `data_source`; point it at a different CSV to score another dataset.
path = './data/salebot/test.csv'

# Reload the trained model from disk in inference mode.
# NOTE: this rebinds `learn`, so later cells use the inference-mode learner.
learn = ULMFITClassificationLearner(mode='inference', model_path='./models/intent/denver.pkl')

metrics = learn.evaluate(
    data=path,
    text_col='text',
    label_col='intent',
    lowercase=True,
    rm_emoji=True,
    rm_url=True,
    rm_special_token=True,
)

pprint(metrics)

2020-12-17 03:17:05,538 INFO  denver.learners.ulmfit_cls_learner:353 - Evaluating...


{'acc': [0.9276729559748428],
 'cls_detailed_results': '                      precision    recall  '
                         'f1-score   support\n'
                         '\n'
                         'ask_about_age_of_use       0.98      0.92      '
                         '0.95        66\n'
                         ' ask_about_available       0.86      0.84      '
                         '0.85        37\n'
                         '     ask_about_brand       0.93      0.86      '
                         '0.90        80\n'
                         '     ask_about_color       0.97      0.92      '
                         '0.95        66\n'
                         ' ask_about_guarantee       0.97      0.93      '
                         '0.95        60\n'
                         '     ask_about_image       0.92      1.00      '
                         '0.96        69\n'
                         '      ask_about_link       1.00      0.91      '
                         '0.95  

#### With the ULMFIT model, we also provide a method to get the uncertainty score of a prediction

In [11]:
text = "Làm bằng chất liệu j vậy shop"

# Uncertainty estimate for a single utterance.
# NOTE(review): `n_times` is presumably the number of repeated prediction
# passes used to compute the score -- confirm against the denver docs.
uncertainty_score = learn.get_uncertainty_score(
    sample=text,
    n_times=10,
)
pprint(uncertainty_score)

{'intent': 'ask_about_material',
 'method': 'entropy',
 'text': 'làm bằng chất liệu j vậy shop',
 'uncertainty_score': 0.00023842701921239495}
