In [1]:
from flair.models import TextClassifier
from flair.data import Sentence

In [2]:
# Wrap the sample text in a flair Sentence and load the pre-trained
# 'en-sentiment' classifier; the two steps are independent of each other.
sample_text = 'Flair is pretty neat!'
sentence = Sentence(sample_text)
classifier = TextClassifier.load('en-sentiment')

# predict() is called for its effect on `sentence`: the labels printed below
# are read back from the sentence object, not from a return value.
classifier.predict(sentence)
print('Sentence above is: ', sentence.labels)

Sentence above is:  [POSITIVE (1.0)]


In [3]:
# Look a token up by its id. Ids are 1-based: id 3 is displayed as
# "Token: 3 pretty", the third word of 'Flair is pretty neat!'.
sentence.get_token(3)

Token: 3 pretty

In [4]:
# Square-bracket indexing is 0-based, so index 2 is the third token; it is
# displayed as "Token: 3 pretty", the same token that get_token(3) returns.
sentence[2]

Token: 3 pretty

In [5]:
# A Sentence is iterable; walking it yields its tokens in order, and printing
# a token shows its 1-based id followed by its text.
for tok in sentence:
    print(tok)

Token: 1 Flair
Token: 2 is
Token: 3 pretty
Token: 4 neat!


In [11]:
# Data: the SMS Spam Collection dataset. Before running this cell, fetch it
# from a shell with
#   kaggle datasets download -d uciml/sms-spam-collection-dataset
# and make sure spam.csv ends up in the working directory.
import pandas as pd

# Shuffle all rows, then drop exact duplicate rows. The shuffle is seeded so
# that the row order -- and therefore the positional train/test/dev split made
# further down -- is identical on every run of the notebook.
data = (
    pd.read_csv("spam.csv", encoding='latin-1')
    .sample(frac=1, random_state=42)
    .drop_duplicates()
)
data.describe()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
count,5169,5169,43,10,5
unique,2,5169,43,10,5
top,ham,No need to ke qi... ÌÏ too bored izzit y sudde...,"bt not his girlfrnd... G o o d n i g h t . . .@""",why to miss them,"one day these two will become FREINDS FOREVER!"""
freq,4516,1,1,1,1


In [12]:
# Keep only the two populated columns, give them descriptive names, and
# prepend '__label__' to every class label -- all in one chain that builds a
# new frame and rebinds `data` to it.
data = (
    data[['v1', 'v2']]
    .rename(columns={"v1": "label", "v2": "text"})
    .assign(label=lambda frame: '__label__' + frame['label'].astype(str))
)
data

Unnamed: 0,label,text
3546,__label__spam,Rock yr chik. Get 100's of filthy films &XXX p...
3347,__label__ham,"Sorry, I'll call later"
2788,__label__ham,ÌÏ got wat to buy tell us then Ì_ no need to c...
3375,__label__ham,"Good afternon, my love. How are today? I hope ..."
1099,__label__ham,Been up to ne thing interesting. Did you have ...
334,__label__spam,Valentines Day Special! Win over å£1000 in our...
1272,__label__ham,Ok...
5062,__label__ham,Ok i also wan 2 watch e 9 pm show...
3893,__label__spam,Dear Dave this is your final notice to collect...
3004,__label__ham,Ew are you one of them?


In [15]:
# Write the first 80% of the (already shuffled) rows to train.csv as
# tab-separated lines, with no index column and no header row.
train_end = int(len(data) * 0.8)
train_rows = data.iloc[0:train_end]
train_rows.to_csv('train.csv', sep='\t', index = False, header = False)
data

Unnamed: 0,label,text
3546,__label__spam,Rock yr chik. Get 100's of filthy films &XXX p...
3347,__label__ham,"Sorry, I'll call later"
2788,__label__ham,ÌÏ got wat to buy tell us then Ì_ no need to c...
3375,__label__ham,"Good afternon, my love. How are today? I hope ..."
1099,__label__ham,Been up to ne thing interesting. Did you have ...
334,__label__spam,Valentines Day Special! Win over å£1000 in our...
1272,__label__ham,Ok...
5062,__label__ham,Ok i also wan 2 watch e 9 pm show...
3893,__label__spam,Dear Dave this is your final notice to collect...
3004,__label__ham,Ew are you one of them?


In [20]:
# Positional 80% / 10% / 10% split of the rows into train / test / dev files.
# The two cut points are computed once and shared by all three slices.
n_rows = len(data)
train_end = int(n_rows * 0.8)
test_end = int(n_rows * 0.9)

splits = (
    ('train.csv', data.iloc[0:train_end]),
    ('test.csv', data.iloc[train_end:test_end]),
    ('dev.csv', data.iloc[test_end:]),
)

# Each file is tab-separated with no index column and no header row.
for filename, rows in splits:
    rows.to_csv(filename, sep='\t', index = False, header = False)

In [21]:
from flair.data_fetcher import NLPTaskDataFetcher
from flair.embeddings import WordEmbeddings, FlairEmbeddings, DocumentLSTMEmbeddings
from flair.models import TextClassifier
from flair.trainers import ModelTrainer
from pathlib import Path

In [28]:
# Read the three split files (train.csv / dev.csv / test.csv) from the current
# directory into a flair classification corpus.
corpus = NLPTaskDataFetcher.load_classification_corpus(
    Path('./'),
    test_file='test.csv',
    dev_file='dev.csv',
    train_file='train.csv',
)

# Word-level embeddings that the document embedding is built on. They are
# defined in this cell so that it runs on a fresh kernel (Restart & Run All)
# instead of depending on a variable left over from an unsaved cell.
# NOTE(review): the concrete choice (GloVe + fast forward/backward Flair
# embeddings) is an assumption based on the classes this notebook imports --
# confirm it matches the embeddings used for the recorded training run.
word_embeddings = [
    WordEmbeddings('glove'),
    FlairEmbeddings('news-forward-fast'),
    FlairEmbeddings('news-backward-fast'),
]

# Document-level embedding: an LSTM over the word embeddings above, with the
# word vectors reprojected to 256 dimensions and an LSTM hidden size of 512.
document_embeddings = DocumentLSTMEmbeddings(
    word_embeddings,
    hidden_size=512,
    reproject_words=True,
    reproject_words_dimension=256,
)

2018-12-29 22:24:18,256 Reading data from .
2018-12-29 22:24:18,257 Train: train.csv
2018-12-29 22:24:18,257 Dev: dev.csv
2018-12-29 22:24:18,258 Test: test.csv


In [29]:
# Build the spam/ham classifier on top of the document embeddings. The label
# set comes from the corpus, and multi_label=False means one label per text.
# Note: this rebinds `classifier`, which earlier held the pre-trained
# 'en-sentiment' model.
label_dictionary = corpus.make_label_dictionary()
classifier = TextClassifier(
    document_embeddings,
    label_dictionary=label_dictionary,
    multi_label=False,
)

In [30]:
# Train the classifier on the corpus for at most 10 epochs. The call is left
# as the cell's last expression so that the dictionary it returns (test score
# plus dev-score / loss histories) is displayed as the cell output.
# NOTE(review): './' is presumably the directory training artifacts are
# written to -- confirm against the flair ModelTrainer documentation.
trainer = ModelTrainer(classifier, corpus)
trainer.train(
    './',
    max_epochs=10,
)

2018-12-29 22:24:53,503 ----------------------------------------------------------------------------------------------------
2018-12-29 22:24:53,504 Evaluation method: MICRO_F1_SCORE
2018-12-29 22:24:53,506 ----------------------------------------------------------------------------------------------------
2018-12-29 22:24:57,800 epoch 1 - iter 0/130 - loss 0.02254135
2018-12-29 22:25:40,268 epoch 1 - iter 13/130 - loss 0.01204099
2018-12-29 22:26:15,462 epoch 1 - iter 26/130 - loss 0.00999340
2018-12-29 22:26:58,772 epoch 1 - iter 39/130 - loss 0.00891003
2018-12-29 22:27:32,914 epoch 1 - iter 52/130 - loss 0.00799466
2018-12-29 22:28:11,684 epoch 1 - iter 65/130 - loss 0.00722255
2018-12-29 22:28:43,841 epoch 1 - iter 78/130 - loss 0.00653092
2018-12-29 22:29:26,379 epoch 1 - iter 91/130 - loss 0.00592038
2018-12-29 22:30:05,074 epoch 1 - iter 104/130 - loss 0.00560936
2018-12-29 22:30:41,490 epoch 1 - iter 117/130 - loss 0.00524558
2018-12-29 22:31:08,392 ---------------------------

2018-12-29 22:38:50,942 epoch 8 - iter 13/130 - loss 0.00098233
2018-12-29 22:38:55,841 epoch 8 - iter 26/130 - loss 0.00081115
2018-12-29 22:39:01,740 epoch 8 - iter 39/130 - loss 0.00077291
2018-12-29 22:39:07,279 epoch 8 - iter 52/130 - loss 0.00083412
2018-12-29 22:39:13,470 epoch 8 - iter 65/130 - loss 0.00102800
2018-12-29 22:39:19,906 epoch 8 - iter 78/130 - loss 0.00133887
2018-12-29 22:39:25,475 epoch 8 - iter 91/130 - loss 0.00149230
2018-12-29 22:39:30,152 epoch 8 - iter 104/130 - loss 0.00141488
2018-12-29 22:39:35,350 epoch 8 - iter 117/130 - loss 0.00141031
2018-12-29 22:39:40,777 ----------------------------------------------------------------------------------------------------
2018-12-29 22:39:40,778 EPOCH 8 done: loss 0.0015 - lr 0.1000 - bad epochs 0
2018-12-29 22:39:42,249 DEV  : loss 0.00350079 - f-score 0.9691 - acc 0.9691
2018-12-29 22:39:44,107 TEST : loss 0.00208841 - f-score 0.9806 - acc 0.9806
2018-12-29 22:39:48,439 ------------------------------------------

{'test_score': 0.9864,
 'dev_score_history': [0.9555,
  0.9729,
  0.9671,
  0.9826,
  0.9787,
  0.9826,
  0.9826,
  0.9691,
  0.9787,
  0.9807],
 'train_loss_history': [0.005179632597662225,
  0.0026449494521282695,
  0.0023164576132434223,
  0.002160288070951746,
  0.0020395224297283404,
  0.0018013223191891492,
  0.0016384062971437083,
  0.0014514075844783447,
  0.0013020341024055381,
  0.0012984403613643515],
 'dev_loss_history': [0.005003971979022026,
  0.0025356565602123737,
  0.0029122158885002136,
  0.002164277480915189,
  0.0024227711837738752,
  0.0019227334996685386,
  0.0018321179086342454,
  0.0035007894039154053,
  0.0030762688256800175,
  0.0020050262100994587]}