# fastText

<https://fasttext.cc/docs/en/supervised-tutorial.html>

In [21]:
import fasttext
# help(fasttext.FastText)

## Train

In [28]:
# Train a supervised text classifier with fastText's default hyper-parameters
# on the training split, then persist it so it can be reloaded later with
# fasttext.load_model("model_cooking.bin").
model = fasttext.train_supervised(input="cooking.train")
model.save_model("model_cooking.bin")

## Test

In [24]:
# predict() returns a pair: a tuple of labels and an array of their
# probabilities. With the default k=1 only the single best label is returned.
model.predict("Which baking dish is best to bake a banana bread ?")

(('__label__food-safety',), array([0.06507698]))

In [23]:
# Second sanity check on a different question. The recorded output is the same
# label with the same low probability as for the banana-bread question, which
# shows the default model is barely discriminating between inputs.
model.predict("Why not put knives in the dishwasher?")

(('__label__food-safety',), array([0.06507698]))

In [26]:
# Evaluate on the validation split. test() returns
# (number of examples, precision@k, recall@k); k defaults to 1.
model.test("cooking.valid")

(3000, 0.14133333333333334, 0.06112152227187545)

In [27]:
# Same evaluation, but scoring the top 5 predicted labels per example:
# precision@5 drops and recall@5 rises compared with k=1.
model.test("cooking.valid", k=5)

(3000, 0.06606666666666666, 0.14285714285714285)

In [29]:
# Ask for the 5 most likely labels instead of only the best one; labels and
# probabilities are returned in matching order.
model.predict("Why not put knives in the dishwasher?", k=5)

(('__label__baking',
  '__label__food-safety',
  '__label__bread',
  '__label__equipment',
  '__label__substitutions'),
 array([0.07211091, 0.06693368, 0.04350004, 0.03240925, 0.03079349]))

## Improve

In [31]:
# Baseline for the "Improve" section.
# NOTE(review): this is the same call on the same file as the initial training
# run. The linked tutorial trains on a normalized copy of the data (lowercased,
# punctuation separated) at this step -- confirm whether a preprocessed file
# was meant to be used here.
model = fasttext.train_supervised(input="cooking.train")

In [33]:
# Precision@1 / recall@1 of the retrained default model on the validation set.
model.test("cooking.valid")

(3000, 0.14533333333333334, 0.06285137667579645)

In [34]:
# Increase the number of passes over the training data (fastText's default
# is 5 epochs), keeping every other setting at its default.
model = fasttext.train_supervised(input="cooking.train", epoch=25)

In [35]:
# Evaluate the epoch=25 model (precision@1, recall@1).
model.test("cooking.valid")

(3000, 0.5216666666666666, 0.22560184517803084)

In [36]:
# Raise the learning rate instead (the default for supervised training is
# 0.1), with the default number of epochs.
model = fasttext.train_supervised(input="cooking.train", lr=1.0)

In [37]:
# Evaluate the lr=1.0 model (precision@1, recall@1).
model.test("cooking.valid")

(3000, 0.5716666666666667, 0.2472250252270434)

In [38]:
# Combine both changes: higher learning rate and more epochs.
model = fasttext.train_supervised(input="cooking.train", lr=1.0, epoch=25)

In [39]:
# Evaluate the lr=1.0, epoch=25 model (precision@1, recall@1).
model.test("cooking.valid")

(3000, 0.5943333333333334, 0.25702753351592905)

In [40]:
# Add word bigram features (wordNgrams=2; the default of 1 uses unigrams only)
# so that some local word order is captured.
model = fasttext.train_supervised(input="cooking.train", lr=1.0, epoch=25, wordNgrams=2)

In [41]:
# Evaluate the bigram model (precision@1, recall@1).
model.test("cooking.valid")

(3000, 0.609, 0.26337033299697277)

In [42]:
# Trade a little accuracy for speed and size: hierarchical softmax
# (loss='hs') approximates the full softmax, dim=50 shrinks the embedding
# size, and bucket=200000 shrinks the n-gram hashing table.
model = fasttext.train_supervised(input="cooking.train", lr=1.0, epoch=25, wordNgrams=2, bucket=200000, dim=50, loss='hs')

In [43]:
# Evaluate the hierarchical-softmax model; the recorded precision@1 is
# slightly below the full-softmax bigram model.
model.test("cooking.valid")

(3000, 0.583, 0.25212627937148624)

## Multi-Label Classification

In [44]:
# Multi-label setup: loss='ova' (one-vs-all) trains an independent binary
# classifier per label, so several labels can score highly at once.
# The learning rate is lowered to 0.5 relative to the softmax runs.
model = fasttext.train_supervised(input="cooking.train", lr=0.5, epoch=25, wordNgrams=2, bucket=200000, dim=50, loss='ova')

In [46]:
# k=-1 asks for as many labels as possible; threshold=0.5 keeps only those
# whose probability is at least 0.5.
# NOTE(review): the first score in the recorded output is slightly above 1.0 --
# presumably a numerical artefact of how fastText converts log-probabilities;
# treat it as 1.0.
model.predict("Which baking dish is best to bake a banana bread ?", k=-1, threshold=0.5)

(('__label__baking',
  '__label__bread',
  '__label__equipment',
  '__label__bananas'),
 array([1.00001001, 0.98410362, 0.92631376, 0.84797776]))

In [47]:
# NOTE(review): no threshold is passed here, so test() uses its default of 0.0
# and, with k=-1, counts every label as predicted for every example. That is
# why the recorded result has recall 1.0 and near-zero precision. To measure
# the model the way predict() is used above, evaluate with
# model.test("cooking.valid", k=-1, threshold=0.5) instead.
model.test("cooking.valid", k=-1)

(3000, 0.003146031746031746, 1.0)