In [1]:
import sys

# Add the parent directory to the module search path. The entry is relative,
# so it is resolved against the kernel's current working directory.
# NOTE(review): presumably this is what lets the project-local `train` and
# `ap` packages imported in later cells resolve — confirm the repo layout.
sys.path.append("..")

In [2]:
import random

from sklearn.model_selection import train_test_split

from train.training_utils import load_datasets, evaluate

In [3]:
# Load the labelled training data from the SQLite database.
# NOTE(review): the second and third arguments look like a language code and
# the list of dataset sources to include — confirm against `load_datasets`.
vectors = load_datasets(
    "../../ingredient-parser/train/data/training.sqlite3", "en", ["bbc", "cookstr", "nyt"]
)

# Hold out 20% of the data for evaluation. The split is stratified on the
# source of each sentence, and the fixed random_state keeps the partition
# identical from run to run. train_test_split returns a (train, test) pair for
# each input array, in the order the arrays were passed.
(
    sentences_train, sentences_test,
    features_train, features_test,
    truth_train, truth_test,
    source_train, source_test,
) = train_test_split(
    vectors.sentences,
    vectors.features,
    vectors.labels,
    vectors.source,
    stratify=vectors.source,
    test_size=0.2,
    random_state=165846843,
)

[INFO] Loading and transforming training data.
[INFO] 59,933 usable vectors.
[INFO] 67 discarded due to OTHER labels.


In [4]:
from ap.ingredient_tagger import IngredientTagger

In [5]:
# Create a fresh tagger and assign the set of labels on its underlying model.
# NOTE(review): presumably these are the tag classes the model may predict —
# confirm how `IngredientTagger.model.labels` is consumed.
tagger = IngredientTagger()
tagger.model.labels = {
    "QTY", "UNIT", "NAME", "PREP",
    "COMMENT", "PURPOSE", "PUNC", "SIZE",
}

In [6]:
%%time
# Seed Python's global `random` generator before training.
# NOTE(review): presumably `tagger.train` draws on `random` (e.g. to shuffle
# examples), so this makes the run repeatable — confirm in the tagger source.
random.seed(4564138)
# Fit the tagger on the training split only; the test split stays unseen.
tagger.train(features_train, truth_train)

Iter 0: 95.7%
Iter 1: 96.9%
Iter 2: 97.2%
Iter 3: 97.5%
Iter 4: 97.6%
Iter 5: 97.7%
Iter 6: 97.8%
Iter 7: 97.9%
Iter 8: 97.9%
Iter 9: 98.0%
CPU times: user 1min 8s, sys: 78.3 ms, total: 1min 8s
Wall time: 1min 8s


In [7]:
# Tag every held-out sentence from its pre-computed features.
predicted_labels = [
    tagger.tag_from_features(feats) for feats in features_test
]

# Compare the predictions with the ground truth; the result exposes both
# sentence-level and token-level statistics.
stats = evaluate(truth_test, predicted_labels)
print("Sentence-level results:")
print(f"\tAccuracy: {100*stats.sentence.accuracy:.2f}%")

print()
print("Word-level results:")
print(f"\tAccuracy {100*stats.token.accuracy:.2f}%")
# These three figures are read from `weighted_avg`, so they are labelled as
# weighted averages. The previous "(micro)" label was wrong: micro-averaged
# precision and recall would both equal the accuracy for single-label tokens.
print(f"\tPrecision (weighted) {100*stats.token.weighted_avg.precision:.2f}%")
print(f"\tRecall (weighted) {100*stats.token.weighted_avg.recall:.2f}%")
print(f"\tF1 score (weighted) {100*stats.token.weighted_avg.f1_score:.2f}%")

Sentence-level results:
	Accuracy: 93.58%

Word-level results:
	Accuracy 97.63%
	Precision (micro) 97.65%
	Recall (micro) 97.63%
	F1 score (micro) 97.64%
