In [1]:
# TODO DELETE ME
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
# TODO

# find accuracy of Base model (using Spacy Root Verb Dependency)
# find accuracy using vanilla CRF Suite
# explore features and hyperparameters of CRF Suite Algo
# find accuracy using tweaked CRF Suite
# display results using Displacy
# interpret results using eli5

<IPython.core.display.Javascript object>

In [3]:
import warnings

# Suppress every warning for the rest of the kernel session so cell output stays short.
# NOTE(review): this also hides deprecation warnings raised by the libraries used
# below -- consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

<IPython.core.display.Javascript object>

In [4]:
import pandas as pd
import math

# to set random seed
import numpy as np

# used to create CRF model
from sklearn_crfsuite import CRF

# used to evaluate model
from sklearn_crfsuite import metrics

# for hyperparameter tuning parameter range
import scipy

# for specifying f1 metrics
from sklearn.metrics import make_scorer

# for cross validation of hyperparameters
from sklearn.model_selection import RandomizedSearchCV

# to visualize the weight of parameters of the fitted model
import eli5

<IPython.core.display.Javascript object>

In [5]:
# Show up to 600 rows when a DataFrame/Series is displayed.
# Use the fully qualified option name: abbreviated names such as "max_row" are
# resolved by pattern matching and stop working if another matching option appears.
pd.set_option("display.max_rows", 600)

<IPython.core.display.Javascript object>

In [6]:
np.random.seed(42)

<IPython.core.display.Javascript object>

In [7]:
data = pd.read_csv("tagged_data.csv", index_col=0)

<IPython.core.display.Javascript object>

In [8]:
data.head(10)

Unnamed: 0,sentence#,word,pos,tag
0,0.0,Preheat,VB,U-Action
1,0.0,oven,NN,U-Utensil
2,0.0,to,IN,O
3,0.0,425,CD,O
4,0.0,degrees,NNS,O
5,0.0,F.,NN,O
6,1.0,Press,NN,U-Action
7,1.0,dough,NN,O
8,1.0,into,IN,O
9,1.0,the,DT,O


<IPython.core.display.Javascript object>

In [9]:
data["tag"].value_counts()

O               2498
U-Action         366
U-Ingredient     241
L-Ingredient      70
B-Ingredient      70
B-Utensil         60
U-Utensil         59
L-Utensil         59
I-Utensil         15
I-Ingredient       3
L-Action           2
B-Action           2
Name: tag, dtype: int64

<IPython.core.display.Javascript object>

In [10]:
print(len(data["word"].values))

3445


<IPython.core.display.Javascript object>

In [11]:
words = list(set(data["word"].values))
len(words)

869

<IPython.core.display.Javascript object>

In [12]:
def agg_func(s):
    """Collapse the rows of one sentence into a list of ``(word, pos, tag)`` tuples.

    Parameters
    ----------
    s : pandas.DataFrame
        Rows belonging to a single group; must contain the columns
        ``"word"``, ``"pos"`` and ``"tag"``.

    Returns
    -------
    list of tuple
        One ``(word, pos, tag)`` tuple per row, in row order.
    """
    return list(zip(s["word"].tolist(), s["pos"].tolist(), s["tag"].tolist()))

<IPython.core.display.Javascript object>

In [13]:
grouped = data.groupby("sentence#").apply(agg_func)

<IPython.core.display.Javascript object>

In [14]:
# materialise the grouped result as a plain list, one entry per sentence
sentences = list(grouped)

<IPython.core.display.Javascript object>

In [15]:
len(sentences)

264

<IPython.core.display.Javascript object>

In [16]:
def _neighbor_features(token, prefix):
    """Return the context features contributed by one neighbouring token.

    ``token`` is a tuple whose first two items are the word and its POS tag;
    ``prefix`` ("-1:" or "+1:") is prepended to every feature name.
    """
    word, postag = token[0], token[1]
    return {
        # lower-case form of the neighbouring word
        prefix + "word.lower()": word.lower(),
        # whether the neighbouring word is title case
        prefix + "word.istitle()": word.istitle(),
        # whether the neighbouring word is upper case
        prefix + "word.isupper()": word.isupper(),
        # POS tag of the neighbouring word
        prefix + "postag": postag,
        # first 2 letters of the neighbouring word's POS tag
        prefix + "postag[:2]": postag[:2],
    }


def word2features(sent, i):
    """Build the CRF feature dict for the token at position ``i`` of ``sent``.

    Parameters
    ----------
    sent : sequence of tuple
        Tokens of one sentence. Only the first two items of each tuple
        (word, POS tag) are read, so a trailing label is optional.
    i : int
        Index of the token to describe.

    Returns
    -------
    dict
        Features of the token itself, plus "-1:"-prefixed features of the
        previous token (or ``"BOS": True`` for the first token) and
        "+1:"-prefixed features of the next token (or ``"EOS": True`` for
        the last token).
    """
    word = sent[i][0]
    postag = sent[i][1]

    features = {
        # constant bias term for each word
        "bias": 1.0,
        # lower-case form of the word
        "word.lower()": word.lower(),
        # last 3 letters of the word
        "word[-3:]": word[-3:],
        # last 2 letters of the word
        "word[-2:]": word[-2:],
        # whether the word is upper case
        "word.isupper()": word.isupper(),
        # whether the word is title case
        "word.istitle()": word.istitle(),
        # whether the word is a digit string; useful for quantities, which are tagged 'O'
        "word.isdigit()": word.isdigit(),
        # POS tag of the word
        "postag": postag,
        # first 2 letters of the POS tag
        "postag[:2]": postag[:2],
    }

    if i > 0:
        # not the first token: add features of the previous token (index i-1)
        features.update(_neighbor_features(sent[i - 1], "-1:"))
    else:
        # first token: flag the Beginning Of Sentence
        features["BOS"] = True

    if i < len(sent) - 1:
        # not the last token: add features of the next token (index i+1)
        features.update(_neighbor_features(sent[i + 1], "+1:"))
    else:
        # last token: flag the End Of Sentence
        features["EOS"] = True

    return features

<IPython.core.display.Javascript object>

In [17]:
def sent2features(sent):
    """Return one feature dict per token of ``sent``, in token order.

    ``sent`` is a list of token tuples; each position is handed to
    ``word2features`` together with the whole sentence.
    """
    feature_dicts = []
    for position in range(len(sent)):
        feature_dicts.append(word2features(sent, position))
    return feature_dicts

<IPython.core.display.Javascript object>

In [18]:
def sent2labels(sent):
    """Return the label of every token in ``sent``, in token order.

    Each element of ``sent`` must unpack into exactly ``(word, pos, label)``.
    """
    tags = []
    for _word, _pos, tag in sent:
        tags.append(tag)
    return tags

<IPython.core.display.Javascript object>

In [19]:
X = [sent2features(s) for s in sentences]
y = [sent2labels(s) for s in sentences]

<IPython.core.display.Javascript object>

In [20]:
# split into train and test
boundary = math.ceil(len(X) * 0.8)

<IPython.core.display.Javascript object>

In [21]:
boundary

212

<IPython.core.display.Javascript object>

In [22]:
# train data: the first `boundary` sentences, taken in their existing order
# (the split is positional -- nothing is shuffled)
x_train = X[:boundary]
y_train = y[:boundary]

# test data: every sentence from position `boundary` onwards
x_test = X[boundary:]
y_test = y[boundary:]

<IPython.core.display.Javascript object>

In [23]:
print(len(x_train))
print(len(x_test))

212
52


<IPython.core.display.Javascript object>

In [24]:
# Baseline (untuned) CRF model trained with the "lbfgs" algorithm.
# c1 / c2 are passed straight through to sklearn_crfsuite.CRF -- presumably the
# L1 / L2 regularisation strengths; confirm against the library documentation.
crf = CRF(
    algorithm="lbfgs",
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    all_possible_transitions=False,
)

<IPython.core.display.Javascript object>

In [25]:
# fitting the model using train data
crf.fit(x_train, y_train)

CRF(algorithm='lbfgs', all_possible_states=None, all_possible_transitions=False,
    averaging=None, c=None, c1=0.1, c2=0.1, calibration_candidates=None,
    calibration_eta=None, calibration_max_trials=None, calibration_rate=None,
    calibration_samples=None, delta=None, epsilon=None, error_sensitive=None,
    gamma=None, keep_tempfiles=None, linesearch=None, max_iterations=100,
    max_linesearch=None, min_freq=None, model_filename=None, num_memories=None,
    pa_type=None, period=None, trainer_cls=None, variance=None, verbose=False)

<IPython.core.display.Javascript object>

In [26]:
# to get all the labels/tags of data
labels = list(crf.classes_)

<IPython.core.display.Javascript object>

In [27]:
labels

['U-Action',
 'U-Utensil',
 'O',
 'B-Utensil',
 'L-Utensil',
 'U-Ingredient',
 'B-Ingredient',
 'L-Ingredient',
 'I-Utensil',
 'I-Ingredient',
 'B-Action',
 'L-Action']

<IPython.core.display.Javascript object>

Since we are not interested in 'O' tags we will check the performance of the CRF model using f1 scores for every tag except O tags.

In [28]:
# Drop the "O" tag from the labels used for scoring.
# Filtering (instead of list.remove) keeps the cell idempotent: running it a
# second time no longer raises ValueError because "O" is already gone.
labels = [label for label in labels if label != "O"]
labels

['U-Action',
 'U-Utensil',
 'B-Utensil',
 'L-Utensil',
 'U-Ingredient',
 'B-Ingredient',
 'L-Ingredient',
 'I-Utensil',
 'I-Ingredient',
 'B-Action',
 'L-Action']

<IPython.core.display.Javascript object>

In [29]:
# performing predictions based on the fitted model
y_pred = crf.predict(x_test)

<IPython.core.display.Javascript object>

In [30]:
# finding the f1 score
metrics.flat_f1_score(y_test, y_pred, average="weighted", labels=labels)

0.719570446804729

<IPython.core.display.Javascript object>

In [31]:
# per-label precision / recall / F1 on the test set, restricted to `labels`
print(metrics.flat_classification_report(y_test, y_pred, labels=labels, digits=3))

              precision    recall  f1-score   support

    U-Action      0.911     0.878     0.894        82
   U-Utensil      1.000     0.412     0.583        17
   B-Utensil      1.000     0.231     0.375        13
   L-Utensil      1.000     0.308     0.471        13
U-Ingredient      0.738     0.689     0.713        45
B-Ingredient      0.474     0.750     0.581        12
L-Ingredient      0.474     0.750     0.581        12
   I-Utensil      1.000     0.333     0.500         3
I-Ingredient      0.000     0.000     0.000         0
    B-Action      0.000     0.000     0.000         0
    L-Action      0.000     0.000     0.000         0

   micro avg      0.782     0.690     0.733       197
   macro avg      0.600     0.395     0.427       197
weighted avg      0.839     0.690     0.720       197



<IPython.core.display.Javascript object>

The model overfits!!

In [32]:
len(data[((data["word"] == "Boil") | (data["word"] == "boil"))])

13

<IPython.core.display.Javascript object>

In [33]:
len(data[((data["word"] == "Cover") | (data["word"] == "cover"))])

17

<IPython.core.display.Javascript object>

In [34]:
len(data[((data["word"] == "Preheat") | (data["word"] == "preheat"))])

9

<IPython.core.display.Javascript object>

In [35]:
eli5.show_weights(crf, top=30)

From \ To,O,B-Action,L-Action,U-Action,B-Ingredient,I-Ingredient,L-Ingredient,U-Ingredient,B-Utensil,I-Utensil,L-Utensil,U-Utensil
O,3.17,0.363,0.0,1.63,1.197,0.0,0.0,2.178,2.136,0.0,0.0,1.323
B-Action,0.0,0.0,2.47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
L-Action,0.368,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
U-Action,1.472,0.0,0.0,0.0,0.738,0.0,0.0,1.544,0.0,0.0,0.0,1.47
B-Ingredient,0.0,0.0,0.0,0.0,0.0,1.447,5.673,0.0,0.0,0.0,0.0,0.0
I-Ingredient,-0.108,0.0,0.0,0.0,0.0,0.0,1.425,0.0,0.0,0.0,0.0,0.0
L-Ingredient,1.276,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
U-Ingredient,1.786,0.0,0.0,0.313,0.615,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B-Utensil,-1.275,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.279,4.216,0.0
I-Utensil,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.542,3.67,0.0

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4
Weight?,Feature,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5
Weight?,Feature,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6
Weight?,Feature,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7
Weight?,Feature,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8
Weight?,Feature,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9,Unnamed: 6_level_9,Unnamed: 7_level_9,Unnamed: 8_level_9,Unnamed: 9_level_9,Unnamed: 10_level_9,Unnamed: 11_level_9
Weight?,Feature,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,Unnamed: 10_level_10,Unnamed: 11_level_10
Weight?,Feature,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11,Unnamed: 6_level_11,Unnamed: 7_level_11,Unnamed: 8_level_11,Unnamed: 9_level_11,Unnamed: 10_level_11,Unnamed: 11_level_11
+3.657,bias,,,,,,,,,,
+2.062,+1:word.lower():of,,,,,,,,,,
+1.907,word.lower():hour,,,,,,,,,,
+1.816,word.lower():let,,,,,,,,,,
+1.802,-1:word.lower():salted,,,,,,,,,,
+1.789,postag:VBZ,,,,,,,,,,
+1.768,word.lower():depending,,,,,,,,,,
+1.685,+1:word.lower()::,,,,,,,,,,
+1.647,word.lower():boil,,,,,,,,,,
+1.635,+1:word.lower():for,,,,,,,,,,

Weight?,Feature
+3.657,bias
+2.062,+1:word.lower():of
+1.907,word.lower():hour
+1.816,word.lower():let
+1.802,-1:word.lower():salted
+1.789,postag:VBZ
+1.768,word.lower():depending
+1.685,+1:word.lower()::
+1.647,word.lower():boil
+1.635,+1:word.lower():for

Weight?,Feature
0.965,-1:word.lower():and
0.866,-1:postag[:2]:CC
0.866,-1:postag:CC
0.741,word[-2:]:ll
0.719,word[-3:]:oll
0.719,word.lower():roll
0.716,+1:word.lower():up
0.694,+1:word.lower():lightly
0.687,+1:postag:RP
0.687,+1:postag[:2]:RP

Weight?,Feature
0.884,-1:word.lower():brown
0.883,word.lower():lightly
0.848,word[-3:]:tly
0.768,-1:word.lower():roll
0.719,word[-2:]:ly
0.685,postag[:2]:RB
0.683,word.lower():up
0.683,word[-3:]:up
0.678,postag:RB
0.659,postag:RP

Weight?,Feature
+2.710,postag[:2]:VB
+2.665,word.istitle()
+2.145,-1:word.lower():before
+2.081,-1:word.lower():teaspoonful
+2.068,word.lower():boil
+1.919,postag:VB
+1.870,word[-3:]:oil
+1.776,word.lower():cover
+1.761,-1:word.lower():continue
+1.739,+1:word.lower():ground

Weight?,Feature
+1.759,+1:postag[:2]:NN
+1.350,+1:word.lower():cream
+1.005,word[-2:]:ed
+0.993,+1:word.lower():cheese
+0.950,-1:word.lower():with
+0.938,+1:word.lower():sugar
+0.933,word[-3:]:und
+0.933,word.lower():ground
+0.932,+1:postag:NNS
+0.808,"-1:postag[:2]:,"

Weight?,Feature
1.843,word.lower():water
1.259,word[-3:]:ter
1.135,-1:word.lower():of
1.051,+1:word.lower():has
1.037,+1:word.lower():wrapper
1.016,word.lower():roll
1.016,word[-3:]:oll
0.991,-1:word.lower():egg
0.951,word[-2:]:ll
0.942,+1:postag:VBZ

Weight?,Feature
+0.995,-1:word.lower():ground
+0.900,word.lower():cream
+0.900,word[-3:]:eam
+0.856,word[-2:]:am
+0.838,word[-2:]:ck
+0.743,word.lower():wrapper
+0.738,-1:postag[:2]:NN
+0.732,word[-3:]:gar
+0.732,word.lower():sugar
+0.727,-1:word.lower():roll

Weight?,Feature
+3.056,word.lower():oil
+2.562,-1:word.lower():the
+2.196,word.lower():chicken
+2.044,word[-3:]:oes
+2.044,word.lower():potatoes
+2.035,word.lower():flour
+1.979,word.lower():butter
+1.891,word.lower():loaves
+1.876,word[-3:]:ken
+1.738,word.lower():ribs

Weight?,Feature
+1.722,-1:word.lower():a
+1.257,word[-2:]:um
+1.114,-1:word.lower():or
+1.101,+1:postag:JJ
+1.044,+1:word.lower():low
+1.041,+1:word.lower():pot
+0.993,+1:postag[:2]:JJ
+0.917,word[-3:]:low
+0.912,word[-2:]:ow
+0.854,+1:postag[:2]:NN

Weight?,Feature
+1.528,+1:postag[:2]:NN
+0.998,-1:word.lower():glass
+0.958,-1:postag:JJ
+0.937,+1:word.lower():sauce
+0.932,-1:postag[:2]:JJ
+0.539,word[-2:]:up
+0.482,word[-3:]:uce
+0.459,word.lower():sauce
+0.416,+1:word.lower():dish
+0.406,+1:postag:NNS

Weight?,Feature
+1.269,word[-3:]:pan
+1.188,-1:word.lower():large
+1.142,word[-2:]:an
+0.935,word[-2:]:ls
+0.925,-1:word.lower():soup
+0.871,word[-3:]:pot
+0.871,word.lower():pot
+0.853,word[-2:]:ot
+0.843,-1:postag:NN
+0.815,postag[:2]:NN

Weight?,Feature
+2.367,word.lower():blender
+2.006,word.lower():pans
+1.964,word.lower():jars
+1.964,word[-3:]:ars
+1.583,word.lower():oven
+1.577,word[-3:]:ven
+1.530,-1:word.lower():serving
+1.496,word[-2:]:ag
+1.496,word[-3:]:bag
+1.496,word.lower():bag


<IPython.core.display.Javascript object>

**Hyperparameter Optimization**

In [36]:
# Base estimator for the hyperparameter search. The c1 / c2 values given here are
# only placeholders: RandomizedSearchCV overwrites both with values sampled from
# `params_space` for every candidate it fits.
crf_hp = CRF(
    algorithm="lbfgs", c1=10, c2=0.1, max_iterations=100, all_possible_transitions=False
)

# sampling distributions for the parameters to tune
params_space = {"c1": scipy.stats.expon(scale=0.5), "c2": scipy.stats.expon(scale=0.05)}

# metric for evaluation: weighted F1 over `labels` only
f1_scorer = make_scorer(metrics.flat_f1_score, average="weighted", labels=labels)

# search using 3 fold cross validation.
# random_state is pinned so the 50 sampled candidates do not depend on how much of
# the global NumPy random stream earlier cells consumed (i.e. on cell run order).
rs = RandomizedSearchCV(
    crf_hp,
    params_space,
    cv=3,
    verbose=1,
    n_jobs=-1,
    n_iter=50,
    scoring=f1_scorer,
    random_state=42,
)

<IPython.core.display.Javascript object>

In [None]:
# fitting the hyperparameters
rs.fit(x_train, y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   14.9s


Best Parameters:

In [None]:
print("best params:", rs.best_params_)
print("best CV score:", rs.best_score_)

In [None]:
# set the best estimator
crf_hp_tuned = rs.best_estimator_

In [None]:
# predict using the best CRF model
y_pred = crf_hp_tuned.predict(x_test)

In [None]:
# print the f1 evaluation metric
print(metrics.flat_classification_report(y_test, y_pred, labels=labels, digits=3))

In [None]:
# transition features: given B-Utensil then what is score for I-Utensil

In [None]:
# state features: what property of feature describes the tag

In [None]:
eli5.show_weights(crf_hp_tuned, top=30)

## Untagged Data Stuff

In [None]:
untagged_test_data = pd.read_csv("./Untagged Test Data/untagged_test_data.csv")

In [None]:
untagged_test_data.head()

In [None]:
len(untagged_test_data)

In [None]:
# 30:5:30

In [None]:
def agg_func_test(s):
    """Collapse the rows of one group into a list of ``(word, pos)`` tuples.

    Parameters
    ----------
    s : pandas.DataFrame
        Rows belonging to a single group; must contain the columns
        ``"word"`` and ``"pos"``.

    Returns
    -------
    list of tuple
        One ``(word, pos)`` tuple per row, in row order.
    """
    return list(zip(s["word"].tolist(), s["pos"].tolist()))

In [None]:
grouped_test = untagged_test_data.groupby(["recipe_name", "Step#"]).apply(agg_func_test)

In [None]:
# materialise the grouped result as a plain list, one entry per group
test_sentences = list(grouped_test)

In [None]:
len(test_sentences)

In [None]:
X_untagged_test = [sent2features(s) for s in test_sentences]

In [None]:
len(X_untagged_test)

In [None]:
pred_test = crf_hp_tuned.predict(X_untagged_test)

In [None]:
len(pred_test)

In [None]:
# number of tokens across all predicted sentences that were tagged "U-Action"
count = sum(
    1
    for sentence_tags in pred_test
    for predicted_tag in sentence_tags
    if predicted_tag == "U-Action"
)

In [None]:
count

In [None]:
len(list(untagged_test_data["word"].values))

In [None]:
len(list(set(untagged_test_data["word"].values)))

Bootstrap the predicted output onto the untagged data DataFrame

In [None]:
# flatten the per-sentence predictions into a single list of tags
flat_list = [
    predicted_tag for sentence_tags in pred_test for predicted_tag in sentence_tags
]

In [None]:
len(flat_list)

In [None]:
# The predictions were produced per (recipe_name, Step#) group, and groupby yields
# its groups in sorted key order -- not necessarily the row order of the DataFrame.
# Rebuild the row labels in that same group order and let pandas align on the
# index, so every tag lands on the row it was predicted for. A length mismatch
# (e.g. rows dropped from the grouping) raises instead of silently misaligning.
grouped_row_labels = [
    row_label
    for _, step_rows in untagged_test_data.groupby(["recipe_name", "Step#"])
    for row_label in step_rows.index
]
untagged_test_data["Predicted Output"] = pd.Series(
    flat_list, index=grouped_row_labels
)

In [None]:
len(untagged_test_data[untagged_test_data["Predicted Output"] == "U-Action"])

In [None]:
untagged_test_data[untagged_test_data["Predicted Output"] == "U-Action"]