# Conditional Random Fields

In [21]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')

import nltk
import scipy.stats
from sklearn.metrics import make_scorer
try:
    # scikit-learn >= 0.18 exposes these under model_selection; the legacy
    # modules below were removed in 0.20.
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import RandomizedSearchCV
except ImportError:
    from sklearn.cross_validation import cross_val_score
    from sklearn.grid_search import RandomizedSearchCV
import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics



In [2]:
# Materialise the two splits as lists so they can be iterated several times.
# Each sentence is a sequence of (token, postag, label) triples, as unpacked by
# sent2labels / sent2tokens below. The 'esp.*' file ids are presumably the
# Spanish portion of CoNLL-2002.
corpus = nltk.corpus.conll2002
train_sents, test_sents = (
    list(corpus.iob_sents(fileid)) for fileid in ('esp.train', 'esp.testb')
)

In [3]:
def word2features(sent, i):
    """Build the feature dict for the token at position ``i`` of ``sent``.

    Parameters
    ----------
    sent : sequence
        A sentence whose items are indexable, with the word at index 0 and the
        POS tag at index 1 (both strings).
    i : int
        Index of the token to describe.

    Returns
    -------
    dict
        Features of the token itself (lower-cased form, suffixes, casing and
        digit flags, POS tag and its two-character prefix) plus the same kind
        of features for the previous ('-1:') and next ('+1:') tokens. 'BOS' is
        set instead of the '-1:' features when there is no previous token, and
        'EOS' instead of the '+1:' features when there is no next token.
    """
    token = sent[i][0]
    tag = sent[i][1]

    features = {
        'bias': 1.0,
        'word.lower()': token.lower(),
        'word[-3:]': token[-3:],
        'word[-2:]': token[-2:],
        'word.isupper()': token.isupper(),
        'word.istitle()': token.istitle(),
        'word.isdigit()': token.isdigit(),
        'postag': tag,
        'postag[:2]': tag[:2],
    }

    # Left context: mark the sentence start, or describe the previous token.
    if i <= 0:
        features['BOS'] = True
    else:
        prev_token = sent[i - 1][0]
        prev_tag = sent[i - 1][1]
        features['-1:word.lower()'] = prev_token.lower()
        features['-1:word.istitle()'] = prev_token.istitle()
        features['-1:word.isupper()'] = prev_token.isupper()
        features['-1:postag'] = prev_tag
        features['-1:postag[:2]'] = prev_tag[:2]

    # Right context: mark the sentence end, or describe the next token.
    if i >= len(sent) - 1:
        features['EOS'] = True
    else:
        next_token = sent[i + 1][0]
        next_tag = sent[i + 1][1]
        features['+1:word.lower()'] = next_token.lower()
        features['+1:word.istitle()'] = next_token.istitle()
        features['+1:word.isupper()'] = next_token.isupper()
        features['+1:postag'] = next_tag
        features['+1:postag[:2]'] = next_tag[:2]

    return features

def sent2features(sent):
    """Return one ``word2features`` dict per token of ``sent``, in order."""
    positions = range(len(sent))
    return [word2features(sent, pos) for pos in positions]

def sent2labels(sent):
    """Return the label (third field) of every (token, postag, label) triple in ``sent``."""
    return [label for _token, _postag, label in sent]

def sent2tokens(sent):
    """Return the token (first field) of every (token, postag, label) triple in ``sent``."""
    return [token for token, _postag, _label in sent]

In [4]:
%%time
# Turn every sentence of both splits into a list of per-token feature dicts
# (x_*) and the parallel list of labels (y_*).
x_train = list(map(sent2features, train_sents))
y_train = list(map(sent2labels, train_sents))
x_test = list(map(sent2features, test_sents))
y_test = list(map(sent2labels, test_sents))

CPU times: user 784 ms, sys: 76 ms, total: 860 ms
Wall time: 860 ms


In [6]:
%%time

# Baseline model: a CRF trained with L-BFGS and hand-picked regularisation
# strengths. Per the sklearn-crfsuite docs, c1 and c2 are the L1 and L2
# regularisation coefficients.
crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    all_possible_transitions=True
)
# Fit on the featurised training sentences and their label sequences.
crf.fit(x_train, y_train)

CPU times: user 27.3 s, sys: 20 ms, total: 27.4 s
Wall time: 27.4 s


In [13]:
# Labels to score: every class the fitted CRF knows except 'O', presumably
# because non-entity tokens would otherwise dominate the average.
# Note: list.remove raises ValueError if 'O' is not among the classes.
labels = list(crf.classes_)
labels.remove ('O')

# Predict label sequences for the test sentences. The weighted F1 over
# `labels` is the cell's last expression, so it is shown as the cell output.
y_pred = crf.predict (x_test)
metrics.flat_f1_score (y_test, y_pred, average='weighted', labels=labels)

0.7964686316443963

In [18]:
# Order the tags by entity type first (everything after the leading B/I
# character) and by the B/I prefix second, so each entity's B- and I- rows are
# adjacent in the report.
sorted_labels = list(labels)
sorted_labels.sort(key=lambda tag: (tag[1:], tag[0]))

# Per-label precision / recall / F1 on the test set, to three decimals.
print(metrics.flat_classification_report(
    y_test,
    y_pred,
    labels=sorted_labels,
    digits=3,
))

             precision    recall  f1-score   support

      B-LOC      0.810     0.784     0.797      1084
      I-LOC      0.690     0.637     0.662       325
     B-MISC      0.731     0.569     0.640       339
     I-MISC      0.699     0.589     0.639       557
      B-ORG      0.807     0.832     0.820      1400
      I-ORG      0.852     0.786     0.818      1104
      B-PER      0.850     0.884     0.867       735
      I-PER      0.893     0.943     0.917       634

avg / total      0.809     0.787     0.796      6178



In [23]:
%%time
# Hyper-parameter search: same CRF set-up as the baseline, but c1 / c2 are
# left unset so that the randomized search supplies them.
# NOTE: this rebinds `crf` to an *unfitted* estimator; the fitted baseline from
# the earlier cell is no longer reachable under that name.
crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    max_iterations=100,
    all_possible_transitions=True
)

# Sample both regularisation coefficients from exponential distributions
# (mean 0.5 for c1, mean 0.05 for c2).
params_space = {
    'c1': scipy.stats.expon(scale=0.5),
    'c2': scipy.stats.expon(scale=0.05)
}

# Score candidates with the same metric used for evaluation: weighted F1 over
# the entity labels (the `labels` list built earlier, without 'O').
f1_scorer = make_scorer(metrics.flat_f1_score, average='weighted', labels=labels)

# 50 random candidates x 3-fold CV, run on all available cores.
# random_state pins the sampled (c1, c2) candidates so that a
# Restart & Run All reproduces the same search. scikit-learn forwards it to
# the scipy.stats distributions, which per its docs needs SciPy >= 0.16.
rs = RandomizedSearchCV(
    crf,
    params_space,
    cv=3,
    verbose=1,
    n_jobs=-1,
    n_iter=50,
    scoring=f1_scorer,
    random_state=0,
)
rs.fit(x_train, y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed: 11.1min finished


CPU times: user 2min 30s, sys: 2.31 s, total: 2min 32s
Wall time: 11min 31s


In [None]:
# Collect the sampled (c1, c2) pairs and their mean cross-validation score.
# `cv_results_` is the current scikit-learn API; `grid_scores_` is the legacy
# attribute exposed by the old sklearn.grid_search implementation.
if hasattr(rs, 'cv_results_'):
    _x = [params['c1'] for params in rs.cv_results_['params']]
    _y = [params['c2'] for params in rs.cv_results_['params']]
    _c = list(rs.cv_results_['mean_test_score'])
else:
    _x = [s.parameters['c1'] for s in rs.grid_scores_]
    _y = [s.parameters['c2'] for s in rs.grid_scores_]
    _c = [s.mean_validation_score for s in rs.grid_scores_]

fig, ax = plt.subplots(figsize=(12, 12))
# Both coefficients are drawn from exponential distributions, so the samples
# cluster near zero; log axes spread them out.
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('c1')
ax.set_ylabel('c2')
ax.set_title(
    'Randomized search CV results (min={:0.3}, max={:0.3})'.format(min(_c), max(_c))
)

# Colour each candidate by its mean CV score. The original computed `_c` but
# never passed it to scatter, so the plot showed no score information.
points = ax.scatter(_x, _y, c=_c, s=60, alpha=0.9, edgecolors='black')
colorbar = fig.colorbar(points, ax=ax)
colorbar.set_label('mean CV weighted F1')

In [45]:
# Evaluate the best model found by the randomized search on the test set.
# NOTE: this rebinds both `crf` and `y_pred`, replacing the values from the
# earlier cells.
crf = rs.best_estimator_
y_pred = crf.predict (x_test)
# Same per-label report as before (labels in `sorted_labels` order, 3 decimals).
print (metrics.flat_classification_report(y_test, y_pred, labels=sorted_labels, digits=3))

             precision    recall  f1-score   support

      B-LOC      0.811     0.785     0.798      1084
      I-LOC      0.704     0.637     0.669       325
     B-MISC      0.732     0.555     0.631       339
     I-MISC      0.720     0.576     0.640       557
      B-ORG      0.808     0.838     0.823      1400
      I-ORG      0.845     0.796     0.820      1104
      B-PER      0.846     0.884     0.865       735
      I-PER      0.893     0.945     0.918       634

avg / total      0.811     0.788     0.798      6178

