https://github.com/susanli2016/NLP-with-Python/blob/master/NER_sklearn.ipynb

In [17]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

In [18]:
df = pd.read_csv('https://raw.githubusercontent.com/susanli2016/NLP-with-Python/master/data/ner_dataset.csv', encoding = "ISO-8859-1")
df = df[:100000]
df.head()

Unnamed: 0,Sentence #,Word,POS,Tag
0,Sentence: 1,Thousands,NNS,O
1,,of,IN,O
2,,demonstrators,NNS,O
3,,have,VBP,O
4,,marched,VBN,O


In [19]:
df.isnull().sum()

Sentence #    95456
Word              0
POS               0
Tag               0
dtype: int64

In [20]:
df = df.fillna(method='ffill')

df['Sentence #'].nunique(), df.Word.nunique(), df.Tag.nunique()

(4544, 10922, 17)

In [21]:
df.head()

Unnamed: 0,Sentence #,Word,POS,Tag
0,Sentence: 1,Thousands,NNS,O
1,Sentence: 1,of,IN,O
2,Sentence: 1,demonstrators,NNS,O
3,Sentence: 1,have,VBP,O
4,Sentence: 1,marched,VBN,O


In [23]:
df.groupby('Tag').size().reset_index(name='counts')

Unnamed: 0,Tag,counts
0,B-art,75
1,B-eve,53
2,B-geo,3303
3,B-gpe,1740
4,B-nat,30
5,B-org,1876
6,B-per,1668
7,B-tim,1823
8,I-art,43
9,I-eve,47


In [24]:
X = df.drop('Tag', axis=1)
X.head()

Unnamed: 0,Sentence #,Word,POS
0,Sentence: 1,Thousands,NNS
1,Sentence: 1,of,IN
2,Sentence: 1,demonstrators,NNS
3,Sentence: 1,have,VBP
4,Sentence: 1,marched,VBN


In [25]:
v = DictVectorizer(sparse=False)
X = v.fit_transform(X.to_dict('records'))
X = X.astype(np.uint8) # it is because of memory error.
X.shape

(100000, 15507)

In [26]:
y = df.Tag.values
# y = y.astype(np.uint8)
classes = np.unique(y).tolist()
classes

['B-art',
 'B-eve',
 'B-geo',
 'B-gpe',
 'B-nat',
 'B-org',
 'B-per',
 'B-tim',
 'I-art',
 'I-eve',
 'I-geo',
 'I-gpe',
 'I-nat',
 'I-org',
 'I-per',
 'I-tim',
 'O']

In [27]:
X.shape, y.shape

((100000, 15507), (100000,))

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state=0)
X_train.shape, y_train.shape

((67000, 15507), (67000,))

In [29]:
new_classes = classes.copy()
new_classes.pop() # to remove "O" from the class label
new_classes

['B-art',
 'B-eve',
 'B-geo',
 'B-gpe',
 'B-nat',
 'B-org',
 'B-per',
 'B-tim',
 'I-art',
 'I-eve',
 'I-geo',
 'I-gpe',
 'I-nat',
 'I-org',
 'I-per',
 'I-tim']

In [30]:
per = Perceptron(verbose=10, n_jobs=-1, max_iter=5)
per.partial_fit(X_train, y_train, classes)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.


-- Epoch 1-- Epoch 1

-- Epoch 1
-- Epoch 1
-- Epoch 1
-- Epoch 1
-- Epoch 1
-- Epoch 1
Norm: 48.83, NNZs: 1578, Bias: -4.000000, T: 67000, Avg. loss: 0.022328
Total training time: 42.25 seconds.
-- Epoch 1
Norm: 13.42, NNZs: 162, Bias: -4.000000, T: 67000, Avg. loss: 0.001642
Total training time: 42.53 seconds.


[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   42.8s


-- Epoch 1
Norm: 11.53, NNZs: 113, Bias: -3.000000, T: 67000, Avg. loss: 0.001060
Total training time: 42.75 seconds.
-- Epoch 1
Norm: 68.07, NNZs: 2642, Bias: -4.000000, T: 67000, Avg. loss: 0.041776
Total training time: 42.88 seconds.
-- Epoch 1


[Parallel(n_jobs=-1)]: Done   4 out of  17 | elapsed:   43.1s remaining:  2.3min


Norm: 8.43, NNZs: 57, Bias: -3.000000, T: 67000, Avg. loss: 0.000567
Total training time: 43.32 seconds.
-- Epoch 1
Norm: 44.41, NNZs: 1127, Bias: -4.000000, T: 67000, Avg. loss: 0.017164
Total training time: 44.44 seconds.
-- Epoch 1


[Parallel(n_jobs=-1)]: Done   6 out of  17 | elapsed:   44.7s remaining:  1.4min


Norm: 49.90, NNZs: 1337, Bias: -4.000000, T: 67000, Avg. loss: 0.015328
Total training time: 44.64 seconds.
-- Epoch 1
Norm: 56.87, NNZs: 2044, Bias: -4.000000, T: 67000, Avg. loss: 0.034970
Total training time: 45.09 seconds.
-- Epoch 1


[Parallel(n_jobs=-1)]: Done   8 out of  17 | elapsed:   45.6s remaining:   51.3s


Norm: 10.44, NNZs: 106, Bias: -3.000000, T: 67000, Avg. loss: 0.001060
Total training time: 30.42 seconds.
Norm: 11.45, NNZs: 96, Bias: -3.000000, T: 67000, Avg. loss: 0.000776
Total training time: 30.47 seconds.
-- Epoch 1


[Parallel(n_jobs=-1)]: Done  10 out of  17 | elapsed:  1.2min remaining:   51.3s


Norm: 11.00, NNZs: 102, Bias: -3.000000, T: 67000, Avg. loss: 0.001209
Total training time: 30.35 seconds.
Norm: 35.13, NNZs: 803, Bias: -4.000000, T: 67000, Avg. loss: 0.011149
Total training time: 30.63 seconds.


[Parallel(n_jobs=-1)]: Done  12 out of  17 | elapsed:  1.2min remaining:   30.6s


Norm: 6.24, NNZs: 31, Bias: -3.000000, T: 67000, Avg. loss: 0.000209
Total training time: 30.20 seconds.
Norm: 60.35, NNZs: 2091, Bias: -6.000000, T: 67000, Avg. loss: 0.026940
Total training time: 29.58 seconds.
Norm: 53.57, NNZs: 1703, Bias: -4.000000, T: 67000, Avg. loss: 0.026224
Total training time: 29.90 seconds.


[Parallel(n_jobs=-1)]: Done  14 out of  17 | elapsed:  1.2min remaining:   15.9s


Norm: 30.53, NNZs: 672, Bias: -4.000000, T: 67000, Avg. loss: 0.012030
Total training time: 29.66 seconds.
Norm: 73.89, NNZs: 2851, Bias: 4.000000, T: 67000, Avg. loss: 0.048866
Total training time: 20.23 seconds.


[Parallel(n_jobs=-1)]: Done  17 out of  17 | elapsed:  1.6min finished


Perceptron(max_iter=5, n_jobs=-1, verbose=10)

In [35]:
print(classification_report(y_pred=per.predict(X_test), y_true=y_test, labels=new_classes))

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       B-art       0.00      0.00      0.00        24
       B-eve       0.11      0.05      0.07        19
       B-geo       0.56      0.81      0.66      1085
       B-gpe       0.92      0.78      0.84       556
       B-nat       1.00      0.17      0.29        12
       B-org       0.39      0.52      0.44       589
       B-per       0.70      0.46      0.56       564
       B-tim       0.91      0.63      0.75       611
       I-art       0.00      0.00      0.00        12
       I-eve       0.67      0.22      0.33        18
       I-geo       0.75      0.42      0.54       230
       I-gpe       1.00      0.07      0.13        14
       I-nat       0.50      0.50      0.50         2
       I-org       0.48      0.50      0.49       445
       I-per       0.83      0.13      0.22       591
       I-tim       0.36      0.18      0.24       194

   micro avg       0.61      0.54      0.58      4966
   macro avg       0.57   

In [31]:
sgd = SGDClassifier()
sgd.partial_fit(X_train, y_train, classes)
print(classification_report(y_pred=sgd.predict(X_test), y_true=y_test, labels=new_classes))

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       B-art       0.03      0.04      0.04        24
       B-eve       1.00      0.05      0.10        19
       B-geo       0.80      0.56      0.65      1085
       B-gpe       0.92      0.62      0.74       556
       B-nat       0.00      0.00      0.00        12
       B-org       0.26      0.71      0.38       589
       B-per       0.66      0.48      0.56       564
       B-tim       0.91      0.63      0.74       611
       I-art       1.00      0.08      0.15        12
       I-eve       1.00      0.06      0.11        18
       I-geo       0.72      0.57      0.63       230
       I-gpe       1.00      0.07      0.13        14
       I-nat       0.00      0.00      0.00         2
       I-org       0.70      0.40      0.51       445
       I-per       0.74      0.46      0.57       591
       I-tim       0.29      0.03      0.05       194

   micro avg       0.59      0.53      0.56      4966
   macro avg       0.63   

In [32]:
nb = MultinomialNB(alpha=0.01)
nb.partial_fit(X_train, y_train, classes)
print(classification_report(y_pred=sgd.predict(X_test), y_true=y_test, labels=new_classes))

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       B-art       0.03      0.04      0.04        24
       B-eve       1.00      0.05      0.10        19
       B-geo       0.80      0.56      0.65      1085
       B-gpe       0.92      0.62      0.74       556
       B-nat       0.00      0.00      0.00        12
       B-org       0.26      0.71      0.38       589
       B-per       0.66      0.48      0.56       564
       B-tim       0.91      0.63      0.74       611
       I-art       1.00      0.08      0.15        12
       I-eve       1.00      0.06      0.11        18
       I-geo       0.72      0.57      0.63       230
       I-gpe       1.00      0.07      0.13        14
       I-nat       0.00      0.00      0.00         2
       I-org       0.70      0.40      0.51       445
       I-per       0.74      0.46      0.57       591
       I-tim       0.29      0.03      0.05       194

   micro avg       0.59      0.53      0.56      4966
   macro avg       0.63   

In [33]:
pa =PassiveAggressiveClassifier()
pa.partial_fit(X_train, y_train, classes)
print(classification_report(y_pred=sgd.predict(X_test), y_true=y_test, labels=new_classes))

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       B-art       0.03      0.04      0.04        24
       B-eve       1.00      0.05      0.10        19
       B-geo       0.80      0.56      0.65      1085
       B-gpe       0.92      0.62      0.74       556
       B-nat       0.00      0.00      0.00        12
       B-org       0.26      0.71      0.38       589
       B-per       0.66      0.48      0.56       564
       B-tim       0.91      0.63      0.74       611
       I-art       1.00      0.08      0.15        12
       I-eve       1.00      0.06      0.11        18
       I-geo       0.72      0.57      0.63       230
       I-gpe       1.00      0.07      0.13        14
       I-nat       0.00      0.00      0.00         2
       I-org       0.70      0.40      0.51       445
       I-per       0.74      0.46      0.57       591
       I-tim       0.29      0.03      0.05       194

   micro avg       0.59      0.53      0.56      4966
   macro avg       0.63   

# Conditional Random Fields (CRFs)

In [37]:
!pip install sklearn_crfsuite

Collecting sklearn_crfsuite
  Downloading sklearn_crfsuite-0.3.6-py2.py3-none-any.whl (12 kB)
Collecting python-crfsuite>=0.8.3
  Downloading python_crfsuite-0.9.7-cp38-cp38-win_amd64.whl (156 kB)
Collecting tabulate
  Downloading tabulate-0.8.9-py3-none-any.whl (25 kB)
Installing collected packages: python-crfsuite, tabulate, sklearn-crfsuite
Successfully installed python-crfsuite-0.9.7 sklearn-crfsuite-0.3.6 tabulate-0.8.9


In [38]:
import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics

In [39]:
class SentenceGetter(object):
    
    def __init__(self, data):
        self.n_sent = 1
        self.data = data
        self.empty = False
        agg_func = lambda s: [(w, p, t) for w, p, t in zip(s['Word'].values.tolist(), 
                                                           s['POS'].values.tolist(), 
                                                           s['Tag'].values.tolist())]
        self.grouped = self.data.groupby('Sentence #').apply(agg_func)
        self.sentences = [s for s in self.grouped]
        
    def get_next(self):
        try: 
            s = self.grouped['Sentence: {}'.format(self.n_sent)]
            self.n_sent += 1
            return s 
        except:
            return None
        
getter = SentenceGetter(df)

sent = getter.get_next()
print(sent)


[('Thousands', 'NNS', 'O'), ('of', 'IN', 'O'), ('demonstrators', 'NNS', 'O'), ('have', 'VBP', 'O'), ('marched', 'VBN', 'O'), ('through', 'IN', 'O'), ('London', 'NNP', 'B-geo'), ('to', 'TO', 'O'), ('protest', 'VB', 'O'), ('the', 'DT', 'O'), ('war', 'NN', 'O'), ('in', 'IN', 'O'), ('Iraq', 'NNP', 'B-geo'), ('and', 'CC', 'O'), ('demand', 'VB', 'O'), ('the', 'DT', 'O'), ('withdrawal', 'NN', 'O'), ('of', 'IN', 'O'), ('British', 'JJ', 'B-gpe'), ('troops', 'NNS', 'O'), ('from', 'IN', 'O'), ('that', 'DT', 'O'), ('country', 'NN', 'O'), ('.', '.', 'O')]


In [41]:
sentences = getter.sentences

In [42]:
def word2features(sent, i):
    word = sent[i][0]
    postag = sent[i][1]
    
    features = {
        'bias': 1.0, 
        'word.lower()': word.lower(), 
        'word[-3:]': word[-3:],
        'word[-2:]': word[-2:],
        'word.isupper()': word.isupper(),
        'word.istitle()': word.istitle(),
        'word.isdigit()': word.isdigit(),
        'postag': postag,
        'postag[:2]': postag[:2],
    }
    if i > 0:
        word1 = sent[i-1][0]
        postag1 = sent[i-1][1]
        features.update({
            '-1:word.lower()': word1.lower(),
            '-1:word.istitle()': word1.istitle(),
            '-1:word.isupper()': word1.isupper(),
            '-1:postag': postag1,
            '-1:postag[:2]': postag1[:2],
        })
    else:
        features['BOS'] = True
    if i < len(sent)-1:
        word1 = sent[i+1][0]
        postag1 = sent[i+1][1]
        features.update({
            '+1:word.lower()': word1.lower(),
            '+1:word.istitle()': word1.istitle(),
            '+1:word.isupper()': word1.isupper(),
            '+1:postag': postag1,
            '+1:postag[:2]': postag1[:2],
        })
    else:
        features['EOS'] = True

    return features

def sent2features(sent):
    return [word2features(sent, i) for i in range(len(sent))]

def sent2labels(sent):
    return [label for token, postag, label in sent]

def sent2tokens(sent):
    return [token for token, postag, label in sent]

In [43]:
X = [sent2features(s) for s in sentences]
y = [sent2labels(s) for s in sentences]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    all_possible_transitions=True
)
crf.fit(X_train, y_train)



CRF(algorithm='lbfgs', all_possible_transitions=True, c1=0.1, c2=0.1,
    keep_tempfiles=None, max_iterations=100)

In [44]:
y_pred = crf.predict(X_test)
metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=new_classes)

0.7842087494747214

In [45]:
print(metrics.flat_classification_report(y_test, y_pred, labels = new_classes))

              precision    recall  f1-score   support

       B-art       1.00      0.03      0.07        29
       B-eve       0.86      0.25      0.39        24
       B-geo       0.75      0.88      0.81      1043
       B-gpe       0.89      0.78      0.83       588
       B-nat       0.67      0.20      0.31        10
       B-org       0.75      0.64      0.69       649
       B-per       0.81      0.81      0.81       546
       B-tim       0.90      0.85      0.87       589
       I-art       0.00      0.00      0.00         7
       I-eve       0.57      0.22      0.32        18
       I-geo       0.71      0.71      0.71       204
       I-gpe       0.47      0.53      0.50        17
       I-nat       1.00      0.50      0.67         2
       I-org       0.78      0.73      0.76       545
       I-per       0.80      0.90      0.85       574
       I-tim       0.79      0.68      0.73       185

   micro avg       0.80      0.78      0.79      5030
   macro avg       0.73   



In [47]:
import scipy.stats
from sklearn.metrics import make_scorer
from sklearn.model_selection import RandomizedSearchCV

crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    max_iterations=100,
    all_possible_transitions=True
)
params_space = {
    'c1': scipy.stats.expon(scale=0.5),
    'c2': scipy.stats.expon(scale=0.05),
}

# use the same metric for evaluation
f1_scorer = make_scorer(metrics.flat_f1_score,
                        average='weighted', labels=new_classes)

# search
rs = RandomizedSearchCV(crf, params_space,
                        cv=3,
                        verbose=1,
                        n_jobs=-1,
                        n_iter=50,
                        scoring=f1_scorer)
rs.fit(X_train, y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   58.6s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  3.9min finished


RandomizedSearchCV(cv=3,
                   estimator=CRF(algorithm='lbfgs',
                                 all_possible_transitions=True,
                                 keep_tempfiles=None, max_iterations=100),
                   n_iter=50, n_jobs=-1,
                   param_distributions={'c1': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000018C0E9ADFD0>,
                                        'c2': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000018C10108A90>},
                   scoring=make_scorer(flat_f1_score, average=weighted, labels=['B-art', 'B-eve', 'B-geo', 'B-gpe', 'B-nat', 'B-org', 'B-per', 'B-tim', 'I-art', 'I-eve', 'I-geo', 'I-gpe', 'I-nat', 'I-org', 'I-per', 'I-tim']),
                   verbose=1)

In [48]:
print('best params:', rs.best_params_)
print('best CV score:', rs.best_score_)
print('model size: {:0.2f}M'.format(rs.best_estimator_.size_ / 1000000))

best params: {'c1': 0.11305935227748311, 'c2': 0.12906200169808135}
best CV score: 0.7715840380246343
model size: 0.71M


In [49]:
crf = rs.best_estimator_
y_pred = crf.predict(X_test)
print(metrics.flat_classification_report(y_test, y_pred, labels=new_classes))

              precision    recall  f1-score   support

       B-art       1.00      0.03      0.07        29
       B-eve       1.00      0.25      0.40        24
       B-geo       0.75      0.88      0.81      1043
       B-gpe       0.89      0.78      0.83       588
       B-nat       0.67      0.20      0.31        10
       B-org       0.75      0.63      0.69       649
       B-per       0.81      0.81      0.81       546
       B-tim       0.90      0.84      0.87       589
       I-art       0.00      0.00      0.00         7
       I-eve       1.00      0.22      0.36        18
       I-geo       0.69      0.72      0.70       204
       I-gpe       0.56      0.53      0.55        17
       I-nat       1.00      0.50      0.67         2
       I-org       0.79      0.73      0.76       545
       I-per       0.80      0.90      0.85       574
       I-tim       0.80      0.68      0.74       185

   micro avg       0.80      0.78      0.79      5030
   macro avg       0.78   

In [50]:
from collections import Counter

def print_transitions(trans_features):
    for (label_from, label_to), weight in trans_features:
        print("%-6s -> %-7s %0.6f" % (label_from, label_to, weight))

print("Top likely transitions:")
print_transitions(Counter(crf.transition_features_).most_common(20))

print("\nTop unlikely transitions:")
print_transitions(Counter(crf.transition_features_).most_common()[-20:])

Top likely transitions:
B-art  -> I-art   5.283328
B-eve  -> I-eve   5.270569
B-geo  -> I-geo   5.270382
I-tim  -> I-tim   5.222488
I-art  -> I-art   5.023154
B-tim  -> I-tim   4.992659
I-geo  -> I-geo   4.820028
B-org  -> I-org   4.614503
B-per  -> I-per   4.599116
I-eve  -> I-eve   4.583274
B-gpe  -> I-gpe   4.524249
I-org  -> I-org   4.401437
I-gpe  -> I-gpe   4.337382
I-per  -> I-per   3.738520
O      -> O       3.563318
B-nat  -> I-nat   3.547590
B-org  -> B-art   2.217224
I-nat  -> I-nat   2.073834
O      -> B-per   2.013308
B-geo  -> B-tim   1.423722

Top unlikely transitions:
B-org  -> I-geo   -1.673977
O      -> I-eve   -1.707539
I-per  -> I-org   -1.738671
B-gpe  -> I-org   -1.938973
B-geo  -> I-per   -1.948263
B-geo  -> I-org   -1.953791
B-org  -> B-org   -1.977926
B-gpe  -> I-geo   -2.018338
I-org  -> B-org   -2.033628
O      -> I-art   -2.072506
B-tim  -> B-tim   -2.148417
B-org  -> I-per   -2.169847
I-org  -> I-per   -2.563002
I-per  -> B-per   -2.792437
O      -> I-per  

In [51]:
def print_state_features(state_features):
    for (attr, label), weight in state_features:
        print("%0.6f %-8s %s" % (weight, label, attr))

print("Top positive:")
print_state_features(Counter(crf.state_features_).most_common(30))

print("\nTop negative:")
print_state_features(Counter(crf.state_features_).most_common()[-30:])

Top positive:
5.327288 B-tim    word[-3:]:day
4.800047 O        BOS
4.464653 O        bias
3.780682 O        word.lower():jewish
3.593996 O        word.lower():kurdish
3.397269 I-tim    word[-3:]:day
3.380532 B-org    word.lower():al-qaida
3.189759 B-tim    word.lower():afternoon
3.182469 B-gpe    word.istitle()
3.155375 B-per    word.lower():president
3.154668 B-tim    word[-2:]:0s
3.122194 B-tim    word.lower():thanksgiving
3.100669 B-org    word.lower():hamas
3.089381 O        word[-2:]:N1
2.978874 B-gpe    word.lower():nepal
2.963069 B-tim    word[-2:]:ay
2.958882 B-gpe    word[-3:]:pal
2.956320 B-org    word.lower():parliament
2.909123 B-org    word[-3:]:ban
2.904545 B-tim    +1:word.lower():year
2.877306 B-tim    word[-3:]:ber
2.842484 B-per    word.lower():gotovina
2.818700 B-per    word.lower():obama
2.806891 B-per    BOS
2.805078 O        -1:word.lower():prime
2.797923 I-geo    +1:word.lower():town
2.772329 O        +1:word.lower():minister
2.748452 O        word.lower():last


In [52]:
!pip install eli5
import eli5

eli5.show_weights(crf, top=10)

Collecting eli5
  Downloading eli5-0.11.0-py2.py3-none-any.whl (106 kB)
Collecting graphviz
  Downloading graphviz-0.16-py2.py3-none-any.whl (19 kB)
Installing collected packages: graphviz, eli5
Successfully installed eli5-0.11.0 graphviz-0.16




From \ To,O,B-art,I-art,B-eve,I-eve,B-geo,I-geo,B-gpe,I-gpe,B-nat,I-nat,B-org,I-org,B-per,I-per,B-tim,I-tim
O,3.563,0.74,-2.073,1.264,-1.708,1.157,-4.145,0.56,-1.21,0.307,-1.02,0.947,-4.396,2.013,-2.924,1.394,-3.848
B-art,-0.588,0.0,5.283,0.0,0.0,0.0,-0.013,-0.193,0.0,0.0,0.0,0.526,-0.278,-0.509,-0.601,-0.393,0.0
I-art,-0.812,0.0,5.023,0.0,0.0,0.443,-0.13,0.0,0.0,0.0,0.0,-0.2,-0.211,-0.614,-0.397,0.087,-0.09
B-eve,-0.375,0.0,0.0,0.0,5.271,0.0,0.0,-0.044,0.0,0.0,0.0,-0.097,-0.101,-0.604,-0.136,-0.383,-0.183
I-eve,-0.316,0.0,0.0,-0.219,4.583,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.082,-0.105,-0.27,0.0
B-geo,0.047,1.126,-0.795,0.0,-0.545,-1.346,5.27,0.703,-1.495,0.0,-0.285,-1.006,-1.954,-1.183,-1.948,1.424,-1.226
I-geo,0.059,0.0,-0.051,0.0,0.0,-0.571,4.82,-0.45,-0.407,0.0,0.0,-0.463,-0.766,-0.857,-0.576,1.018,-0.752
B-gpe,0.7,-0.004,-0.515,-0.0,-0.416,-0.002,-2.018,-3.183,4.524,0.0,0.0,0.992,-1.939,0.592,-1.369,-0.428,-0.7
I-gpe,-0.488,0.0,0.0,0.0,0.0,0.034,-0.257,-0.225,4.337,0.0,0.0,-0.206,-0.448,-0.426,-0.361,-0.402,0.0
B-nat,-0.21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.548,0.0,0.0,-0.187,-0.017,0.0,0.0

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4
Weight?,Feature,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5
Weight?,Feature,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6
Weight?,Feature,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7,Unnamed: 12_level_7,Unnamed: 13_level_7,Unnamed: 14_level_7,Unnamed: 15_level_7,Unnamed: 16_level_7
Weight?,Feature,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8,Unnamed: 12_level_8,Unnamed: 13_level_8,Unnamed: 14_level_8,Unnamed: 15_level_8,Unnamed: 16_level_8
Weight?,Feature,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9,Unnamed: 6_level_9,Unnamed: 7_level_9,Unnamed: 8_level_9,Unnamed: 9_level_9,Unnamed: 10_level_9,Unnamed: 11_level_9,Unnamed: 12_level_9,Unnamed: 13_level_9,Unnamed: 14_level_9,Unnamed: 15_level_9,Unnamed: 16_level_9
Weight?,Feature,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,Unnamed: 10_level_10,Unnamed: 11_level_10,Unnamed: 12_level_10,Unnamed: 13_level_10,Unnamed: 14_level_10,Unnamed: 15_level_10,Unnamed: 16_level_10
Weight?,Feature,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11,Unnamed: 6_level_11,Unnamed: 7_level_11,Unnamed: 8_level_11,Unnamed: 9_level_11,Unnamed: 10_level_11,Unnamed: 11_level_11,Unnamed: 12_level_11,Unnamed: 13_level_11,Unnamed: 14_level_11,Unnamed: 15_level_11,Unnamed: 16_level_11
Weight?,Feature,Unnamed: 2_level_12,Unnamed: 3_level_12,Unnamed: 4_level_12,Unnamed: 5_level_12,Unnamed: 6_level_12,Unnamed: 7_level_12,Unnamed: 8_level_12,Unnamed: 9_level_12,Unnamed: 10_level_12,Unnamed: 11_level_12,Unnamed: 12_level_12,Unnamed: 13_level_12,Unnamed: 14_level_12,Unnamed: 15_level_12,Unnamed: 16_level_12
Weight?,Feature,Unnamed: 2_level_13,Unnamed: 3_level_13,Unnamed: 4_level_13,Unnamed: 5_level_13,Unnamed: 6_level_13,Unnamed: 7_level_13,Unnamed: 8_level_13,Unnamed: 9_level_13,Unnamed: 10_level_13,Unnamed: 11_level_13,Unnamed: 12_level_13,Unnamed: 13_level_13,Unnamed: 14_level_13,Unnamed: 15_level_13,Unnamed: 16_level_13
Weight?,Feature,Unnamed: 2_level_14,Unnamed: 3_level_14,Unnamed: 4_level_14,Unnamed: 5_level_14,Unnamed: 6_level_14,Unnamed: 7_level_14,Unnamed: 8_level_14,Unnamed: 9_level_14,Unnamed: 10_level_14,Unnamed: 11_level_14,Unnamed: 12_level_14,Unnamed: 13_level_14,Unnamed: 14_level_14,Unnamed: 15_level_14,Unnamed: 16_level_14
Weight?,Feature,Unnamed: 2_level_15,Unnamed: 3_level_15,Unnamed: 4_level_15,Unnamed: 5_level_15,Unnamed: 6_level_15,Unnamed: 7_level_15,Unnamed: 8_level_15,Unnamed: 9_level_15,Unnamed: 10_level_15,Unnamed: 11_level_15,Unnamed: 12_level_15,Unnamed: 13_level_15,Unnamed: 14_level_15,Unnamed: 15_level_15,Unnamed: 16_level_15
Weight?,Feature,Unnamed: 2_level_16,Unnamed: 3_level_16,Unnamed: 4_level_16,Unnamed: 5_level_16,Unnamed: 6_level_16,Unnamed: 7_level_16,Unnamed: 8_level_16,Unnamed: 9_level_16,Unnamed: 10_level_16,Unnamed: 11_level_16,Unnamed: 12_level_16,Unnamed: 13_level_16,Unnamed: 14_level_16,Unnamed: 15_level_16,Unnamed: 16_level_16
+4.800,BOS,,,,,,,,,,,,,,,
+4.465,bias,,,,,,,,,,,,,,,
+3.781,word.lower():jewish,,,,,,,,,,,,,,,
+3.594,word.lower():kurdish,,,,,,,,,,,,,,,
+3.089,word[-2:]:N1,,,,,,,,,,,,,,,
+2.805,-1:word.lower():prime,,,,,,,,,,,,,,,
… 1659 more positive …,… 1659 more positive …,,,,,,,,,,,,,,,
… 990 more negative …,… 990 more negative …,,,,,,,,,,,,,,,
-2.928,+1:word.lower():last,,,,,,,,,,,,,,,
-3.314,word.istitle(),,,,,,,,,,,,,,,

Weight?,Feature
+4.800,BOS
+4.465,bias
+3.781,word.lower():jewish
+3.594,word.lower():kurdish
+3.089,word[-2:]:N1
+2.805,-1:word.lower():prime
… 1659 more positive …,… 1659 more positive …
… 990 more negative …,… 990 more negative …
-2.928,+1:word.lower():last
-3.314,word.istitle()

Weight?,Feature
+2.242,word.lower():twitter
+2.205,word.lower():english
+1.915,-1:word.lower():tamilnet
+1.602,-1:word.lower():newspaper
+1.519,word.lower():dodge
+1.492,word.lower():jeep
+1.438,word.lower():facebook
+1.433,word[-3:]:ook
+1.413,word[-3:]:eep
+1.397,word[-2:]:ep

Weight?,Feature
+1.172,+1:word.lower():airport
+1.082,word.lower():constitution
+1.048,-1:word.lower():international
+1.042,-1:word.istitle()
+0.973,word[-3:]:Us
+0.973,word[-2:]:Us
+0.972,-1:word.lower():magazine
+0.969,word[-2:]:le
+0.950,word.lower():us
+0.935,+1:word.lower():newspaper

Weight?,Feature
+2.091,-1:word.lower():war
+1.640,-1:word.lower():first
+1.465,word.lower():christmas
+1.461,-1:word.lower():celebrated
+1.395,+1:word.lower():get
+1.349,word.lower():games
+1.270,word[-3:]:II
+1.270,word.lower():ii
+1.268,word[-2:]:II
+1.257,word[-3:]:mas

Weight?,Feature
+1.236,postag:NNPS
+1.045,word.lower():cup
+1.045,word[-3:]:Cup
+1.021,word[-2:]:rs
+0.987,word[-2:]:up
+0.984,word.lower():peace
+0.979,+1:word.lower():in
+0.959,-1:word.lower():korean
+0.951,word[-2:]:ng
+0.932,word.lower():open

Weight?,Feature
+2.657,-1:word.lower():serb
+2.646,word.lower():mid-september
+2.623,word.lower():aswat
+2.532,word.lower():washington
+2.410,word.lower():china
+2.399,word[-3:]:the
+2.391,word[-2:]:ai
+2.265,word.lower():beijing
+2.263,+1:word.lower():province
… 1297 more positive …,… 1297 more positive …

Weight?,Feature
+2.798,+1:word.lower():town
+2.534,+1:word.lower():block
+2.394,+1:word.lower():achieved
+2.021,word.lower():settlement
+1.883,+1:word.lower():produced
+1.824,-1:word.lower():western
+1.806,-1:word.lower():tulkarem
+1.770,+1:word.lower():base
+1.651,+1:word.lower():regional
+1.612,+1:word.lower():about

Weight?,Feature
+3.182,word.istitle()
+2.979,word.lower():nepal
+2.959,word[-3:]:pal
+2.705,word[-3:]:ans
+2.683,+1:word.lower():mayor
+2.597,postag:NNS
+2.559,+1:word.lower():representative
+2.467,word.lower():croats
+2.439,word[-3:]:ese
+2.358,word.lower():palestinian

Weight?,Feature
+2.490,+1:word.lower():began
+2.021,-1:word.lower():soviet
+1.943,+1:word.lower():health
+1.729,word[-3:]:can
+1.616,-1:postag:NNP
+1.582,+1:word.lower():that
+1.578,+1:word.lower():returned
+1.577,word.lower():city
+1.445,-1:word.lower():democratic
… 111 more positive …,… 111 more positive …

Weight?,Feature
+1.637,-1:word.lower():from
+1.588,word.isupper()
+1.498,word[-3:]:5N1
+1.498,word.lower():h5n1
+1.429,+1:word.lower():katrina
+1.415,word.lower():hurricane
+1.302,word.lower():marburg
+1.300,word[-3:]:ane
+1.269,word[-2:]:N1
+1.262,word[-3:]:urg

Weight?,Feature
+1.389,-1:word.lower():hurricane
+1.237,word.lower():katrina
+0.974,+1:word.lower():outbreak
+0.921,word[-3:]:ina
+0.846,word[-2:]:na
+0.759,word[-3:]:ome
+0.737,word.lower():syndrome
+0.736,-1:word.lower():respiratory
+0.724,-1:word.lower():acute
+0.724,+1:word.lower():syndrome

Weight?,Feature
+3.381,word.lower():al-qaida
+3.101,word.lower():hamas
+2.956,word.lower():parliament
+2.909,word[-3:]:ban
+2.681,-1:word.lower():brunei
+2.550,-1:word.lower():telephoned
+2.446,word[-3:]:The
+2.444,+1:word.lower():influence
+2.417,+1:word.lower():fought
+2.399,-1:word.lower():extremist

Weight?,Feature
+1.957,+1:word.lower():mr.
+1.752,word.lower():ministry
+1.731,+1:word.lower():will
+1.730,-1:word.lower():for
+1.719,-1:word.lower():mediterranean
+1.631,word[-3:]:ons
+1.582,-1:word.lower():munich
+1.576,-1:word.lower():the
+1.536,-1:word.lower():group
… 1145 more positive …,… 1145 more positive …

Weight?,Feature
+3.155,word.lower():president
+2.842,word.lower():gotovina
+2.819,word.lower():obama
+2.807,BOS
+2.505,word.lower():prime
+2.492,word.lower():jupiter
+2.347,word[-2:]:ll
+2.326,+1:word.lower():administration
+2.311,word.lower():western
+2.297,+1:word.lower():vladimir

Weight?,Feature
+1.539,+1:word.lower():saad
+1.479,+1:word.lower():david
+1.460,-1:postag:NN
+1.459,+1:word.lower():reports
+1.329,+1:word.lower():condoleezza
+1.327,word[-3:]:aad
… 855 more positive …,… 855 more positive …
… 178 more negative …,… 178 more negative …
-1.352,bias
-1.363,word[-2:]:ka

Weight?,Feature
+5.327,word[-3:]:day
+3.190,word.lower():afternoon
+3.155,word[-2:]:0s
+3.122,word.lower():thanksgiving
+2.963,word[-2:]:ay
+2.905,+1:word.lower():year
+2.877,word[-3:]:ber
+2.659,word.lower():august
+2.646,+1:word.lower():weeks
+2.610,word.lower():midnight

Weight?,Feature
+3.397,word[-3:]:day
+2.606,word[-2:]:ay
+2.184,-1:word.lower():ceremonies
+2.145,word.lower():decades
+2.030,word[-3:]:des
+2.029,+1:word.lower():moscow
+2.028,-1:word.lower():march
+1.845,word[-2:]:ry
+1.834,word.isdigit()
+1.733,word.lower():november


In [53]:
crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=200,
    c2=0.1,
    max_iterations=20,
    all_possible_transitions=False,
)
crf.fit(X_train, y_train)
eli5.show_weights(crf, top=10)

From \ To,O,B-art,I-art,B-eve,I-eve,B-geo,I-geo,B-gpe,I-gpe,B-nat,I-nat,B-org,I-org,B-per,I-per,B-tim,I-tim
O,1.782,0.0,0.0,0.0,0.0,1.456,0.0,0.303,0.0,0.0,0.0,0.791,0.0,0.062,0.0,1.709,0.0
B-art,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
I-art,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B-eve,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
I-eve,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B-geo,0.23,0.0,0.0,0.0,0.0,0.0,2.704,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
I-geo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B-gpe,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
I-gpe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B-nat,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4
Weight?,Feature,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5
Weight?,Feature,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6
Weight?,Feature,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7,Unnamed: 12_level_7,Unnamed: 13_level_7,Unnamed: 14_level_7,Unnamed: 15_level_7,Unnamed: 16_level_7
Weight?,Feature,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8,Unnamed: 12_level_8,Unnamed: 13_level_8,Unnamed: 14_level_8,Unnamed: 15_level_8,Unnamed: 16_level_8
+3.691,bias,,,,,,,,,,,,,,,
+1.774,BOS,,,,,,,,,,,,,,,
+0.984,-1:postag[:2]:NN,,,,,,,,,,,,,,,
+0.435,postag[:2]:VB,,,,,,,,,,,,,,,
+0.218,EOS,,,,,,,,,,,,,,,
… 10 more positive …,… 10 more positive …,,,,,,,,,,,,,,,
… 1 more negative …,… 1 more negative …,,,,,,,,,,,,,,,
-0.467,postag:CD,,,,,,,,,,,,,,,
-0.467,postag[:2]:CD,,,,,,,,,,,,,,,
-1.472,word.isdigit(),,,,,,,,,,,,,,,

Weight?,Feature
+3.691,bias
+1.774,BOS
+0.984,-1:postag[:2]:NN
+0.435,postag[:2]:VB
+0.218,EOS
… 10 more positive …,… 10 more positive …
… 1 more negative …,… 1 more negative …
-0.467,postag:CD
-0.467,postag[:2]:CD
-1.472,word.isdigit()

Weight?,Feature
1.052,postag:NNP
0.534,word.istitle()
0.218,-1:postag:IN
0.218,-1:postag[:2]:IN
0.125,-1:word.lower():in
-0.289,-1:postag[:2]:NN

Weight?,Feature
0.267,-1:postag:NNP

Weight?,Feature
1.533,postag:JJ
1.506,postag[:2]:JJ
1.139,word.istitle()
0.549,word[-2:]:an
-0.033,postag:NNP

Weight?,Feature
0.806,postag:NNP
0.565,postag[:2]:NN
0.23,-1:postag[:2]:DT
0.23,-1:postag:DT
0.004,word.isupper()

Weight?,Feature
0.496,-1:postag:NNP
0.377,-1:word.istitle()
0.225,-1:postag[:2]:NN

Weight?,Feature
0.51,postag:NNP
0.438,+1:postag:NNP
0.308,+1:word.istitle()
0.075,postag[:2]:NN
0.022,word.istitle()
0.002,+1:postag[:2]:NN

Weight?,Feature
0.881,-1:postag:NNP
0.48,-1:postag[:2]:NN
0.404,-1:word.istitle()
0.196,postag:NNP

Weight?,Feature
1.74,word[-2:]:ay
1.657,word[-3:]:day
0.204,postag[:2]:CD
0.204,postag:CD
0.096,bias
0.033,-1:postag[:2]:IN
0.033,-1:postag:IN


In [54]:
eli5.show_weights(crf, top=10, targets=['O', 'B-org', 'I-per'])

From \ To,O,B-org,I-per
O,1.782,0.791,0.0
B-org,0.0,0.0,0.0
I-per,0.0,0.0,1.473

Weight?,Feature,Unnamed: 2_level_0
Weight?,Feature,Unnamed: 2_level_1
Weight?,Feature,Unnamed: 2_level_2
+3.691,bias,
+1.774,BOS,
+0.984,-1:postag[:2]:NN,
+0.435,postag[:2]:VB,
+0.218,EOS,
… 10 more positive …,… 10 more positive …,
… 1 more negative …,… 1 more negative …,
-0.467,postag:CD,
-0.467,postag[:2]:CD,
-1.472,word.isdigit(),

Weight?,Feature
+3.691,bias
+1.774,BOS
+0.984,-1:postag[:2]:NN
+0.435,postag[:2]:VB
+0.218,EOS
… 10 more positive …,… 10 more positive …
… 1 more negative …,… 1 more negative …
-0.467,postag:CD
-0.467,postag[:2]:CD
-1.472,word.isdigit()

Weight?,Feature
0.806,postag:NNP
0.565,postag[:2]:NN
0.23,-1:postag[:2]:DT
0.23,-1:postag:DT
0.004,word.isupper()

Weight?,Feature
0.881,-1:postag:NNP
0.48,-1:postag[:2]:NN
0.404,-1:word.istitle()
0.196,postag:NNP


In [55]:

eli5.show_weights(crf, top=10, feature_re='^word\.is',
                  horizontal_layout=False, show=['targets'])

Weight?,Feature
-1.472,word.isdigit()
-2.43,word.istitle()

Weight?,Feature
0.534,word.istitle()

Weight?,Feature
1.139,word.istitle()

Weight?,Feature
0.004,word.isupper()

Weight?,Feature
0.022,word.istitle()
