In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from helper import *


In [2]:
# Load the token-level NER corpus (one row per token; the 'Sentence #' column is only
# filled on the first token of each sentence, as the preview below shows).
# A forward slash is used as the path separator because it is accepted on Windows,
# Linux and macOS alike, whereas a literal backslash only works on Windows.
df = pd.read_csv('data/simple_ner.csv', encoding="ISO-8859-1")
# Optional: uncomment to work on a smaller slice while experimenting.
#df = df[:100000]
df.head()

Unnamed: 0,Sentence #,Word,POS,Tag
0,Sentence: 1,Thousands,NNS,O
1,,of,IN,O
2,,demonstrators,NNS,O
3,,have,VBP,O
4,,marched,VBN,O


In [3]:
# Propagate the last seen value downwards so every token row carries its sentence id.
# DataFrame.ffill() is the supported spelling; fillna(method='ffill') is deprecated
# in recent pandas releases and yields the same result.
# NOTE(review): this forward-fills *every* column, so a missing Word/POS/Tag would
# silently inherit the previous row's value — confirm only 'Sentence #' has gaps.
df = df.ffill()
# Corpus size summary: (number of sentences, distinct words, distinct tags).
df['Sentence #'].nunique(), df.Word.nunique(), df.Tag.nunique()

(47959, 35178, 17)

In [4]:
# Number of tokens carrying each tag, shown as a two-column (Tag, counts) frame.
tokens_per_tag = df.groupby('Tag').size()
tokens_per_tag.reset_index(name='counts')

Unnamed: 0,Tag,counts
0,B-art,402
1,B-eve,308
2,B-geo,37644
3,B-gpe,15870
4,B-nat,201
5,B-org,20143
6,B-per,16990
7,B-tim,20333
8,I-art,297
9,I-eve,253


Continue here

In [5]:
import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics
from collections import Counter
import helper

In [6]:
class SentenceGetter(object):
    """Group a token-level frame into sentences of (word, POS, tag) tuples.

    The input frame must provide the columns 'Sentence #', 'Word', 'POS' and 'Tag'.

    Attributes:
        n_sent: 1-based number of the sentence that ``get_next`` returns next.
        data: the frame passed to the constructor.
        empty: always ``False`` here; kept for backward compatibility.
        grouped: result of grouping ``data`` by 'Sentence #'; each value is the
            list of (word, POS, tag) tuples of one sentence.
        sentences: the values of ``grouped`` as a plain list, in the group-key
            order produced by pandas (keys are sorted, so the order is
            lexicographic on the sentence label, not numeric).
    """

    def __init__(self, data):
        """Build the per-sentence token lists from ``data``.

        Args:
            data: frame with columns 'Sentence #', 'Word', 'POS' and 'Tag'.
        """
        self.n_sent = 1
        self.data = data
        self.empty = False

        def agg_func(s):
            # One (word, POS, tag) tuple per token row of the sentence group.
            return list(zip(s['Word'].values.tolist(),
                            s['POS'].values.tolist(),
                            s['Tag'].values.tolist()))

        self.grouped = self.data.groupby('Sentence #').apply(agg_func)
        self.sentences = [s for s in self.grouped]

    def get_next(self):
        """Return the next sentence by number, or ``None`` when it does not exist.

        Sentences are looked up under the label ``'Sentence: <n_sent>'``. The
        counter advances only after a successful lookup.

        Returns:
            The list of (word, POS, tag) tuples of the sentence, or ``None`` if
            no sentence with the current number is present.
        """
        try:
            s = self.grouped['Sentence: {}'.format(self.n_sent)]
        except KeyError:
            # Only a missing sentence label means "no more sentences"; any other
            # error (or a KeyboardInterrupt) must not be silently swallowed.
            return None
        self.n_sent += 1
        return s
# Group the corpus once; `sentences` is a list with one entry per sentence, each
# entry being that sentence's list of (word, POS, tag) tuples.
getter = SentenceGetter(df)
sentences = getter.sentences

In [7]:
# Per-sentence feature sequences (X) and label sequences (y), produced by the
# helper functions, then a fixed-seed 67/33 train/test split.
X = list(map(sent2features_casual, sentences))
y = list(map(sent2labels, sentences))
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0,
)

In [8]:
# Sanity check: feature dict of the third token of the first training sentence.
X_train[0][2]

{'word.lower()': 'came',
 'word.isdigit()': False,
 'postag': 'VBD',
 'postag[:2]': 'VB',
 '-1:word.lower()': 'it',
 '-1:postag': 'PRP',
 '-1:postag[:2]': 'PR',
 '-2:postag': 'RB',
 '-2:postag[:2]': 'RB',
 '+1:word.lower()': 'to',
 '+1:postag': 'TO',
 '+1:postag[:2]': 'TO',
 '+2:postag': 'VB',
 '+2:postag[:2]': 'VB'}

In [9]:
# CRF configuration: SGD training with L2 regularisation ('l2sgd').
# c1 (the L1 coefficient) is intentionally not set; only the L2 coefficient c2 is used.
crf_settings = dict(
    algorithm='l2sgd',
    c2=0.1,
    max_iterations=100,
    all_possible_transitions=True,
)
crf = sklearn_crfsuite.CRF(**crf_settings)
crf.fit(X_train, y_train)



CRF(algorithm='l2sgd', all_possible_transitions=True, c2=0.1,
    keep_tempfiles=None, max_iterations=100)

In [18]:
# Raw array of every token's tag in the corpus.
all_tags = df['Tag'].values

In [19]:
# Sorted list of the distinct tags present in the corpus.
classes = np.unique(all_tags).tolist()

In [20]:
# Independent copy of the label list, so it can be edited without touching `classes`.
new_classes = list(classes)
new_classes

['B-art',
 'B-eve',
 'B-geo',
 'B-gpe',
 'B-nat',
 'B-org',
 'B-per',
 'B-tim',
 'I-art',
 'I-eve',
 'I-geo',
 'I-gpe',
 'I-nat',
 'I-org',
 'I-per',
 'I-tim',
 'O']

In [21]:
# Tag the held-out sentences and report per-label precision / recall / F1
# for every label currently listed in `new_classes`.
y_pred = crf.predict(X_test)
report_text = metrics.flat_classification_report(
    y_test,
    y_pred,
    labels=new_classes,
)
print(report_text)



              precision    recall  f1-score   support

       B-art       0.44      0.11      0.18       143
       B-eve       0.49      0.34      0.40       106
       B-geo       0.84      0.92      0.88     12447
       B-gpe       0.97      0.92      0.95      5284
       B-nat       0.83      0.31      0.45        78
       B-org       0.82      0.70      0.75      6615
       B-per       0.86      0.82      0.84      5652
       B-tim       0.92      0.88      0.90      6856
       I-art       0.12      0.04      0.06       105
       I-eve       0.29      0.22      0.25        93
       I-geo       0.80      0.82      0.81      2520
       I-gpe       0.88      0.55      0.68        69
       I-nat       1.00      0.35      0.52        23
       I-org       0.83      0.77      0.80      5597
       I-per       0.85      0.91      0.88      5674
       I-tim       0.84      0.72      0.78      2207
           O       0.99      0.99      0.99    291891

    accuracy              

In [22]:
# Exclude the 'O' (outside) label so the report below focuses on entity tags.
# Filtering into a new list (instead of list.remove) keeps this cell idempotent:
# re-running it no longer raises ValueError once 'O' is already gone.
new_classes = [tag for tag in new_classes if tag != 'O']

In [23]:
# Same evaluation as before, restricted to the labels now left in `new_classes`.
y_pred = crf.predict(X_test)
entity_report = metrics.flat_classification_report(y_test, y_pred, labels=new_classes)
print(entity_report)



              precision    recall  f1-score   support

       B-art       0.44      0.11      0.18       143
       B-eve       0.49      0.34      0.40       106
       B-geo       0.84      0.92      0.88     12447
       B-gpe       0.97      0.92      0.95      5284
       B-nat       0.83      0.31      0.45        78
       B-org       0.82      0.70      0.75      6615
       B-per       0.86      0.82      0.84      5652
       B-tim       0.92      0.88      0.90      6856
       I-art       0.12      0.04      0.06       105
       I-eve       0.29      0.22      0.25        93
       I-geo       0.80      0.82      0.81      2520
       I-gpe       0.88      0.55      0.68        69
       I-nat       1.00      0.35      0.52        23
       I-org       0.83      0.77      0.80      5597
       I-per       0.85      0.91      0.88      5674
       I-tim       0.84      0.72      0.78      2207

   micro avg       0.86      0.84      0.85     53469
   macro avg       0.74   

In [24]:
from collections import Counter
def print_transitions(trans_features):
    """Print one aligned line per transition: source label, target label, weight.

    Args:
        trans_features: iterable of ``((label_from, label_to), weight)`` pairs.
    """
    for transition, weight in trans_features:
        source_label, target_label = transition
        row = (source_label, target_label, weight)
        print("%-6s -> %-7s %0.6f" % row)
# Show the 20 highest- and 20 lowest-weighted label transitions learned by the CRF.
print("Top likely transitions:")
ranked_transitions = Counter(crf.transition_features_).most_common()
print_transitions(ranked_transitions[:20])
print("\nTop unlikely transitions:")
print_transitions(ranked_transitions[-20:])

Top likely transitions:
I-org  -> I-org   8.570730
B-geo  -> I-geo   8.474305
B-org  -> I-org   8.421875
B-tim  -> I-tim   7.769305
B-per  -> I-per   7.729057
B-art  -> I-art   7.511492
I-tim  -> I-tim   7.388941
I-per  -> I-per   7.200962
I-art  -> I-art   7.198596
O      -> O       6.894479
I-eve  -> I-eve   6.685049
B-eve  -> I-eve   6.589331
B-gpe  -> I-gpe   6.514260
I-geo  -> I-geo   6.442124
B-nat  -> I-nat   5.558473
I-gpe  -> I-gpe   5.235349
O      -> B-per   5.100205
O      -> B-geo   4.718776
O      -> B-tim   4.637727
O      -> B-org   4.179881

Top unlikely transitions:
I-eve  -> B-tim   -2.047972
B-gpe  -> I-geo   -2.078873
I-per  -> B-geo   -2.083811
B-org  -> I-per   -2.091779
B-gpe  -> I-org   -2.176951
I-eve  -> B-eve   -2.293871
O      -> I-art   -2.441563
O      -> I-per   -2.638834
I-geo  -> B-geo   -2.799409
B-org  -> B-org   -2.993215
O      -> I-tim   -3.174125
O      -> I-org   -3.184291
B-geo  -> B-geo   -3.284742
I-org  -> B-org   -3.352645
I-tim  -> B-tim  

In [25]:
import eli5
# Render the learned CRF weights with eli5: the transition matrix plus, for each
# label, its 10 highest-weighted state features.
eli5.show_weights(crf, top=10)

From \ To,O,B-art,I-art,B-eve,I-eve,B-geo,I-geo,B-gpe,I-gpe,B-nat,I-nat,B-org,I-org,B-per,I-per,B-tim,I-tim
O,6.894,2.422,-2.442,3.539,-2.035,4.719,-3.567,3.419,-1.228,2.561,-1.207,4.18,-3.184,5.1,-2.639,4.638,-3.174
B-art,-0.187,-0.202,7.511,-0.131,-0.277,0.328,-0.287,-1.299,-0.319,-0.118,-0.161,0.705,-0.486,-1.566,-0.529,-0.094,-0.536
I-art,-0.647,-0.381,7.199,-0.189,-0.19,0.425,-0.372,-0.952,-0.343,-0.053,-0.075,-0.881,-0.275,-0.112,-0.442,-1.516,-0.21
B-eve,-0.505,-0.131,-0.211,-0.433,6.589,-1.177,-0.328,-0.949,-0.246,-0.369,-0.169,-1.279,-0.439,-1.429,-0.393,0.415,-0.411
I-eve,0.284,-0.072,-0.08,-2.294,6.685,-0.562,-0.189,-0.392,-0.162,-0.029,-0.052,-0.52,-0.122,-0.707,-0.263,-2.048,-0.419
B-geo,1.846,1.154,-0.864,-0.772,-1.181,-3.285,8.474,1.343,-1.899,-0.455,-0.689,1.062,-1.951,-0.014,-1.811,2.142,-1.455
I-geo,0.246,1.846,-0.793,-0.731,-0.936,-2.799,6.442,-1.915,-1.537,-0.214,-0.358,0.027,-1.329,-0.281,-1.413,0.599,-1.476
B-gpe,3.119,-1.164,-0.865,-1.017,-1.795,0.599,-2.079,-4.131,6.514,-0.837,-0.673,2.207,-2.177,1.419,-1.845,0.471,-1.056
I-gpe,-0.135,-0.444,-0.079,-0.076,-0.201,-0.429,-0.494,-1.002,5.235,-0.064,-0.041,-1.401,-0.279,-0.355,-0.393,-1.313,-0.309
B-nat,-0.526,-0.052,-0.077,-0.069,-0.161,-0.438,-0.092,-0.697,-0.205,-0.772,5.558,-0.463,-0.132,-0.349,-0.302,-1.256,-0.27

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4
Weight?,Feature,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5
Weight?,Feature,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6
Weight?,Feature,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7,Unnamed: 12_level_7,Unnamed: 13_level_7,Unnamed: 14_level_7,Unnamed: 15_level_7,Unnamed: 16_level_7
Weight?,Feature,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8,Unnamed: 12_level_8,Unnamed: 13_level_8,Unnamed: 14_level_8,Unnamed: 15_level_8,Unnamed: 16_level_8
Weight?,Feature,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9,Unnamed: 6_level_9,Unnamed: 7_level_9,Unnamed: 8_level_9,Unnamed: 9_level_9,Unnamed: 10_level_9,Unnamed: 11_level_9,Unnamed: 12_level_9,Unnamed: 13_level_9,Unnamed: 14_level_9,Unnamed: 15_level_9,Unnamed: 16_level_9
Weight?,Feature,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,Unnamed: 10_level_10,Unnamed: 11_level_10,Unnamed: 12_level_10,Unnamed: 13_level_10,Unnamed: 14_level_10,Unnamed: 15_level_10,Unnamed: 16_level_10
Weight?,Feature,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11,Unnamed: 6_level_11,Unnamed: 7_level_11,Unnamed: 8_level_11,Unnamed: 9_level_11,Unnamed: 10_level_11,Unnamed: 11_level_11,Unnamed: 12_level_11,Unnamed: 13_level_11,Unnamed: 14_level_11,Unnamed: 15_level_11,Unnamed: 16_level_11
Weight?,Feature,Unnamed: 2_level_12,Unnamed: 3_level_12,Unnamed: 4_level_12,Unnamed: 5_level_12,Unnamed: 6_level_12,Unnamed: 7_level_12,Unnamed: 8_level_12,Unnamed: 9_level_12,Unnamed: 10_level_12,Unnamed: 11_level_12,Unnamed: 12_level_12,Unnamed: 13_level_12,Unnamed: 14_level_12,Unnamed: 15_level_12,Unnamed: 16_level_12
Weight?,Feature,Unnamed: 2_level_13,Unnamed: 3_level_13,Unnamed: 4_level_13,Unnamed: 5_level_13,Unnamed: 6_level_13,Unnamed: 7_level_13,Unnamed: 8_level_13,Unnamed: 9_level_13,Unnamed: 10_level_13,Unnamed: 11_level_13,Unnamed: 12_level_13,Unnamed: 13_level_13,Unnamed: 14_level_13,Unnamed: 15_level_13,Unnamed: 16_level_13
Weight?,Feature,Unnamed: 2_level_14,Unnamed: 3_level_14,Unnamed: 4_level_14,Unnamed: 5_level_14,Unnamed: 6_level_14,Unnamed: 7_level_14,Unnamed: 8_level_14,Unnamed: 9_level_14,Unnamed: 10_level_14,Unnamed: 11_level_14,Unnamed: 12_level_14,Unnamed: 13_level_14,Unnamed: 14_level_14,Unnamed: 15_level_14,Unnamed: 16_level_14
Weight?,Feature,Unnamed: 2_level_15,Unnamed: 3_level_15,Unnamed: 4_level_15,Unnamed: 5_level_15,Unnamed: 6_level_15,Unnamed: 7_level_15,Unnamed: 8_level_15,Unnamed: 9_level_15,Unnamed: 10_level_15,Unnamed: 11_level_15,Unnamed: 12_level_15,Unnamed: 13_level_15,Unnamed: 14_level_15,Unnamed: 15_level_15,Unnamed: 16_level_15
Weight?,Feature,Unnamed: 2_level_16,Unnamed: 3_level_16,Unnamed: 4_level_16,Unnamed: 5_level_16,Unnamed: 6_level_16,Unnamed: 7_level_16,Unnamed: 8_level_16,Unnamed: 9_level_16,Unnamed: 10_level_16,Unnamed: 11_level_16,Unnamed: 12_level_16,Unnamed: 13_level_16,Unnamed: 14_level_16,Unnamed: 15_level_16,Unnamed: 16_level_16
+9.689,word.lower():last,,,,,,,,,,,,,,,
+8.443,word.lower():next,,,,,,,,,,,,,,,
+6.998,word.lower():60,,,,,,,,,,,,,,,
+6.746,EOS,,,,,,,,,,,,,,,
+6.581,BOS,,,,,,,,,,,,,,,
+6.484,word.lower():internet,,,,,,,,,,,,,,,
+6.191,word.lower():hurricane,,,,,,,,,,,,,,,
+6.097,word.lower():chief,,,,,,,,,,,,,,,
+5.870,word.lower():years,,,,,,,,,,,,,,,
+5.812,word.lower():this,,,,,,,,,,,,,,,

Weight?,Feature
+9.689,word.lower():last
+8.443,word.lower():next
+6.998,word.lower():60
+6.746,EOS
+6.581,BOS
+6.484,word.lower():internet
+6.191,word.lower():hurricane
+6.097,word.lower():chief
+5.870,word.lower():years
+5.812,word.lower():this

Weight?,Feature
+7.055,word.lower():spanish
+6.118,word.lower():gdp
+5.849,word.lower():english
+4.669,word.lower():vioxx
+4.561,word.lower():canal
+4.227,word.lower():nevirapine
+4.167,word.lower():please
+4.062,word.lower():arabic
+4.025,word.lower():spaceshipone
+3.992,word.lower():facebook

Weight?,Feature
+2.708,+1:word.lower():gained
+2.518,-1:word.lower():boeing
+2.463,-1:word.lower():magazine
+2.322,word.lower():notice
+2.267,-1:word.lower():balad
+2.242,+1:word.lower():roses
+2.194,-1:word.lower():cajun
+2.170,word.lower():a
+2.106,word.lower():us
+2.089,-1:word.lower():k-4

Weight?,Feature
+6.247,word.lower():ii
+5.235,word.lower():games
+5.134,word.lower():olympic
+4.037,word.lower():ramadan
+3.768,word.lower():i
+3.468,-1:word.lower():war
+3.437,word.lower():katrina
+3.330,word.lower():olympics
+3.259,word.lower():holocaust
+3.227,+1:word.lower():war

Weight?,Feature
+3.996,word.lower():day
+3.229,word.lower():open
+3.181,word.lower():games
+3.168,+1:word.lower():mascots
+3.150,+1:word.lower():tore
+2.639,-1:word.lower():awareness
+2.637,word.lower():week
+2.388,-1:word.lower():hurricane
+2.350,word.lower():sabbath
+2.337,word.lower():series

Weight?,Feature
+7.974,word.lower():caribbean
+7.808,word.lower():u.s.
+6.651,word.lower():iran
+5.979,word.lower():martian
+5.892,word.lower():israel
+5.862,word.lower():disneyland
+5.727,word.lower():china
+5.656,word.lower():u.n.
+5.534,word.lower():iraq
+5.390,word.lower():russia

Weight?,Feature
+4.802,word.lower():gulf
+4.373,word.lower():east
+3.997,word.lower():republic
+3.679,word.lower():forces
+3.484,word.lower():island
+3.479,word.lower():airport
+3.393,word.lower():marines
+3.336,word.lower():caribbean
+3.213,word.lower():iran
+3.199,word.lower():marine

Weight?,Feature
+9.909,word.lower():palestinian
+9.853,word.lower():iraqi
+9.813,word.lower():israeli
+9.734,word.lower():nepal
+9.702,word.lower():palestinians
+9.468,word.lower():afghan
+9.412,word.lower():niger
+9.320,word.lower():iranian
+9.099,word.lower():iraqis
+8.936,word.lower():arabs

Weight?,Feature
+6.130,word.lower():cypriot
+5.446,-1:word.lower():bosnian
+4.632,word.lower():cypriots
+4.521,+1:word.lower():mayor
+4.317,word.lower():serb
+3.502,-1:word.lower():democratic
+3.319,word.lower():serbs
+3.129,-1:postag:NNP
+3.024,-1:word.lower():soviet
+2.979,word.lower():african

Weight?,Feature
+7.165,word.lower():katrina
+6.899,word.lower():marburg
+6.270,word.lower():h5n1
+5.916,word.lower():rita
+4.446,word.lower():paul
+4.400,word.lower():aids
+3.684,word.lower():ebola
+3.217,word.lower():her-2
+2.910,word.lower():acc
+2.866,+1:word.lower():strain

Weight?,Feature
+3.694,word.lower():rita
+2.515,-1:word.lower():hurricane
+2.498,word.lower():flu
+2.368,word.lower():katrina
+1.958,-1:word.lower():type
+1.862,+1:word.lower():rita
+1.861,-1:postag:NN
+1.854,word.lower():one
+1.763,-1:word.lower():hurricanes
+1.727,-1:postag[:2]:NN

Weight?,Feature
+9.576,word.lower():philippine
+7.817,word.lower():taleban
+7.549,word.lower():al-qaida
+6.743,word.lower():iraqi
+6.649,word.lower():taliban
+6.596,word.lower():eu
+6.467,word.lower():congress
+6.362,word.lower():hamas
+6.230,word.lower():nato
+6.158,word.lower():hezbollah

Weight?,Feature
+4.384,-1:word.lower():people
+4.123,-1:word.lower():armed
+4.104,word.lower():ministry
+3.777,-1:word.lower():associated
+3.753,word.lower():for
+3.676,-1:word.lower():english
+3.576,word.lower():nations
+3.564,+1:word.lower():post
+3.540,-1:word.lower():growth
… 6022 more positive …,… 6022 more positive …

Weight?,Feature
+7.627,word.lower():mr.
+7.506,word.lower():vice
+7.395,word.lower():prime
+7.374,word.lower():president
+7.043,word.lower():obama
+6.096,word.lower():western
+5.766,word.lower():senator
+5.631,word.lower():ms.
+5.326,word.lower():peter
+5.181,word.lower():khodorkovsky

Weight?,Feature
+3.409,word.lower():lankan
+3.338,word.lower():obama
+3.031,word.lower():sharon
+2.906,+1:word.lower():legally
+2.901,word.lower():cool
+2.901,+1:word.lower():cool
+2.896,-1:word.lower():paul
+2.890,-1:word.lower():davis
+2.859,-1:word.lower():condoleezza
… 6041 more positive …,… 6041 more positive …

Weight?,Feature
+11.211,word.lower():tuesday
+11.045,word.lower():monday
+11.043,word.lower():thursday
+11.022,word.lower():wednesday
+10.991,word.lower():friday
+10.699,word.lower():sunday
+10.643,word.lower():saturday
+8.665,word.lower():1980s
+8.438,word.lower():1970s
+8.423,word.lower():1990s

Weight?,Feature
+5.994,word.lower():day
+5.764,word.lower():sunday
+5.498,word.lower():monday
+5.493,word.lower():wednesday
+5.292,word.lower():friday
+5.232,word.lower():thursday
+5.036,word.lower():decades
+4.680,word.lower():tuesday
+4.634,word.lower():saturday
+4.452,word.lower():palestinian


In [19]:
import pickle
# Persist the trained CRF to disk. The `with` block guarantees the file handle is
# flushed and closed even if pickling fails part-way through.
# NOTE: only load this pickle back from a trusted location — unpickling executes
# arbitrary code.
file_name = 'casual_test.pkl'
with open(file_name, 'wb') as model_file:
    pickle.dump(crf, model_file)