In [1]:
from IPython.display import display, Markdown

# Render the raw Markdown training file so the annotation syntax is visible
# before it is parsed. The encoding is passed explicitly because open()
# otherwise falls back to the platform's locale encoding, which can differ
# between machines (assumes the file is stored as UTF-8).
with open("data/example.md", "r", encoding="utf-8") as f:
    display(Markdown(f.read()))

## restaurant_search
- i'm looking for a place to eat
- I want to grab lunch
- I am searching for a dinner spot
- i'm looking for a place in the [north](location) of town
- show me [chinese](cuisine) restaurants
- show me [chines](cuisine:chinese) restaurants in the [north](location)
- show me a [mexican](cuisine) place in the [centre](location)
- i am looking for an [indian](cuisine) spot called olaolaolaolaolaola
- search for restaurants
- anywhere in the [west](location)
- anywhere near [18328](location)
- I am looking for [asian fusion](cuisine) food
- I am looking a restaurant in [29432](location)
- I am looking for [mexican indian fusion](cuisine)
- [central](location) [indian](cuisine) restaurant

In [2]:
from spacy_crfsuite import read_file

# Parse the annotated Markdown file into a list of example dicts. As the output
# below shows, each example has a 'text' string and an 'entities' list whose
# items carry character offsets ('start'/'end'), a 'value' and an 'entity' label.
# A synonym annotation such as [chines](cuisine:chinese) keeps the span of the
# surface text ('chines') but reports the value 'chinese'.
train_data = read_file("data/example.md")
train_data

[{'text': "i'm looking for a place to eat", 'entities': []},
 {'text': 'I want to grab lunch', 'entities': []},
 {'text': 'I am searching for a dinner spot', 'entities': []},
 {'text': "i'm looking for a place in the north of town",
  'entities': [{'start': 31,
    'end': 36,
    'value': 'north',
    'entity': 'location'}]},
 {'text': 'show me chinese restaurants',
  'entities': [{'start': 8,
    'end': 15,
    'value': 'chinese',
    'entity': 'cuisine'}]},
 {'text': 'show me chines restaurants in the north',
  'entities': [{'start': 8,
    'end': 14,
    'value': 'chinese',
    'entity': 'cuisine'},
   {'start': 34, 'end': 39, 'value': 'north', 'entity': 'location'}]},
 {'text': 'show me a mexican place in the centre',
  'entities': [{'start': 10,
    'end': 17,
    'value': 'mexican',
    'entity': 'cuisine'},
   {'start': 31, 'end': 37, 'value': 'centre', 'entity': 'location'}]},
 {'text': 'i am looking for an indian spot called olaolaolaolaolaola',
  'entities': [{'start': 20,
  

In [3]:
import srsly

# Load the settings for the CRF component from JSON. For this example the file
# holds only 'c1' and 'c2' (see output) - presumably the L1/L2 regularisation
# coefficients of the CRF trainer; confirm against the spacy_crfsuite docs.
component_config = srsly.read_json("data/config.json")
component_config

{'c1': 0.03, 'c2': 0.06}

In [4]:
from spacy_crfsuite import CRFExtractor

# Build the extractor with the loaded configuration; the bare name on the last
# line only displays the object's repr to confirm it was created.
crf_extractor = CRFExtractor(component_config=component_config)
crf_extractor

<spacy_crfsuite.crf_extractor.CRFExtractor at 0x12a569b38>

In [5]:
from spacy_crfsuite.crf_extractor import prepare_example
from spacy_crfsuite.dense_features import DenseFeatures
from spacy_crfsuite.tokenizer import SpacyTokenizer

# Tokenizer shared by every call to prepare_example below.
tokenizer = SpacyTokenizer()

# Dense features are only instantiated when the extractor asks for them.
dense_features = DenseFeatures() if crf_extractor.use_dense_features() else None

# Convert each raw example into the per-token form that is passed to train().
train_dataset = [
    prepare_example(
        raw_example,
        crf_extractor=crf_extractor,
        tokenizer=tokenizer,
        dense_features=dense_features,
    )
    for raw_example in train_data
]

# Show the first prepared example as a sanity check.
train_dataset[0]

[CRFToken(text='i', tag='PRP', entity='O', pattern={}, dense_features=[]),
 CRFToken(text="'m", tag='VBP', entity='O', pattern={}, dense_features=[]),
 CRFToken(text='looking', tag='', entity='O', pattern={}, dense_features=[]),
 CRFToken(text='for', tag='', entity='O', pattern={}, dense_features=[]),
 CRFToken(text='a', tag='', entity='O', pattern={}, dense_features=[]),
 CRFToken(text='place', tag='', entity='O', pattern={}, dense_features=[]),
 CRFToken(text='to', tag='', entity='O', pattern={}, dense_features=[]),
 CRFToken(text='eat', tag='', entity='O', pattern={}, dense_features=[])]

In [6]:
# Fit the CRF on the prepared examples. The displayed return value is a
# CRFExtractor at the same address as the instance created earlier, i.e. the
# call hands back the extractor itself.
crf_extractor.train(train_dataset)

<spacy_crfsuite.crf_extractor.CRFExtractor at 0x12a569b38>

In [7]:
# Requires scikit-learn: `%pip install scikit-learn` (the "sklearn" name on PyPI
# is a deprecated alias and should not be installed).

# NOTE(review): this scores the model on the same data it was trained on, so the
# perfect scores below say nothing about generalisation.
# Element [1] of the result is what gets printed as the per-label report;
# presumably eval() returns a tuple - confirm what the other elements hold.
classification_report = crf_extractor.eval(train_dataset)
print(classification_report[1])

              precision    recall  f1-score   support

   B-cuisine      1.000     1.000     1.000         2
   I-cuisine      1.000     1.000     1.000         1
   L-cuisine      1.000     1.000     1.000         2
   U-cuisine      1.000     1.000     1.000         5
  U-location      1.000     1.000     1.000         7

   micro avg      1.000     1.000     1.000        17
   macro avg      1.000     1.000     1.000        17
weighted avg      1.000     1.000     1.000        17





In [8]:
# Print a text summary of what the trained model learned: the most likely label
# transitions and the highest-weighted positive features (see output below).
print(crf_extractor.explain())

Most likely transitions:
O          -> O          1.617362
U-cuisine  -> O          1.277659
B-cuisine  -> I-cuisine  1.206597
I-cuisine  -> L-cuisine  0.800963
O          -> U-location 0.719703
B-cuisine  -> L-cuisine  0.589600
O          -> U-cuisine  0.402591
U-location -> U-cuisine  0.325804
O          -> B-cuisine  0.150878
L-cuisine  -> O          0.087336

Positive features:
2.186071 O          0:bias:bias
1.973212 U-location -1:low:the
1.135395 B-cuisine  -1:low:for
1.121395 U-location 0:prefix5:centr
1.121395 U-location 0:prefix2:ce
1.106081 U-location 0:digit
1.019241 U-cuisine  0:prefix5:chine
1.019241 U-cuisine  0:prefix2:ch
1.011240 U-cuisine  0:suffix2:an
0.945071 U-cuisine  -1:low:me


In [9]:
from spacy_crfsuite.tokenizer import SpacyTokenizer
from spacy_crfsuite.dense_features import DenseFeatures

# Run the trained extractor on a single unseen sentence, reusing the
# `tokenizer` and `dense_features` objects created for training.
example = {"text": "show mexican restaurents up north"}
# The return value is discarded here; presumably prepare_example() also
# annotates `example` in place so that process() can use it - TODO confirm.
prepare_example(example, crf_extractor=crf_extractor, tokenizer=tokenizer, dense_features=dense_features)
# Yields a list of entity dicts with character offsets, value, entity label and
# a confidence score (see output below).
crf_extractor.process(example)

[{'start': 5,
  'end': 12,
  'value': 'mexican',
  'entity': 'cuisine',
  'confidence': 0.5823148506311286},
 {'start': 28,
  'end': 33,
  'value': 'north',
  'entity': 'location',
  'confidence': 0.8863076478494413}]

In [10]:
import spacy

from spacy_crfsuite import CRFEntityExtractor

# Wrap the trained extractor as a component of a blank English pipeline.
# NOTE(review): handing a component *object* to add_pipe() is the spaCy v2
# calling convention; spaCy v3 expects a registered factory name - confirm the
# spaCy version this notebook is pinned to.
nlp = spacy.blank("en")
pipe = CRFEntityExtractor(nlp, crf_extractor=crf_extractor)
nlp.add_pipe(pipe)

# The entities found by the CRF component are read back from doc.ents.
doc = nlp("show mexican restaurents up north")
for ent in doc.ents:
    print(f"{ent.text} -- {ent.label_}")

mexican -- cuisine
north -- location


In [None]:
# Persist the trained extractor. No path is passed, so the file location is
# whatever to_disk() uses by default - NOTE(review): confirm that default, or
# pass an explicit path so the output location is obvious to readers.
crf_extractor.to_disk()