In [1]:
# Environment setup: install Rasa Core and Rasa NLU (with the spaCy extra), then
# download the large English spaCy model and link it under the shortcut name
# 'en', which a later cell loads via spacy.load('en').
# NOTE(review): none of these installs pin a version, so a fresh run may resolve
# different releases than the ones shown in the recorded output.
!pip install rasa_core
!pip install rasa_nlu[spacy]
!python -m spacy download en_core_web_lg
!python -m spacy link en_core_web_lg en


# Upgrade the scikit-learn / scipy / sklearn-crfsuite stack.
# NOTE(review): rasa_nlu and rasa_nlu[spacy] are installed a second time below —
# presumably to restore compatible dependency versions after the -U upgrade;
# confirm before removing the repeated lines.
!pip install -U scikit-learn scipy sklearn-crfsuite
!pip install rasa_nlu
!pip install rasa_nlu[spacy]




Collecting rasa_core
[?25l  Downloading https://files.pythonhosted.org/packages/cd/7b/58a6a997023a8b1f6a0e292d08fd88b4d39ec325e103c6cebd1eb38f6545/rasa_core-0.14.5-py3-none-any.whl (212kB)
[K    100% |████████████████████████████████| 215kB 1.9MB/s 
[?25hCollecting mattermostwrapper~=2.0 (from rasa_core)
  Downloading https://files.pythonhosted.org/packages/d7/fd/f1ce046ddaeffa5073f87d7800c27ad2c8e543e924a8418675c64aea6a14/mattermostwrapper-2.2.tar.gz
Collecting rasa-core-sdk~=0.14 (from rasa_core)
  Downloading https://files.pythonhosted.org/packages/bc/5b/343937a872b497079803878e5a6f75deb4aeb00504d32601a0ffd37b90f4/rasa_core_sdk-0.14.0-py2.py3-none-any.whl
Collecting python-socketio~=3.0 (from rasa_core)
[?25l  Downloading https://files.pythonhosted.org/packages/b5/db/dc823aa0b3397e52ebe8bba62ee39d6b9a088377809bc230d3ad2e5d2448/python_socketio-3.1.2-py2.py3-none-any.whl (43kB)
[K    100% |████████████████████████████████| 51kB 5.7MB/s 
[?25hCollecting rasa-nlu~=0.15 (

# imports

In [2]:
import spacy
import yaml
from spacy import displacy
from collections import Counter
import en_core_web_sm
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer
from rasa_nlu.model import Metadata, Interpreter
from rasa_nlu import config

from nltk.chunk import conlltags2tree, tree2conlltags
from pprint import pprint
from nltk.chunk import ne_chunk

from bs4 import BeautifulSoup
import requests
import re
import os
import warnings
warnings.filterwarnings("ignore")

# About the data

-> A collection of reviews of cafes, restaurants in Dhaka City, Bangladesh.

**Dhaka is the capital of Bangladesh. It is full of tourists, business travellers and local residents, and many of them eat out at restaurants. So the question arises: is it worth eating in restaurants in this city? The reviews left by restaurant guests come to our aid.**

**The database has almost 17,000 reviews. For each review, apart from its contents, the number of characters that has been determined, and whether the review is positive or negative was determined. A positive review is one where the rating was above 3 (on a scale of 1 to 5) or "positive" in words. Let's start!**

please check this notebook for more details : [What to order in Dhaka? 🍔](https://www.kaggle.com/michau96/what-to-order-in-dhaka) 



Let's load the Dhaka restaurant reviews dataset.

In [3]:
# Location of the Dhaka restaurant-review CSV inside the Kaggle input folder.
reviews_csv_path = '../input/restaurant-reviews-in-dhaka-bangladesh/reviews.csv'
restaurantbd = pd.read_csv(reviews_csv_path)

# Preview the first five rows.
restaurantbd.head(5)

Unnamed: 0,Review Text,Review,Recommends
0,"Friendly staff, good food and homely environme...",,True
1,Well...The Food was Good___Intrerior design is...,,True
2,The man who is foodie like me for him arabian ...,5.0,
3,ordered pizza and they were unable to serve th...,,False
4,This place is too much comfortable & food is d...,4.0,


In [4]:
# Display the full text of the review stored at row label 1.
restaurantbd.loc[1, 'Review Text']

'Well...The Food was Good___Intrerior design is nice... Environment was soo quite Fakafaka😜 Background music was too good... I gives you 9/10😊'

In [5]:
# Keep the review at row label 1 as the running example for the NLTK cells.
ex = restaurantbd.loc[1, 'Review Text']

### applying word tokenization and part-of-speech tagging to the sentence.

In [6]:
def preprocess(sent):
    """Tokenize a sentence and tag every token with its part of speech.

    Args:
        sent: Raw text to process.

    Returns:
        The result of ``nltk.pos_tag`` applied to the word tokens of ``sent``
        (a list of ``(token, tag)`` pairs).
    """
    tokens = nltk.word_tokenize(sent)
    return nltk.pos_tag(tokens)

# Tag the example review and display the (token, tag) pairs.
sent = preprocess(ex)
sent

[('Well', 'RB'),
 ('...', ':'),
 ('The', 'DT'),
 ('Food', 'NNP'),
 ('was', 'VBD'),
 ('Good___Intrerior', 'NNP'),
 ('design', 'NN'),
 ('is', 'VBZ'),
 ('nice', 'JJ'),
 ('...', ':'),
 ('Environment', 'NNP'),
 ('was', 'VBD'),
 ('soo', 'VBN'),
 ('quite', 'RB'),
 ('Fakafaka😜', 'NNP'),
 ('Background', 'NNP'),
 ('music', 'NN'),
 ('was', 'VBD'),
 ('too', 'RB'),
 ('good', 'JJ'),
 ('...', ':'),
 ('I', 'PRP'),
 ('gives', 'VBZ'),
 ('you', 'PRP'),
 ('9/10😊', 'CD')]

### chunking

The basic technique we will use for entity detection is chunking, which segments and labels multi-token sequences as illustrated in (picture below). The smaller boxes show the word-level tokenization and part-of-speech tagging, while the large boxes show higher-level chunking. Each of these larger boxes is called a chunk. Like tokenization, which omits whitespace, chunking usually selects a subset of the tokens. Also like tokenization, the pieces produced by a chunker do not overlap in the source text.

![](http://www.nltk.org/images/chunk-segmentation.png)

https://stackoverflow.com/questions/39124492/nltk-regexpparser-chunk-phrase-by-matching-exactly-one-item

In [7]:
# Noun-phrase grammar: an optional determiner, any number of adjectives,
# then a single NN-tagged noun.
pattern = 'NP: {<DT>?<JJ>*<NN>}'

# Build the regexp chunker and apply it to the POS-tagged example sentence.
cp = nltk.RegexpParser(pattern)
cs = cp.parse(sent)

print(cs)

(S
  Well/RB
  .../:
  The/DT
  Food/NNP
  was/VBD
  Good___Intrerior/NNP
  (NP design/NN)
  is/VBZ
  nice/JJ
  .../:
  Environment/NNP
  was/VBD
  soo/VBN
  quite/RB
  Fakafaka😜/NNP
  Background/NNP
  (NP music/NN)
  was/VBD
  too/RB
  good/JJ
  .../:
  I/PRP
  gives/VBZ
  you/PRP
  9/10😊/CD)


tree2conlltags(t) - Return a list of 3-tuples containing (word, tag, IOB-tag). Convert a tree to the CoNLL IOB tag format.

check this : https://stackoverflow.com/questions/40879520/nltk-convert-a-chunked-tree-into-a-list-iob-tagging

In [8]:
# Flatten the chunk tree into (word, POS tag, IOB tag) triples and pretty-print them.
iob_tagged = tree2conlltags(cs)
pprint(iob_tagged)

[('Well', 'RB', 'O'),
 ('...', ':', 'O'),
 ('The', 'DT', 'O'),
 ('Food', 'NNP', 'O'),
 ('was', 'VBD', 'O'),
 ('Good___Intrerior', 'NNP', 'O'),
 ('design', 'NN', 'B-NP'),
 ('is', 'VBZ', 'O'),
 ('nice', 'JJ', 'O'),
 ('...', ':', 'O'),
 ('Environment', 'NNP', 'O'),
 ('was', 'VBD', 'O'),
 ('soo', 'VBN', 'O'),
 ('quite', 'RB', 'O'),
 ('Fakafaka😜', 'NNP', 'O'),
 ('Background', 'NNP', 'O'),
 ('music', 'NN', 'B-NP'),
 ('was', 'VBD', 'O'),
 ('too', 'RB', 'O'),
 ('good', 'JJ', 'O'),
 ('...', ':', 'O'),
 ('I', 'PRP', 'O'),
 ('gives', 'VBZ', 'O'),
 ('you', 'PRP', 'O'),
 ('9/10😊', 'CD', 'O')]


In [9]:
# Tokenize and POS-tag the example review, then run NLTK's named-entity
# chunker over the tagged tokens.
tagged_tokens = pos_tag(word_tokenize(ex))
ne_tree = ne_chunk(tagged_tokens)
print(ne_tree)

(S
  Well/RB
  .../:
  The/DT
  (ORGANIZATION Food/NNP)
  was/VBD
  (ORGANIZATION Good___Intrerior/NNP)
  design/NN
  is/VBZ
  nice/JJ
  .../:
  Environment/NNP
  was/VBD
  soo/VBN
  quite/RB
  Fakafaka😜/NNP
  Background/NNP
  music/NN
  was/VBD
  too/RB
  good/JJ
  .../:
  I/PRP
  gives/VBZ
  you/PRP
  9/10😊/CD)


### Entity

In [10]:
# Install spaCy and fetch the small English model.
# NOTE(review): en_core_web_sm is already imported in the imports cell, and the
# recorded output shows this download replacing the installed 2.1.0 model with
# 2.0.0 — consider moving this install into the setup cell at the top.
!pip3 install spacy
!python3 -m spacy download en_core_web_sm

[33mYou are using pip version 19.0.3, however version 20.2.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Collecting en_core_web_sm==2.0.0 from https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz#egg=en_core_web_sm==2.0.0
[?25l  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz (37.4MB)
[K    100% |████████████████████████████████| 37.4MB 73.1MB/s 
[?25hInstalling collected packages: en-core-web-sm
  Found existing installation: en-core-web-sm 2.1.0
    Uninstalling en-core-web-sm-2.1.0:
      Successfully uninstalled en-core-web-sm-2.1.0
  Running setup.py install for en-core-web-sm ... [?25l- \ done
[?25hSuccessfully installed en-core-web-sm-2.0.0
[33mYou are using pip version 19.0.3, however version 20.2.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' co

In [11]:
# Load the small English model through its package entry point.
# NOTE(review): `nlp` is rebound by spacy.load('en') in the next code cell, so
# this instance is not the one used for the entity examples that follow.
nlp = en_core_web_sm.load()

![](http://www.nltk.org/images/ie-architecture.png)

*Simple Pipeline Architecture for an Information Extraction System. This system takes the raw text of a document as its input, and generates a list of (entity, relation, entity) tuples as its output. For example, given a document that indicates that the company Georgia-Pacific is located in Atlanta, it might generate the tuple ([ORG: 'Georgia-Pacific'] 'in' [LOC: 'Atlanta']).*

In [12]:

# 'en' is the shortcut link created in the setup cell (it points at en_core_web_lg).
nlp = spacy.load('en')
docx = nlp("I am looking for an Italian Restaurant where I can eat")

# Report every detected entity with its label and character offsets.
for ent in docx.ents:
    print("1.value : ", ent.text, "2.entity : ", ent.label_,
          "3.start : ", ent.start_char, "4.end : ", ent.end_char)

1.value :  Italian 2.entity :  NORP 3.start :  20 4.end :  27


In [13]:
# Run spaCy NER over the first five reviews and list the entities found in each.
# `ex` and `doc` are left holding the last review / its parse; later cells read them.
for ex in restaurantbd['Review Text'].head(5):
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>REVIEW<<<<<<<<<<<<<<<<<<<<<<<<<<<")
    print(ex)
    doc = nlp(ex)
    print("************************OUTPUT*********************************")
    entity_pairs = [(ent.text, ent.label_) for ent in doc.ents]
    pprint(entity_pairs)

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>REVIEW<<<<<<<<<<<<<<<<<<<<<<<<<<<
Friendly staff, good food and homely environment 💜💜💜
************************OUTPUT*********************************
[('💜💜💜', 'PERSON')]
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>REVIEW<<<<<<<<<<<<<<<<<<<<<<<<<<<
Well...The Food was Good___Intrerior design is nice... Environment was soo quite Fakafaka😜 Background music was too good... I gives you 9/10😊
************************OUTPUT*********************************
[]
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>REVIEW<<<<<<<<<<<<<<<<<<<<<<<<<<<
The man who is foodie like me for him arabian master is a nice place. Environment and food quality is too good. Specially their staffs’ behaviour are so nice. Good environment, quality food and and suitable price with these arabian master is awesome in a word.
************************OUTPUT*********************************
[]
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>REVIEW<<<<<<<<<<<<<<<<<<<<<<<<<<<
ordered pizza and they were unable to serve then ordered set menu 

In [14]:
# Select the review explicitly instead of relying on `ex` leaking out of the
# loop in the previous cell (whose last iteration was row 4), so this cell
# gives the same result no matter which cells ran before it.
ex = restaurantbd['Review Text'][4]
doc = nlp(ex)

# (entity text, entity label) pairs; an empty list means no entity was detected.
pprint([(ent.text, ent.label_) for ent in doc.ents])

[]


### Token

<ul>
    <li>B ==> the token begins an entity</li>
    <li>I ==> the token is inside an entity</li>
    <li>O ==> the token is outside an entity</li>
    <li>' ' ==> no entity tag is set</li>
</ul>

You can see that we got "[]" several times. Why?
The answer is here: https://medium.com/@rajat.jain1/natural-language-extraction-using-spacy-on-a-set-of-novels-88b159d68686

Here is a summary of spaCy’s entity types.

![](https://miro.medium.com/max/767/0*SqZbOvfNIdPsuVh3.png)

In [15]:
# For every token of the last parsed review, show its IOB flag and entity type.
token_rows = [(token, token.ent_iob_, token.ent_type_) for token in doc]
pprint(token_rows)

[(This, 'O', ''),
 (place, 'O', ''),
 (is, 'O', ''),
 (too, 'O', ''),
 (much, 'O', ''),
 (comfortable, 'O', ''),
 (&, 'O', ''),
 (food, 'O', ''),
 (is, 'O', ''),
 (delicious, 'O', ''),
 (.., 'O', ''),
 (Every, 'O', ''),
 (Item, 'O', ''),
 (Is, 'O', ''),
 (testy, 'O', ''),
 (&, 'O', ''),
 (pizza, 'O', ''),
 (or, 'O', ''),
 (set, 'O', ''),
 (menu, 'O', ''),
 (is, 'O', ''),
 (Best, 'O', ''),
 (..., 'O', ''),
 (&, 'O', ''),
 (most, 'O', ''),
 (important, 'O', ''),
 (Thing, 'O', ''),
 (is, 'O', ''),
 (Their, 'O', ''),
 (behavior, 'O', ''),
 (is, 'O', ''),
 (friendly, 'O', ''),
 (.., 'O', ''),
 (It, 'O', ''),
 (is, 'O', ''),
 (very, 'O', ''),
 (good, 'O', ''),
 (place, 'O', ''),
 (.., 'O', ''),
 (environment, 'O', ''),
 (is, 'O', ''),
 (good, 'O', ''),
 (enough, 'O', ''),
 (.., 'O', ''),
 (behavior, 'O', ''),
 (of, 'O', ''),
 (all, 'O', ''),
 (the, 'O', ''),
 (staff, 'O', ''),
 (is, 'O', ''),
 (good, 'O', ''),
 (and, 'O', ''),
 (prices, 'O', ''),
 (are, 'O', ''),
 (reasonable, 'O', ''),
 (..

In [16]:
# Load en_core_web_sm by name and list the components of its processing pipeline.
nlp0 = spacy.load('en_core_web_sm')
nlp0.pipe_names

['tagger', 'parser', 'ner']

# Using Rasa NLU and SpaCy

![](https://cdn-images-1.medium.com/max/1000/1*Uf_qQ0zF8G8y9zUhndA08w.png)

we have 2 datasets, 

>>>Datasets<<<

1. rasa_data.json
2. config_spacy.yaml

let's explore them

inside rasa_data.json we have :

In [17]:
# Print the raw Rasa NLU training file.
!cat ../input/rasaspacydata/rasa_data.json










































































































































































































































































































































  }

In interactive mode this cell shows the correct output, but after committing the kernel the output is blank (probably a Docker issue on Kaggle), so I am pasting the interactive-mode output as markdown below:

{
    "rasa_nlu_data": {
      "regex_features": [
        {
          "name": "zipcode",
          "pattern": "[0-9]{5}"
        },
        {
          "name": "greet",
          "pattern": "hey[^\\s]*"
        }
      ],
      "entity_synonyms": [
        {
          "value": "chinese",
          "synonyms": ["Chinese", "Chines", "chines"]
        },
        {
          "value": "vegetarian",
          "synonyms": ["veggie", "vegg"]
        }
      ],
      "common_examples": [
        {
          "text": "hey", 
          "intent": "greet", 
          "entities": []
        }, 
        {
          "text": "howdy", 
          "intent": "greet", 
          "entities": []
        }, 
        {
          "text": "hey there",
          "intent": "greet", 
          "entities": []
        }, 
        {
          "text": "hello", 
          "intent": "greet", 
          "entities": []
        }, 
        {
          "text": "hi", 
          "intent": "greet", 
          "entities": []
        },
        {
          "text": "good morning",
          "intent": "greet",
          "entities": []
        },
        {
          "text": "good evening",
          "intent": "greet",
          "entities": []
        },
        {
          "text": "dear sir",
          "intent": "greet",
          "entities": []
        },
        {
          "text": "yes", 
          "intent": "affirm", 
          "entities": []
        }, 
        {
          "text": "yep", 
          "intent": "affirm", 
          "entities": []
        }, 
        {
          "text": "yeah", 
          "intent": "affirm", 
          "entities": []
        },
        {
          "text": "indeed",
          "intent": "affirm",
          "entities": []
        },
        {
          "text": "that's right",
          "intent": "affirm",
          "entities": []
        },
        {
          "text": "ok",
          "intent": "affirm",
          "entities": []
        },
        {
          "text": "great",
          "intent": "affirm",
          "entities": []
        },
        {
          "text": "right, thank you",
          "intent": "affirm",
          "entities": []
        },
        {
          "text": "correct",
          "intent": "affirm",
          "entities": []
        },
        {
          "text": "great choice",
          "intent": "affirm",
          "entities": []
        },
        {
          "text": "sounds really good",
          "intent": "affirm",
          "entities": []
        },
        {
          "text": "i'm looking for a place to eat",
          "intent": "restaurant_search",
          "entities": []
        },
        {
          "text": "I want to grab lunch",
          "intent": "restaurant_search",
          "entities": []
        },
        {
          "text": "I am searching for a dinner spot",
          "intent": "restaurant_search",
          "entities": []
        },
        {
          "text": "i'm looking for a place in the north of town",
          "intent": "restaurant_search",
          "entities": [
            {
              "start": 31,
              "end": 36,
              "value": "north",
              "entity": "location"
            }
          ]
        },
        {
          "text": "show me chinese restaurants",
          "intent": "restaurant_search",
          "entities": [
            {
              "start": 8,
              "end": 15,
              "value": "chinese",
              "entity": "cuisine"
            }
          ]
        },
        {
          "text": "show me chines restaurants in the north",
          "intent": "restaurant_search",
          "entities": [
            {
              "start": 8,
              "end": 14,
              "value": "chinese",
              "entity": "cuisine"
            },
            {
              "start": 34,
              "end": 39,
              "value": "north",
              "entity": "location"
            }
          ]
        },
        {
          "text": "show me a mexican place in the centre", 
          "intent": "restaurant_search", 
          "entities": [
            {
              "start": 31, 
              "end": 37, 
              "value": "centre", 
              "entity": "location"
            }, 
            {
              "start": 10, 
              "end": 17, 
              "value": "mexican", 
              "entity": "cuisine"
            }
          ]
        },
        {
          "text": "i am looking for an indian spot called olaolaolaolaolaola",
          "intent": "restaurant_search",
          "entities": [
            {
              "start": 20,
              "end": 26,
              "value": "indian",
              "entity": "cuisine"
            }
          ]
        },     {
          "text": "search for restaurants",
          "intent": "restaurant_search",
          "entities": []
        },
        {
          "text": "anywhere in the west",
          "intent": "restaurant_search",
          "entities": [
            {
              "start": 16,
              "end": 20,
              "value": "west",
              "entity": "location"
            }
          ]
        },
        {
          "text": "anywhere near 18328",
          "intent": "restaurant_search",
          "entities": [
            {
              "start": 14,
              "end": 19,
              "value": "18328",
              "entity": "location"
            }
          ]
        },
        {
          "text": "I am looking for asian fusion food",
          "intent": "restaurant_search",
          "entities": [
            {
              "start": 17,
              "end": 29,
              "value": "asian fusion",
              "entity": "cuisine"
            }
          ]
        },
        {
          "text": "I am looking a restaurant in 29432",
          "intent": "restaurant_search",
          "entities": [
            {
              "start": 29,
              "end": 34,
              "value": "29432",
              "entity": "location"
            }
          ]
        },
        {
          "text": "I am looking for mexican indian fusion",
          "intent": "restaurant_search",
          "entities": [
            {
              "start": 17,
              "end": 38,
              "value": "mexican indian fusion",
              "entity": "cuisine"
            }
          ]
        },
        {
          "text": "central indian restaurant",
          "intent": "restaurant_search",
          "entities": [
            {
              "start": 0,
              "end": 7,
              "value": "central",
              "entity": "location"
            },
            {
              "start": 8,
              "end": 14,
              "value": "indian",
              "entity": "cuisine"
            }
          ]
        },
        {
          "text": "bye", 
          "intent": "goodbye", 
          "entities": []
        }, 
        {
          "text": "goodbye", 
          "intent": "goodbye", 
          "entities": []
        }, 
        {
          "text": "good bye", 
          "intent": "goodbye", 
          "entities": []
        }, 
        {
          "text": "stop", 
          "intent": "goodbye", 
          "entities": []
        }, 
        {
          "text": "end", 
          "intent": "goodbye", 
          "entities": []
        },
        {
          "text": "farewell",
          "intent": "goodbye",
          "entities": []
        },
        {
          "text": "Bye bye",
          "intent": "goodbye",
          "entities": []
        },
        {
          "text": "have a good one",
          "intent": "goodbye",
          "entities": []
        }
      ]
    }
  }

let's load rasa_data.json data

In [18]:
# Parse the Rasa NLU training file into a TrainingData object and display its repr.
# NOTE(review): the training cell further down loads the same file again into
# `training_data`; `train` is not referenced by any later cell shown here.
train = load_data("../input/rasaspacydata/rasa_data.json")
train

<rasa_nlu.training_data.training_data.TrainingData at 0x7f25832a0b70>

In [19]:
# IPython help lookup for Trainer.train (interactive aid only; it produces no
# output in the saved notebook).
Trainer.train?

In [20]:
# Disabled first attempt, kept for reference.
# NOTE(review): the working training cell below builds Trainer(config) first and
# then calls .train(training_data) on that instance.
#mytrainer = Trainer.train(config.load("../input/rasaspacydata/config_spacy.yaml"))

let's read config_spacy.yaml file

In [21]:
# Show the parsed pipeline configuration; if the YAML is malformed, print the
# parser error instead of raising.
with open("../input/rasaspacydata/config_spacy.yaml", 'r') as config_file:
    try:
        parsed_config = yaml.safe_load(config_file)
    except yaml.YAMLError as yaml_error:
        print(yaml_error)
    else:
        print(parsed_config)

{'language': 'en', 'pipeline': 'spacy_sklearn'}


In [22]:

# Load the annotated examples and the pipeline configuration, fit the NLU
# pipeline, and save the trained model under ./models/.
training_data = load_data('../input/rasaspacydata/rasa_data.json')
nlu_config = config.load('../input/rasaspacydata/config_spacy.yaml')

trainer = Trainer(nlu_config)
trainer.train(training_data)

# persist() returns the directory the model is stored in.
model_directory = trainer.persist('./models/')

Fitting 2 folds for each of 6 candidates, totalling 12 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.1s finished


In [23]:

# `model_directory` points to the folder the trained model was persisted in.
interpreter = Interpreter.load(model_directory)

**Testing on simple sentence!**

In [24]:
# Predict the intent (and any entities) of a hand-written restaurant query.
sample_query = "I am looking for an Italian Restaurant where I can eat"
interpreter.parse(sample_query)

{'intent': {'name': 'restaurant_search', 'confidence': 0.6954585646004395},
 'entities': [],
 'intent_ranking': [{'name': 'restaurant_search',
   'confidence': 0.6954585646004395},
  {'name': 'affirm', 'confidence': 0.1866532286261028},
  {'name': 'greet', 'confidence': 0.06439299878643577},
  {'name': 'goodbye', 'confidence': 0.053495207987021565}],
 'text': 'I am looking for an Italian Restaurant where I can eat'}

In [25]:
class bcolors:
    """ANSI escape sequences for styling text printed to a terminal or notebook.

    Prefix a string with one of the codes and append ``ENDC`` to switch the
    styling off again.
    """

    # Colour codes, named after the kind of message they are meant to mark.
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Reset: clears any colour/attribute set by the codes above.
    ENDC = '\033[0m'

Testing on [**Restaurant Reviews in Dhaka, Bangladesh**](https://www.kaggle.com/tuxboy/restaurant-reviews-in-dhaka-bangladesh) dataset

In [26]:
# Parse the first ten reviews with the trained interpreter, printing a blue
# rule before each result.
for review_text in restaurantbd['Review Text'].head(10):
    # Separate names for the input text and the parse result (the original
    # reused one variable for both).
    parsed = interpreter.parse(review_text)
    # Close the rule with ENDC: the original ended it with OKBLUE again, so the
    # colour was never reset and leaked into everything printed afterwards.
    print(bcolors.OKBLUE + "---------------------------------------------------------------------------------------" + bcolors.ENDC)
    print(parsed)

[94m---------------------------------------------------------------------------------------[94m
{'intent': {'name': 'restaurant_search', 'confidence': 0.5466333279849948}, 'entities': [], 'intent_ranking': [{'name': 'restaurant_search', 'confidence': 0.5466333279849948}, {'name': 'affirm', 'confidence': 0.20967939386792822}, {'name': 'greet', 'confidence': 0.13802598776133976}, {'name': 'goodbye', 'confidence': 0.10566129038573698}], 'text': 'Friendly staff, good food and homely environment 💜💜💜'}
[94m---------------------------------------------------------------------------------------[94m
{'intent': {'name': 'restaurant_search', 'confidence': 0.5194643704668908}, 'entities': [], 'intent_ranking': [{'name': 'restaurant_search', 'confidence': 0.5194643704668908}, {'name': 'affirm', 'confidence': 0.2723965342149333}, {'name': 'greet', 'confidence': 0.11076155351774332}, {'name': 'goodbye', 'confidence': 0.09737754180043252}], 'text': 'Well...The Food was Good___Intrerior design is n

references : 
1. https://github.com/RasaHQ/rasa/issues/2057
2. https://github.com/RasaHQ/rasa/issues/1066
3. https://legacy-docs.rasa.com/docs/nlu/0.9.2/python/
4. http://www.nltk.org/book_1ed/ch07.html

NOTE: when running this notebook on Google Colab you may hit `KeyError: 'PUNKTSIDE_FIN'`; see [**How to resolve KeyError: 'PUNKTSIDE_FIN'**](https://stackoverflow.com/questions/60068824/how-to-resolve-keyerror-punktside-fin)