# Process Synonyms

This notebook uses a combination of Python data science libraries and the Google Natural Language API (machine learning) to expand the vocabulary of the chatbot by generating synonyms for topics created in the previous notebook.

In [1]:
!pip uninstall -y google-cloud-datastore

Uninstalling google-cloud-datastore-1.15.0:
  Successfully uninstalled google-cloud-datastore-1.15.0


In [2]:
!pip install google-cloud-datastore

Collecting google-cloud-datastore
  Using cached https://files.pythonhosted.org/packages/40/7c/e1dec4fd96448fded7812f23be75cc3697534e7252d018499a9fb40fb9cc/google_cloud_datastore-1.15.0-py2.py3-none-any.whl
Installing collected packages: google-cloud-datastore
Successfully installed google-cloud-datastore-1.15.0


In [3]:
!pip install inflect

Collecting inflect
  Downloading https://files.pythonhosted.org/packages/2a/14/49a8afaaa66fb49cda8e60512f0fc07594232fb10ea6aa8995c069172cf6/inflect-3.0.2-py2.py3-none-any.whl
Collecting importlib-metadata (from inflect)
  Downloading https://files.pythonhosted.org/packages/8e/58/cdea07eb51fc2b906db0968a94700866fc46249bdc75cac23f9d13168929/importlib_metadata-1.7.0-py2.py3-none-any.whl
Collecting zipp>=0.5 (from importlib-metadata->inflect)
  Downloading https://files.pythonhosted.org/packages/96/0a/67556e9b7782df7118c1f49bdc494da5e5e429c93aa77965f33e81287c8c/zipp-1.2.0-py2.py3-none-any.whl
Collecting contextlib2; python_version < "3" (from importlib-metadata->inflect)
  Downloading https://files.pythonhosted.org/packages/85/60/370352f7ef6aa96c52fb001831622f50f923c1d575427d021b8ab3311236/contextlib2-0.6.0.post1-py2.py3-none-any.whl
Installing collected packages: contextlib2, zipp, importlib-metadata, inflect
Successfully installed contextlib2-0.6.0.post1 importlib-metadata-1.7.0 inflect-

After the installs above finish, choose Reset Session > Restart so the kernel loads the newly installed packages, then resume with the following cells.

In [3]:
# One-time setup: fetch the NLTK corpora this notebook reads later
# (the stop-word list and the WordNet database).
import nltk

download_ok = [nltk.download(corpus) for corpus in ('stopwords', 'wordnet')]
# Leave the status of the last download as the cell's displayed value.
download_ok[-1]

[nltk_data] Downloading package stopwords to /content/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /content/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [2]:
from nltk.corpus import stopwords

# English stop words, held in a set for the membership test applied to every topic word.
english_stop_words = stopwords.words('english')
stop = set(english_stop_words)

In [4]:
from google.cloud import datastore

In [5]:
# Datastore client used further down to build Synonym keys and write Synonym entities.
# No project or credentials are passed explicitly here.
datastore_client = datastore.Client()

In [6]:
# Load every Topic entity into memory; the synonym cell below iterates over this list.
# Reuses the client created in the previous cell instead of constructing a second,
# identical one; `client` is kept as an alias in case other cells refer to it.
client = datastore_client
query = client.query(kind='Topic')
results = list(query.fetch())

In [7]:
import inflect
# Inflection engine; its plural() method is used below to add the plural form of
# each topic word and of each synonym.
plurals = inflect.engine()

## Extract Synonyms with Python
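Split each topic into words and look up synonyms for each word in WordNet (via NLTK), which serves as the "thesaurus".  Only noun senses are used, each synonym is also stored in its plural form, and a word that WordNet also lists as an adjective gets no dictionary synonyms.  Store these in Datastore and link them back to the topic.  Note this section uses the concept of "stop words" to filter out articles and other parts of speech that don't contribute to the meaning of the topic.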

In [8]:
from nltk.corpus import wordnet
from sets import Set

# Datastore kind under which every synonym -> topic mapping is stored.
kind = 'Synonym'

# Datastore refuses commits that carry more than this many entities.
_MAX_ENTITIES_PER_COMMIT = 500


def _noun_synonyms(word):
  """Return WordNet noun synonyms of ``word`` plus the plural of each one.

  Only noun synsets contribute lemmas. As soon as an adjective synset is met
  the word is treated as a modifier and an empty set is returned, discarding
  anything collected so far.

  Args:
    word: a single word taken from a topic name.

  Returns:
    A set of synonym strings (possibly empty).
  """
  found = set()
  for syn in wordnet.synsets(word):
    pos = syn.pos()
    if pos == 'n':
      for lemma in syn.lemmas():
        name = lemma.name()
        # isalpha() drops lemmas containing anything but letters
        # (underscores, hyphens, digits, ...).
        if name.isalpha():
          found.add(name)
          found.add(plurals.plural(name))
    elif pos == 'a':
      return set()
  return found


def _put_synonyms(names, topic_name):
  """Write one Synonym entity per name, each pointing at ``topic_name``.

  Entities are sent with put_multi in chunks so that one topic costs a handful
  of RPCs instead of one per synonym.

  Args:
    names: iterable of unique synonym strings; each becomes an entity key name.
    topic_name: value stored in the entity's 'synonym' property.
  """
  entities = []
  for name in sorted(names):
    entity = datastore.Entity(key=datastore_client.key(kind, name))
    entity['synonym'] = topic_name
    entities.append(entity)

  for start in range(0, len(entities), _MAX_ENTITIES_PER_COMMIT):
    datastore_client.put_multi(entities[start:start + _MAX_ENTITIES_PER_COMMIT])


# NOTE(review): a Synonym entity is keyed by the synonym text alone, so when two
# topics share a word (e.g. "leave") the topic processed later overwrites the
# mapping written for the earlier one.
for result in results:
  topic_name = result.key.name
  if not topic_name:
    # Key carries a numeric id rather than a name: there is no text to split.
    continue

  # Collected in a set because a single commit must not contain the same key twice.
  names = set()
  for word in topic_name.split():
    if word in stop:
      continue

    synonyms = _noun_synonyms(word)
    # Single pre-formatted argument keeps the output identical under the
    # Python 2 print statement and the Python 3 print function.
    print("%s %s %s" % (topic_name, word, synonyms))

    # The full topic name is registered only once a non-stop word has been seen.
    names.add(topic_name)
    names.add(word)
    names.add(plurals.plural(word))
    names.update(synonyms)

  _put_synonyms(names, topic_name)
    

  from ipykernel import kernelapp as app
  _warn_if_not_unicode(string)


annual salary annual Set([])
annual salary salary Set([u'wage', u'salary', u'remuneration', u'pay', u'salaries', u'earnings', u'pays', u'wages', u'earning', u'remunerations'])
compassionate leave compassionate Set([])
compassionate leave leave Set([u'partings', u'leaves', u'farewells', u'leave', u'farewell', u'parting'])
disability leave disability Set([u'handicap', u'disabilities', u'disability', u'disablements', u'handicaps', u'disablement', u'impairment', u'impairments'])
disability leave leave Set([u'partings', u'leaves', u'farewells', u'leave', u'farewell', u'parting'])
discipline discipline Set([u'discipline', u'bailiwicks', u'disciplines', u'fields', u'study', u'field', u'subjects', u'bailiwick', u'corrections', u'studies', u'correction', u'subject'])
employee classifications employee Set([u'employee', u'employees'])
employee classifications classifications Set([u'categorisation', u'compartmentalisations', u'categorization', u'classifications', u'compartmentalizations', u'classi