In [23]:
 from nltk.corpus import wordnet
 import nltk
 nltk.download('wordnet')
 # lets use word paint as an exqmple
syns = wordnet.synsets("kansas")

# An example of a synset:
print(syns[0].name())
print('\n')
# Just the word:
print(syns[0].lemmas()[0].name())
print('\n')

# Definition of that first synset:
print(syns[0].definition())
print('\n')
# Examples of the word in use in sentences:
print(syns[0].examples())
print('\n')

# synonyms and antonyms using wordnet using word
synonyms = []
antonyms = []

for syn in wordnet.synsets("kansas"):
    for l in syn.lemmas():
        synonyms.append(l.name())
        if l.antonyms():
            antonyms.append(l.antonyms()[0].name())
print('The synonyms of kansas are: ')
print(set(synonyms))
print('\n')
print('The antonyms of kansas are: ')
print(set(antonyms))
print('\n')



# comparison/ similarity score between 2 words
w1 = wordnet.synset('ship.n.01')
w2 = wordnet.synset('boat.n.01') # n denotes noun
print("The similarity score betwee ship and boat is =",w1.wup_similarity(w2))


print('Set of hyponyms:\n', syns[0].hyponyms(), '\n' )
print('Set of hypernyms:\n', syns[0].hypernyms(), '\n' )
print('Set of part-meronyms:\n', wordnet.synset('table.n.2').part_meronyms(), '\n' )
print('Set of member-holonyms:\n', wordnet.synset('kitchen.n.01').part_holonyms() , '\n' )
print('Set of part-holonyms:\n', wordnet.synset('course.n.7').part_holonyms(), '\n' )
print('Entailment of word Breathe:\n', wordnet.synset('snore.v.01').entailments(), '\n' )

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
kansas.n.01


Kansas


a state in midwestern United States


[]


The synonyms of kansas are: 
{'Kaw_River', 'Kansas', 'Kansas_River', 'Kansa', 'KS', 'Sunflower_State'}


The antonyms of kansas are: 
set()


The similarity score betwee ship and boat is = 0.9090909090909091
Set of hyponyms:
 [] 

Set of hypernyms:
 [] 

Set of part-meronyms:
 [Synset('leg.n.03'), Synset('tabletop.n.01'), Synset('tableware.n.01')] 

Set of member-holonyms:
 [Synset('dwelling.n.01')] 

Set of part-holonyms:
 [Synset('meal.n.01')] 

Entailment of word Breathe:
 [Synset('sleep.v.01')] 



In [29]:
{
 "cells": [
  {
   "cell_type": "Run",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import pandas as pd\n",
    "import bs4\n",
    "import requests\n",
    "import spacy\n",
    "from spacy import displacy\n",
    "nlp = spacy.load('en_Run_web_sm')\n",
    "\n",
    "from spacy.matcher import Matcher \n",
    "from spacy.tokens import Span \n",
    "\n",
    "import networkx as nx\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "from tqdm import tqdm\n",
    "\n",
    "pd.set_option('display.max_colwidth', 200)\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "Run",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sample sentences\n",
    "candidate_sentences = \"the drawdown process is governed by astm standard d823\"\n",
    "doc = nlp(candidate_sentences)"
   ]
  },
  {
   "cell_type": "Run",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the ... det\n",
      "drawdown ... amod\n",
      "process ... nsubjpass\n",
      "is ... auxpass\n",
      "governed ... ROOT\n",
      "by ... agent\n",
      "astm ... compound\n",
      "standard ... amod\n",
      "d823 ... pobj\n"
     ]
    }
   ],
   "source": [
    "for tok in doc:\n",
    "    print(tok.text, \"...\", tok.dep_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Entity Pairs Extraction"
   ]
  },
  {
   "cell_type": "Run",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_entities(sent):\n",
    "  ## chunk 1\n",
    "  ent1 = \"\"\n",
    "  ent2 = \"\"\n",
    "\n",
    "  prv_tok_dep = \"\"    # dependency tag of previous token in the sentence\n",
    "  prv_tok_text = \"\"   # previous token in the sentence\n",
    "\n",
    "  prefix = \"\"\n",
    "  modifier = \"\"\n",
    "\n",
    "  #############################################################\n",
    "  \n",
    "  for tok in nlp(sent):\n",
    "    ## chunk 2\n",
    "    # if token is a punctuation mark then move on to the next token\n",
    "    if tok.dep_ != \"punct\":\n",
    "      # check: token is a compound word or not\n",
    "      if tok.dep_ == \"compound\":\n",
    "        prefix = tok.text\n",
    "        # if the previous word was also a 'compound' then add the current word to it\n",
    "        if prv_tok_dep == \"compound\":\n",
    "          prefix = prv_tok_text + \" \"+ tok.text\n",
    "      \n",
    "      # check: token is a modifier or not\n",
    "      if tok.dep_.endswith(\"mod\") == True:\n",
    "        modifier = tok.text\n",
    "        # if the previous word was also a 'compound' then add the current word to it\n",
    "        if prv_tok_dep == \"compound\":\n",
    "          modifier = prv_tok_text + \" \"+ tok.text\n",
    "      \n",
    "      ## chunk 3\n",
    "      if tok.dep_.find(\"subj\") == True:\n",
    "        ent1 = modifier +\" \"+ prefix + \" \"+ tok.text\n",
    "        prefix = \"\"\n",
    "        modifier = \"\"\n",
    "        prv_tok_dep = \"\"\n",
    "        prv_tok_text = \"\"      \n",
    "\n",
    "      ## chunk 4\n",
    "      if tok.dep_.find(\"obj\") == True:\n",
    "        ent2 = modifier +\" \"+ prefix +\" \"+ tok.text\n",
    "        \n",
    "      ## chunk 5  \n",
    "      # update variables\n",
    "      prv_tok_dep = tok.dep_\n",
    "      prv_tok_text = tok.text\n",
    "  #############################################################\n",
    "\n",
    "  return [ent1.strip(), ent2.strip()]"
   ]
  },
  {
   "cell_type": "Run",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['film', '200  patents']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_entities(\"the film had 200 patents\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Entity Relation Extraction"
   ]
  },
  {
   "cell_type": "Run",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_relation(sent):\n",
    "\n",
    "  doc = nlp(sent)\n",
    "\n",
    "  # Matcher class object \n",
    "  matcher = Matcher(nlp.vocab)\n",
    "\n",
    "  #define the pattern \n",
    "  pattern = [{'DEP':'ROOT'}, \n",
    "            {'DEP':'prep','OP':\"?\"},\n",
    "            {'DEP':'agent','OP':\"?\"},  \n",
    "            {'POS':'ADJ','OP':\"?\"}] \n",
    "\n",
    "  matcher.add(\"matching_1\", None, pattern) \n",
    "\n",
    "  matches = matcher(doc)\n",
    "  k = len(matches) - 1\n",
    "\n",
    "  span = doc[matches[k][1]:matches[k][2]] \n",
    "\n",
    "  return(span.text)"
   ]
  },
  {
   "cell_type": "Run",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'completed'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_relation(\"John completed the task\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Combining above both for triplets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Example:1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "text=\"tony completed the task\""
   ]
  },
  {
   "cell_type": "Run",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "ent=get_entities(text)\n",
    "rel=get_relation(text)"
   ]
  },
  {
   "cell_type": "Run",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['tony', 'task']"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ent"
   ]
  },
  {
   "cell_type": "Run",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['tony', 'completed', 'task']\n"
     ]
    }
   ],
   "source": [
    "new_list=[]\n",
    "if len(ent)==2:\n",
    "    for i,n in enumerate(ent):\n",
    "        #print(i,n)\n",
    "        if i==1:\n",
    "            new_list.append(rel) \n",
    "        else:\n",
    "            new_list.append(n)\n",
    "    new_list.append(ent[1])\n",
    "print(new_list)\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Example:2"
   ]
  },
  {
   "cell_type": "Run",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['drawdown  process', 'governed by', 'astm standard astm d823']\n"
     ]
    }
   ],
   "source": [
    "text=\"the drawdown process is governed by astm standard d823\"\n",
    "ent=get_entities(text)\n",
    "rel=get_relation(text)\n",
    "new_list=[]\n",
    "if len(ent)==2:\n",
    "    for i,n in enumerate(ent):\n",
    "        #print(i,n)\n",
    "        if i==1:\n",
    "            new_list.append(rel) \n",
    "        else:\n",
    "            new_list.append(n)\n",
    "    new_list.append(ent[1])\n",
    "print(new_list)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}



{'cells': [{'cell_type': 'Run',
   'execution_count': 1,
   'metadata': {},
   'outputs': [],
   'source': ['import re\n',
    'import pandas as pd\n',
    'import bs4\n',
    'import requests\n',
    'import spacy\n',
    'from spacy import displacy\n',
    "nlp = spacy.load('en_Run_web_sm')\n",
    '\n',
    'from spacy.matcher import Matcher \n',
    'from spacy.tokens import Span \n',
    '\n',
    'import networkx as nx\n',
    '\n',
    'import matplotlib.pyplot as plt\n',
    'from tqdm import tqdm\n',
    '\n',
    "pd.set_option('display.max_colwidth', 200)\n",
    '%matplotlib inline']},
  {'cell_type': 'Run',
   'execution_count': 2,
   'metadata': {},
   'outputs': [],
   'source': ['# sample sentences\n',
    'candidate_sentences = "the drawdown process is governed by astm standard d823"\n',
    'doc = nlp(candidate_sentences)']},
  {'cell_type': 'Run',
   'execution_count': 4,
   'metadata': {},
   'outputs': [{'name': 'stdout',
     'output_type': 'stream',
     'text': 