# Exploration of spaCy with allennlp SRL, NLTK WordNet and PyDictionary

In [1]:
# Imports and set up
from allennlp.predictors.predictor import Predictor

import spacy
from spacy.tokens import Doc, Span, Token
from spacy.language import Language

nlp = spacy.load('en_core_web_trf')

In [2]:
@Language.factory("srl", default_config={
    "model_path": "../dna/resources/structured-prediction-srl-bert.2020.12.15.tar.gz"})
def create_srl_component(nlp: Language, name: str, model_path: str):
    return SRLComponent(nlp, model_path)

class SRLComponent:
    def __init__(self, nlp: Language, model_path: str):
        if not Doc.has_extension("srl"):
            Doc.set_extension("srl", default=None)
        self.predictor = Predictor.from_path(model_path)

    def __call__(self, doc: Doc):
        predictions = self.predictor.predict(sentence=doc.text)
        doc._.srl = predictions
        return doc

In [4]:
nlp.add_pipe("sentencizer")
nlp.add_pipe("srl")

2021-08-03 17:15:36,924 - INFO - allennlp.common.plugins - Plugin allennlp_models available
2021-08-03 17:15:36,930 - INFO - allennlp.models.archival - loading archive file ../dna/resources/structured-prediction-srl-bert.2020.12.15.tar.gz
2021-08-03 17:15:36,930 - INFO - allennlp.models.archival - extracting archive file ../dna/resources/structured-prediction-srl-bert.2020.12.15.tar.gz to temp dir /var/folders/c_/r637mr6d7y99t490f5yw_trm0000gn/T/tmp8gij92k4
2021-08-03 17:15:39,870 - INFO - allennlp.common.params - dataset_reader.type = srl
2021-08-03 17:15:39,871 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2021-08-03 17:15:39,872 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2021-08-03 17:15:39,872 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2021-08-03 17:15:39,873 - INFO - allennlp.common.params - dataset_reader.token_indexers = None
2021-08-03 17:15:39,874 - INFO - allennlp.comm

2021-08-03 17:15:44,923 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.10.attention.self.query.bias
2021-08-03 17:15:44,923 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.10.attention.self.query.weight
2021-08-03 17:15:44,924 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.10.attention.self.value.bias
2021-08-03 17:15:44,924 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.10.attention.self.value.weight
2021-08-03 17:15:44,925 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.10.intermediate.dense.bias
2021-08-03 17:15:44,925 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.10.intermediate.dense.weight
2021-08-03 17:15:44,926 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.10.output.LayerNorm.bias
2021-08-03 17:15:44,926 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.10.output.LayerNorm.weight
2021-08-03 17:15:44,927 - INFO - allennlp.nn.initializers -    bert_

2021-08-03 17:15:44,959 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.4.output.LayerNorm.weight
2021-08-03 17:15:44,959 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.4.output.dense.bias
2021-08-03 17:15:44,960 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.4.output.dense.weight
2021-08-03 17:15:44,960 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.5.attention.output.LayerNorm.bias
2021-08-03 17:15:44,961 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.5.attention.output.LayerNorm.weight
2021-08-03 17:15:44,961 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.5.attention.output.dense.bias
2021-08-03 17:15:44,961 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.5.attention.output.dense.weight
2021-08-03 17:15:44,962 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.5.attention.self.key.bias
2021-08-03 17:15:44,962 - INFO - allennlp.nn.initializers -    bert_mo

2021-08-03 17:15:44,989 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.9.attention.self.key.bias
2021-08-03 17:15:44,989 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.9.attention.self.key.weight
2021-08-03 17:15:44,990 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.9.attention.self.query.bias
2021-08-03 17:15:44,990 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.9.attention.self.query.weight
2021-08-03 17:15:44,990 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.9.attention.self.value.bias
2021-08-03 17:15:44,991 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.9.attention.self.value.weight
2021-08-03 17:15:44,991 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.9.intermediate.dense.bias
2021-08-03 17:15:44,992 - INFO - allennlp.nn.initializers -    bert_model.encoder.layer.9.intermediate.dense.weight
2021-08-03 17:15:44,992 - INFO - allennlp.nn.initializers -    bert_mode

<__main__.SRLComponent at 0x105f0ce20>

In [6]:
doc = nlp("The dog trashed the apartment in under 30 seconds.")
print(doc._.srl)

{'verbs': [{'verb': 'trashed', 'description': '[ARG0: The dog] [V: trashed] [ARG1: the apartment] [ARGM-TMP: in under 30 seconds] .', 'tags': ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'B-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'O']}], 'words': ['The', 'dog', 'trashed', 'the', 'apartment', 'in', 'under', '30', 'seconds', '.']}


## WordNet

In [1]:
from nltk.corpus import wordnet as wn

In [6]:
print('nouns:')
syns = wn.synsets('attack', pos=wn.NOUN)
for syn in syns:
    print("syn")
    print(syn.lemmas())
    print(syn.definition())
    print(syn.hypernyms())
print()
print('verbs:')
syns = wn.synsets('attack', pos=wn.VERB)
for syn in syns:
    print("syn")
    print(syn.lemmas())
    print(syn.definition())
    print(syn.hypernyms())
    for lemma in syn.lemmas():
        print(lemma)
        print(lemma.frame_ids())
        print(" | ".join(lemma.frame_strings()))

nouns:
syn
[Lemma('attack.n.01.attack'), Lemma('attack.n.01.onslaught'), Lemma('attack.n.01.onset'), Lemma('attack.n.01.onrush')]
(military) an offensive against an enemy (using weapons)
[Synset('operation.n.05')]
syn
[Lemma('attack.n.02.attack')]
an offensive move in a sport or game
[Synset('turn.n.03')]
syn
[Lemma('fire.n.09.fire'), Lemma('fire.n.09.attack'), Lemma('fire.n.09.flak'), Lemma('fire.n.09.flack'), Lemma('fire.n.09.blast')]
intense adverse criticism
[Synset('criticism.n.01')]
syn
[Lemma('approach.n.01.approach'), Lemma('approach.n.01.attack'), Lemma('approach.n.01.plan_of_attack')]
ideas or actions intended to deal with a problem or situation
[Synset('conceptualization.n.01')]
syn
[Lemma('attack.n.05.attack'), Lemma('attack.n.05.attempt')]
the act of attacking
[Synset('crime.n.01')]
syn
[Lemma('attack.n.06.attack'), Lemma('attack.n.06.tone-beginning')]
a decisive manner of beginning a musical tone or phrase
[Synset('beginning.n.05')]
syn
[Lemma('attack.n.07.attack')]
a sud

In [10]:
print('nouns:')
syns = wn.synsets('look', pos=wn.NOUN)
for syn in syns:
    print("syn")
    print(syn.lemmas())
    print(syn.definition())
    print(syn.hypernyms())
print()
print('verbs:')
syns = wn.synsets('look', pos=wn.VERB)
for syn in syns:
    print("syn")
    print(syn.lemmas())
    print(syn.definition())
    print(syn.hypernyms())
    print("frames")
    for lemma in syn.lemmas():
        print(lemma)
        print(lemma.frame_ids())
        print(" | ".join(lemma.frame_strings()))

nouns:
syn
[Lemma('expression.n.01.expression'), Lemma('expression.n.01.look'), Lemma('expression.n.01.aspect'), Lemma('expression.n.01.facial_expression'), Lemma('expression.n.01.face')]
the feelings expressed on a person's face
[Synset('countenance.n.01')]
syn
[Lemma('look.n.02.look'), Lemma('look.n.02.looking'), Lemma('look.n.02.looking_at')]
the act of directing the eyes toward something and perceiving it visually
[Synset('sensing.n.02')]
syn
[Lemma('look.n.03.look')]
physical appearance
[Synset('appearance.n.01')]
syn
[Lemma('spirit.n.02.spirit'), Lemma('spirit.n.02.tone'), Lemma('spirit.n.02.feel'), Lemma('spirit.n.02.feeling'), Lemma('spirit.n.02.flavor'), Lemma('spirit.n.02.flavour'), Lemma('spirit.n.02.look'), Lemma('spirit.n.02.smell')]
the general atmosphere of a place or situation and the effect that it has on people
[Synset('atmosphere.n.01')]

verbs:
syn
[Lemma('look.v.01.look')]
perceive with attention; direct one's gaze towards
[]
frames
Lemma('look.v.01.look')
[2, 22]


## Wordnet 3.1 Online Search

In [1]:
import requests
import xml.etree.ElementTree as ET

In [4]:
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", 
    "Accept-Encoding": "gzip, deflate", 
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8", 
    "Dnt": "1", 
    "Host": "httpbin.org", 
    "Upgrade-Insecure-Requests": "1", 
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", 
}
search_term = "look-for"
response = requests.get(f'https://wordnetweb.princeton.edu/perl/webwn?s={search_term}&o2=&o0=&o8=1&o1=1&o7=1&o5=&o9=&o6=&o3=&o4=&h=00', headers=headers)
print(response.content)

h3s = ET.fromstring(response.content).findall('.//h3')
for h3 in h3s:
    print(h3)

ConnectionError: HTTPSConnectionPool(host='wordnetweb.princeton.edu', port=443): Max retries exceeded with url: /perl/webwn?s=look-for&o2=&o0=&o8=1&o1=1&o7=1&o5=&o9=&o6=&o3=&o4=&h=00 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f8c60731220>: Failed to establish a new connection: [Errno 61] Connection refused'))

## PyDictionary

In [1]:
from PyDictionary import PyDictionary
dictionary=PyDictionary()

print (dictionary.meaning("escaping"))

{'Verb': ['run away from confinement', 'fail to experience', 'escape potentially unpleasant consequences; get away with a forbidden action', 'be incomprehensible to; escape understanding by', 'remove oneself from a familiar environment, usually for pleasure or diversion', "flee; take to one's heels; cut and run", 'issue or leak, as from a small opening']}


In [16]:
import datefinder
matches = datefinder.find_dates("June, 1928")
try:
    matches.__next__()
except StopIteration:
    print("no matches")
for match in matches:
    print(match)
    print(match.year)
    print(match.month)
    print(match.day)

In [25]:
import re
year_pattern = re.compile('[0-9]{4}')
search_res = year_pattern.search("before June 1928")
if search_res:
    print(search_res.group())
else:
    print("no match")

1928
