In [2]:
"""
This basic example loads a pre-trained model from the web and uses it to
generate sentence embeddings for a given list of sentences.
"""

from sentence_transformers import SentenceTransformer, LoggingHandler
import numpy as np
import logging

#### Just some code to print debug information to stdout
np.set_printoptions(threshold=100)

logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO,
                    handlers=[LoggingHandler()])
#### /print debug information to stdout



# Load Sentence model (based on BERT) from URL
model = SentenceTransformer('bert-base-nli-mean-tokens')

# Embed a list of sentences
sentences = ['This framework generates embeddings for each input sentence',
             'Sentences are passed as a list of string.',
             'The quick brown fox jumps over the lazy dog.']
sentence_embeddings = model.encode(sentences)

# The result is a list of sentence embeddings as numpy arrays
for sentence, embedding in zip(sentences, sentence_embeddings):
    print("Sentence:", sentence)
    print("Embedding:", len(embedding))
    print("")

2020-02-16 10:09:35 - Load pretrained SentenceTransformer: bert-base-nli-mean-tokens
2020-02-16 10:09:35 - Did not find a / or \ in the name. Assume to download model from server
2020-02-16 10:09:35 - Load SentenceTransformer from folder: /net/home/smsarwar/.cache/torch/sentence_transformers/public.ukp.informatik.tu-darmstadt.de_reimers_sentence-transformers_v0.2_bert-base-nli-mean-tokens.zip
2020-02-16 10:09:35 - loading configuration file /net/home/smsarwar/.cache/torch/sentence_transformers/public.ukp.informatik.tu-darmstadt.de_reimers_sentence-transformers_v0.2_bert-base-nli-mean-tokens.zip/0_BERT/config.json
2020-02-16 10:09:35 - Model config {
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "do_sample": false,
  "eos_token_ids": 0,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": f

In [20]:
from predpatt import PredPatt

def get_event_windows(text):
    pp = PredPatt.from_sentence(text)    
    #pp = PredPatt.from_sentence('This is something that Iraqi Vice President Ramadan had said would take place.')
    tokens = [token.text for token in pp.tokens]
    windows = []

    for event in pp.events:
        #print(event)
        #print(event.position)
        window_begin = 0
        window_end = 0
        for i, argument in enumerate(event.arguments):        
            print(argument.position)    
            if i == 0:
                window_begin = argument.position - len(argument.phrase().strip().split()) + 1
            if i == len(event.arguments) - 1:
                window_end = argument.position + len(argument.phrase().strip().split())              
        #windows.append((window_begin, window_end))
        print("begin")
        print(window_begin)
        print("end")
        print(window_end)
        windows.append(tokens[window_begin: window_end])
    
    #for window in windows:
    return windows

print(get_event_windows("This is something that Iraqi Vice President Ramadan had said would take place."))

#print(pp.pprint())
# print(" ".join([token.text for token in pp.tokens]))
# print(pp.events)
# print(pp.event_dict)
# print(pp.events)

# for event in pp.events:
#     print(event)
#     print(event.position)
#     for argument in event.arguments:
#         print(argument.phrase())
#         print(argument.position)

0
begin
0
end
1
3
7
11
begin
3
end
14
7
12
begin
4
end
13
[['This'], ['that', 'Iraqi', 'Vice', 'President', 'Ramadan', 'had', 'said', 'would', 'take', 'place', '.'], ['Iraqi', 'Vice', 'President', 'Ramadan', 'had', 'said', 'would', 'take', 'place']]
