In [1]:
import spacy
import textacy

In [2]:
# Load spaCy's 'en_core_web_sm' English model once; `nlp` is the parser used
# by the later cells to turn raw text into a parsed document.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Parse the sample free-text passage; the extraction cells below all read from `doc`.
doc = nlp("I guess I am feeling kinda tired. I feel overwhelmed, a bit, maybe hungry. I dunno. I find myself wanting something, but I'm not sure what it is. I feel stressed certainly, too much to do maybe? But I'm not totally sure what I should be doing?")

In [4]:
# Pull subject-verb-object triples out of the parsed passage and show each one.
svo_triples = textacy.extract.subject_verb_object_triples(doc)
for svo in svo_triples:
    print(svo)

(myself, wanting, something)


In [5]:
# Statements about the entity 'I' that are cued by 'feel'.
# Every item yielded is an (entity, cue, fragment) triple.
statements = textacy.extract.semistructured_statements(doc, 'I', cue='feel')
for statement in statements:
    entity, cue, fragment = statement
    print(entity, cue, '-->', fragment)

I am feeling --> kinda tired
I feel --> overwhelmed, a bit, maybe hungry
I feel --> stressed certainly, too much to do maybe


In [6]:
# Use each sentence's main verbs as cues: for every (subject, verb) pair,
# collect the semistructured statements that textacy finds for that subject
# text and verb lemma.
all_statements = []
for sent in doc.sents:
    verbs = textacy.spacier.utils.get_main_verbs_of_sent(sent)
    print('sent:', sent, '\nverbs:', verbs)
    for verb in verbs:
        subjects = textacy.spacier.utils.get_subjects_of_verb(verb)
        for subject in subjects:
            # The search runs over the whole `doc`, not just `sent`, so the
            # same statement can be reported from several sentences; the
            # set() at the end collapses those repeats.
            statements = textacy.extract.semistructured_statements(doc, subject.text, verb.lemma_)
            for statement in statements:
                print(subject, verb, statement)
                all_statements.append(statement)

    print('\n')

# Print each distinct statement once.
for statement in set(all_statements):
    print(statement)

sent: I guess I am feeling kinda tired. 
verbs: [guess, feeling]
I guess (I, guess, I am feeling kinda tired)
I feeling (I, am feeling, kinda tired)
I feeling (I, feel, overwhelmed, a bit, maybe hungry)
I feeling (I, feel, stressed certainly, too much to do maybe)


sent: I feel overwhelmed, a bit, maybe hungry. 
verbs: [feel]
I feel (I, am feeling, kinda tired)
I feel (I, feel, overwhelmed, a bit, maybe hungry)
I feel (I, feel, stressed certainly, too much to do maybe)


sent: I dunno. 
verbs: [dunno]


sent: I find myself wanting something, but I'm not sure what it is. 
verbs: [find, wanting, 'm, is]
I find (I, find, myself wanting something, but I'm not sure what it is)
I 'm (I, 'm, not sure what it is)
I 'm (I, 'm, not totally sure what I should be doing)


sent: I feel stressed certainly, too much to do maybe? 
verbs: [feel, do]
I feel (I, am feeling, kinda tired)
I feel (I, feel, overwhelmed, a bit, maybe hungry)
I feel (I, feel, stressed certainly, too much to do maybe)


sent: 

In [7]:
from allennlp.predictors import Predictor

# Archive of the pretrained model (the file name identifies it as
# decomposable-attention + ELMo, dated 2018.02.19).
ENTAILMENT_MODEL_ARCHIVE = "https://s3-us-west-2.amazonaws.com/allennlp/models/decomposable-attention-elmo-2018.02.19.tar.gz"

# `predictor` is the entailment scorer used by the cells below.
predictor = Predictor.from_path(ENTAILMENT_MODEL_ARCHIVE)

12/05/2018 17:33:15 - INFO - allennlp.models.archival -   loading archive file https://s3-us-west-2.amazonaws.com/allennlp/models/decomposable-attention-elmo-2018.02.19.tar.gz from cache at /home/russell/.allennlp/cache/1dbdfb3ce5af46c5b83353727b579a5596d45a121d59199f1c838928a87e3796.21e6e14db76ce734b669577cc3046333c6bc853767246356b4a8b2c6a85249a8
12/05/2018 17:33:15 - INFO - allennlp.models.archival -   extracting archive file /home/russell/.allennlp/cache/1dbdfb3ce5af46c5b83353727b579a5596d45a121d59199f1c838928a87e3796.21e6e14db76ce734b669577cc3046333c6bc853767246356b4a8b2c6a85249a8 to temp dir /tmp/tmp1g2uq0_z
12/05/2018 17:33:20 - INFO - allennlp.common.params -   type = default
12/05/2018 17:33:20 - INFO - allennlp.data.vocabulary -   Loading token dictionary from /tmp/tmp1g2uq0_z/vocabulary.
12/05/2018 17:33:20 - INFO - allennlp.common.from_params -   instantiating class <class 'allennlp.models.model.Model'> from params {'initializer': [['.*linear_layers.*weight', {'type': 'xavie

12/05/2018 17:33:29 - INFO - allennlp.common.params -   Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently.
12/05/2018 17:33:29 - INFO - allennlp.common.params -   CURRENTLY DEFINED PARAMETERS: 
12/05/2018 17:33:29 - INFO - allennlp.common.params -   model.initializer.list.list.type = xavier_normal
12/05/2018 17:33:29 - INFO - allennlp.common.params -   Converting Params object to dict; logging of default values will not occur when dictionary parameters are used subsequently.
12/05/2018 17:33:29 - INFO - allennlp.common.params -   CURRENTLY DEFINED PARAMETERS: 
12/05/2018 17:33:29 - INFO - allennlp.nn.initializers -   Initializing parameters
12/05/2018 17:33:29 - INFO - allennlp.nn.initializers -   Initializing _attend_feedforward._module._linear_layers.0.weight using .*linear_layers.*weight intitializer
12/05/2018 17:33:29 - INFO - allennlp.nn.initializers -   Initializing _attend_feedforward._module._linear_laye

12/05/2018 17:33:29 - INFO - allennlp.nn.initializers -      _text_field_embedder.token_embedder_elmo._elmo._elmo_lstm._token_embedder.char_conv_6.bias
12/05/2018 17:33:29 - INFO - allennlp.nn.initializers -      _text_field_embedder.token_embedder_elmo._elmo._elmo_lstm._token_embedder.char_conv_6.weight
12/05/2018 17:33:29 - INFO - allennlp.nn.initializers -      _text_field_embedder.token_embedder_elmo._elmo.scalar_mix_0.gamma
12/05/2018 17:33:29 - INFO - allennlp.nn.initializers -      _text_field_embedder.token_embedder_elmo._elmo.scalar_mix_0.scalar_parameters.0
12/05/2018 17:33:29 - INFO - allennlp.nn.initializers -      _text_field_embedder.token_embedder_elmo._elmo.scalar_mix_0.scalar_parameters.1
12/05/2018 17:33:29 - INFO - allennlp.nn.initializers -      _text_field_embedder.token_embedder_elmo._elmo.scalar_mix_0.scalar_parameters.2
12/05/2018 17:33:29 - INFO - allennlp.common.from_params -   instantiating class <class 'allennlp.data.dataset_readers.dataset_reader.DatasetRea

In [8]:
# Try the predictor on one hand-written premise/hypothesis pair and show the
# raw result dictionary.
example_premise = "Two women are wandering along the shore drinking iced tea."
example_hypothesis = "Two women are sitting on a blanket near some rocks talking about politics."

prediction = predictor.predict(hypothesis=example_hypothesis, premise=example_premise)
prediction

{'label_logits': [-3.391864776611328, 4.570619106292725, 0.9505535364151001],
 'label_probs': [0.00033908773912116885,
  0.9735872745513916,
  0.02607356198132038],
 'h2p_attention': [[0.6615484952926636,
   0.03999358043074608,
   0.04555582255125046,
   0.046556826680898666,
   0.032376978546381,
   0.02884303592145443,
   0.021681087091565132,
   0.02199293114244938,
   0.021933946758508682,
   0.03280186280608177,
   0.02464543841779232,
   0.0220700204372406],
  [2.6478128347662278e-05,
   0.9997804164886475,
   2.5384990294696763e-05,
   2.803125425998587e-05,
   1.5035763681225944e-05,
   1.5200890629785135e-05,
   2.0365438103908673e-05,
   1.5323941624956205e-05,
   1.667179458308965e-05,
   3.005035614478402e-05,
   1.3499801752914209e-05,
   1.3597185898106545e-05],
  [0.10203886777162552,
   0.08704567700624466,
   0.11516872048377991,
   0.11659414321184158,
   0.0897686704993248,
   0.1088109016418457,
   0.06201941892504692,
   0.06663387268781662,
   0.06219317018985748

In [9]:
# Check what Python type the predictor uses for the premise tokens it returns.
first_premise_token = prediction['premise_tokens'][0]
type(first_premise_token)

str

In [10]:
import pandas as pd

In [11]:
# Rebinds `doc`: same passage as the earlier cell plus a closing sentence
# ("Now it's a lot later ..."). Cells below this point see the longer text.
doc = nlp("I guess I am feeling kinda tired. I feel overwhelmed, a bit, maybe hungry. I dunno. I find myself wanting something, but I'm not sure what it is. I feel stressed certainly, too much to do maybe? But I'm not totally sure what I should be doing? Now it's a lot later and it's really time for me to get to bed...but a part of me wants to stay up, nonetheless")

In [12]:
# Score every ordered pair of distinct sentences in `doc` as
# (premise, hypothesis) with the entailment predictor.
#
# Rows are gathered in a plain list and the DataFrame is built once at the
# end: assigning `results.loc[i] = ...` inside the loop re-grows the frame on
# every row and leaves the numeric columns with object dtype.
sentences = list(doc.sents)  # materialise once; both loops walk the same list

pair_rows = []
for premise in sentences:
    for hypothesis in sentences:
        if premise != hypothesis:
            prediction = predictor.predict(hypothesis=hypothesis.text, premise=premise.text)
            # Unpacking order follows the original cell; it assumes
            # label_probs is (entailment, contradiction, neutral) — TODO confirm
            # against the model's label vocabulary.
            entailment, contradiction, neutral = prediction['label_probs']
            pair_rows.append([
                premise.text,
                hypothesis.text,
                entailment,
                contradiction,
                neutral,
                # 'e+c' column: mean of entailment and (1 - contradiction).
                (entailment + (1 - contradiction)) / 2,
            ])

results = pd.DataFrame(
    pair_rows,
    columns=['premise', 'hypothesis', 'entailment', 'contradiction', 'neutral', 'e+c'],
)

In [13]:
# Show the sentence pairs ordered from highest to lowest entailment score.
results.sort_values(by='entailment', ascending=False)

Unnamed: 0,premise,hypothesis,entailment,contradiction,neutral
43,But I'm not totally sure what I should be doing?,"I find myself wanting something, but I'm not s...",0.956455,0.001234,0.042311
34,"I feel stressed certainly, too much to do maybe?",I dunno.,0.912021,0.070635,0.017344
44,But I'm not totally sure what I should be doing?,"I feel stressed certainly, too much to do maybe?",0.908410,0.005447,0.086143
1,I guess I am feeling kinda tired.,I dunno.,0.904198,0.053515,0.042287
9,"I feel overwhelmed, a bit, maybe hungry.",I dunno.,0.867386,0.043489,0.089125
8,"I feel overwhelmed, a bit, maybe hungry.",I guess I am feeling kinda tired.,0.836310,0.000918,0.162772
32,"I feel stressed certainly, too much to do maybe?",I guess I am feeling kinda tired.,0.781964,0.001776,0.216260
47,But I'm not totally sure what I should be doing?,"but a part of me wants to stay up, nonetheless",0.768105,0.025418,0.206478
27,"I find myself wanting something, but I'm not s...","I feel stressed certainly, too much to do maybe?",0.767259,0.020967,0.211775
42,But I'm not totally sure what I should be doing?,I dunno.,0.762202,0.184278,0.053520


In [14]:
# Score each sentence of `doc` as a premise against one fixed hypothesis.
hypothesis = 'I feel stressed'

# Gather the rows first and build the DataFrame once; growing it with
# `results.loc[i] = ...` per row is slow and yields object-dtype columns.
premise_rows = []
for premise in doc.sents:
    prediction = predictor.predict(hypothesis=hypothesis, premise=premise.text)
    # Unpacking order follows the original cell; it assumes label_probs is
    # (entailment, contradiction, neutral) — TODO confirm.
    entailment, contradiction, neutral = prediction['label_probs']
    premise_rows.append([premise.text, hypothesis, entailment, contradiction, neutral])

results = pd.DataFrame(
    premise_rows,
    columns=['premise', 'hypothesis', 'entailment', 'contradiction', 'neutral'],
)

In [15]:
# Show the premises ordered from highest to lowest entailment score for the fixed hypothesis.
results.sort_values(by='entailment', ascending=False)

Unnamed: 0,premise,hypothesis,entailment,contradiction,neutral
4,"I feel stressed certainly, too much to do maybe?",I feel stressed,0.985132,0.000401,0.014467
0,I guess I am feeling kinda tired.,I feel stressed,0.936851,0.002266,0.060882
1,"I feel overwhelmed, a bit, maybe hungry.",I feel stressed,0.933847,0.002187,0.063966
3,"I find myself wanting something, but I'm not s...",I feel stressed,0.833155,0.004319,0.162525
5,But I'm not totally sure what I should be doing?,I feel stressed,0.769592,0.041008,0.189401
2,I dunno.,I feel stressed,0.493208,0.287141,0.219651
8,"but a part of me wants to stay up, nonetheless",I feel stressed,0.293085,0.115519,0.591396
7,and it's really time for me to get to bed...,I feel stressed,0.109,0.353283,0.537717
6,Now it's a lot later,I feel stressed,0.081763,0.259905,0.658333


In [16]:
def demo(shape):
    """Print an entailment judgement for two fixed example sentences.

    Loads the 'en_vectors_web_lg' spaCy model, adds a pipe created by
    ``KerasSimilarityShim.load`` from the model's ``similarity`` directory,
    parses the two sentences, and prints them together with the
    (entailment type, confidence) pair that ``similarity`` returns.

    Args:
        shape: Indexable value; only ``shape[0]`` is read, and it is passed
            through to ``KerasSimilarityShim.load``.

    NOTE(review): ``KerasSimilarityShim`` is not imported in the cells
    visible here — confirm where it comes from before calling this.
    """
    pipeline = spacy.load('en_vectors_web_lg')
    similarity_shim = KerasSimilarityShim.load(pipeline.path / 'similarity', pipeline, shape[0])
    pipeline.add_pipe(similarity_shim)

    first_doc = pipeline(u'The king of France is bald.')
    second_doc = pipeline(u'France has no king.')

    print("Sentence 1:", first_doc)
    print("Sentence 2:", second_doc)

    verdict = first_doc.similarity(second_doc)
    entailment_type, confidence = verdict
    print("Entailment type:", entailment_type, "(Confidence:", confidence, ")")