In [1]:
import spacy
import en_core_web_sm
from spacy.strings import StringStore,hash_string

# Load the small English pipeline once; the later cells reuse this shared `nlp` object.
nlp = en_core_web_sm.load()

from spacy.matcher import Matcher
from spacy import displacy
from spacy.tokens import Token,Span



In [2]:
class Framing(object):
    """Rule-based spaCy pipeline component that flags tokens used for slot filling.

    Five groups of ``Matcher`` patterns are registered (smoking, age, personal
    name, subject and location).  Calling the component on a ``Doc`` prints
    every match and sets boolean custom extensions on the relevant tokens
    (``token._.is_age``, ``is_smoker``, ``is_name``, ``is_pronoun``,
    ``is_living``) so that later code can read them back.
    """
    name='Framing'

    # Custom Token extensions owned by this component; all default to False.
    # NOTE(review): "is_relative" is registered but no rule below ever sets it.
    _TOKEN_FLAGS = ("is_age", "is_smoker", "is_name",
                    "is_pronoun", "is_relative", "is_living")

    def __init__(self,nlp):
        """Build the matcher on ``nlp.vocab`` and register the token extensions.

        Args:
            nlp: the loaded spaCy pipeline whose vocabulary the patterns share.
        """
        self.matcher = Matcher(nlp.vocab)

        # "<subject> is <number> [year(s)] [old]"
        age_1 = [{'DEP': 'nsubj'},
                 {'LEMMA': 'be'},
                 {'LIKE_NUM': True},
                 {'LEMMA': 'year','OP':'?'},
                 {'ORTH': 'old','OP':'?'}]

        # "[subject] [do] <anything but 'not'> smoke".  The '!' operator negates
        # its token pattern, so a "not"/"n't" in that position blocks the match.
        # The lemma must be lowercase 'do': 'DO' never equals a spaCy lemma.
        smoke1 = [{'DEP': 'nsubj','OP':'?'},
                  {'LEMMA':'do','OP':'?'},
                  {'LEMMA':'not','OP':'!'},
                  {'LEMMA':'smoke'}]
        # "[subject] is <anything but 'not'> [a] smoker"
        smoke2 = [{'DEP': 'nsubj','OP':'?'},
                  {'LEMMA': 'be'},
                  {'LEMMA':'not','OP':'!'},
                  {'POS': 'DET','OP':'?'},
                  {'LEMMA': 'smoker'}]

        # "<subject> is <PERSON>"
        subject1 = [{'DEP': 'nsubj'},
                    {'LEMMA': 'be'},
                    {'ENT_TYPE': 'PERSON'}]
        # "<pronoun> name is <PERSON>"
        subject2 = [{'POS': 'PRON'},
                    {'LOWER': 'name'},
                    {'LEMMA': 'be'},
                    {'ENT_TYPE': 'PERSON'}]

        # "<subject> <pronoun>" and "my <noun> is"
        subject_p1 = [{'DEP':'nsubj'},{'POS': 'PRON'}]
        subject_p2 = [{'LOWER': 'my'},
                      {'POS': 'NOUN'},
                      {'LEMMA': 'be'}]

        # "[subject] is from <GPE>" / "[subject] live(s) in <GPE>"
        live_in1 = [{'DEP':'nsubj','OP':'?'},
                    {'LEMMA':'be'},
                    {'LOWER': 'from'},
                    {'ENT_TYPE':'GPE'}]
        live_in2 = [{'DEP':'nsubj','OP':'?'},
                    {'LEMMA':'live'},
                    {'LOWER': 'in'},
                    {'ENT_TYPE':'GPE'}]

        self.matcher.add('smoking pattern',[smoke1,smoke2])
        self.matcher.add('Age',[age_1])
        self.matcher.add('Personal',[subject1,subject2])
        self.matcher.add('Pronoun',[subject_p1,subject_p2])
        self.matcher.add('Location',[live_in1,live_in2])

        # set_extension raises if the name is already registered, which made
        # re-running the notebook cell (a second Framing(nlp)) fail.
        for flag in self._TOKEN_FLAGS:
            if not Token.has_extension(flag):
                Token.set_extension(flag,default=False)

    def __call__(self,doc):
        """Run the matcher over ``doc``, flag the matched tokens and return ``doc``.

        Every match is printed (rule name, then the matched text; the 'Age'
        rule also prints the age token and 'Personal' prints each token).

        Args:
            doc: the spaCy ``Doc`` being processed by the pipeline.

        Returns:
            The same ``doc``, with custom token extensions updated in place.
        """
        # Resolve rule names through the document's own vocabulary rather than
        # a notebook-global ``nlp`` object.
        strings = doc.vocab.strings

        for match_id,start,end in self.matcher(doc):
            rule = strings[match_id]
            span = doc[start:end]  # The matched span

            if rule == 'Age':
                # Take the number from inside the match, skipping the subject.
                # span.subtree also yields descendants outside the match, so a
                # numeral attached to the subject could be picked up instead.
                age = next(token for token in span[1:] if token.like_num)
                age._.set("is_age",True)
                print('pattern:',rule)
                print('Age:', age)
                print(span)

            elif rule == 'smoking pattern':
                print('pattern:',rule)
                print(span)
                for token in span:  # every token of the match is flagged
                    token._.set("is_smoker", True)

            elif rule == 'Personal':
                print('pattern:',rule)
                print(span)
                for token in span:
                    print(token)
                    if token.ent_type_ == 'PERSON':
                        token._.set("is_name", True)

            elif rule == 'Pronoun':
                print('pattern:',rule)
                print(span)
                # NOTE(review): this flags the nsubj token, which for
                # "my <noun> is" is the noun (e.g. "mother"), not a pronoun.
                # Confirm whether is_relative was intended for that pattern.
                for token in span:
                    if token.dep_ == 'nsubj':
                        token._.set("is_pronoun", True)

            elif rule == 'Location':
                print('pattern:',rule)
                print(span)
                for token in span:
                    if token.ent_type_ == 'GPE':
                        token._.set("is_living", True)

        return doc
            
        

In [3]:
# Build the component first, so a failure here leaves the current pipeline untouched.
component=Framing(nlp)
# add_pipe raises if a component with the same name is already registered,
# so replace any copy left over from an earlier run of this cell.
if Framing.name in nlp.pipe_names:
    nlp.remove_pipe(Framing.name)
nlp.add_pipe(component,last=True)

In [4]:
# Empty frame: one slot per extracted fact, each starting at its "unknown" default.
Frame = {
    'age': 0,
    'live_in': 0,
    'pronoun': 0,
    'name': 0,
    'smoker': False,
}

In [10]:

def flagged_texts(doc, flag):
    """Return the text of every token in `doc` whose custom extension `flag` is truthy."""
    return [token.text for token in doc if token._.get(flag)]


doc=nlp(u"My mother is 35. She doesn't smokes and lives in Italy. Her name is Laura.")


print('HERE I WOULD FILL THE SLOTS')

# Every slot is written on every run: when nothing is found the slot goes back
# to its default, as the smoker slot always did. Otherwise a value from a
# previously processed text would survive in Frame.

#Age
age_slot = flagged_texts(doc, 'is_age')
if age_slot:
    Frame['age'] = age_slot[0]
else:
    print('The age is missing')
    Frame['age'] = 0


#smoker
if flagged_texts(doc, 'is_smoker'):
    print('smoker')
    Frame['smoker'] = True
else:
    print('not a smoker!!!!')
    Frame['smoker'] = False


#Location
loc = flagged_texts(doc, 'is_living')
if loc:
    print('location', loc[0] )
    Frame['live_in']  = loc[0]
else:
    print('Location missing')
    Frame['live_in'] = 0

#Name
name = flagged_texts(doc, 'is_name')
if name:
    print('name', name[0] )
    Frame['name']  = name[0]
else:
    print('Personal name missing')
    Frame['name'] = 0

#Pronoun
pronoun = flagged_texts(doc, 'is_pronoun')
if pronoun:
    # All flagged tokens are printed; the last one is the value kept in the frame.
    for item in pronoun:
        print('pronoun', item )
        Frame['pronoun']  = item
else:
    print('Pronoun missing')
    Frame['pronoun'] = 0



pattern: Pronoun
My mother is
pattern: Age
Age: 35
mother is 35
pattern: Location
lives in Italy
pattern: Personal
name is Laura
name
is
Laura
HERE I WOULD FILL THE SLOTS
not a smoker!!!!
location Italy
name Laura
pronoun mother


In [11]:
# Show the frame after the slot-filling cell has written into it.
print(Frame)

{'age': '35', 'live_in': 'Italy', 'pronoun': 'mother', 'name': 'Laura', 'smoker': False}


In [7]:
# Keyword list; not referenced by any other visible cell.
# NOTE(review): despite the name, several entries are conditions rather than
# treatments (e.g. 'leukaemia', 'pregnant'). Confirm the intended meaning.
treatments_hrisk = [
    'chemotherapy',
    'immunotherapy',
    'radiotherapy',
    'leukaemia',
    'lymphoma',
    'myeloma',
    'transplant',
    'immunosuppressant',
    'cancer',
    'pregnant',
]



In [None]:
# Second example: print the tokens flagged as age, then those flagged as smoker.
doc = nlp(u"I am Laura and I am 28 years old and a smoker")
for flag_name in ("is_age", "is_smoker"):
    flagged = [token.text for token in doc if token._.get(flag_name)]
    print(flagged)