In [1]:
# Import libraries
import spacy
from spacy import displacy
import pandas as pd
# Load the "en_core_web_sm" pipeline once; later cells call `nlp(...)` to
# parse each example sentence and use `nlp.vocab` to build matchers.
nlp = spacy.load("en_core_web_sm")

In [2]:
# Example sentences in the active voice.
active = [
    "Hens lay eggs.",
    "Birds build nests.",
    "The batter hit the ball.",
    "The computer transmitted a copy of the manual",
]

# The same four statements rephrased in the passive voice, in the same order.
passive = [
    "Eggs are laid by hens",
    "Nests are built by birds",
    "The ball was hit by the batter",
    "A copy of the manual was transmitted by the computer.",
]

### How do we implement the rule `if dep is nsubjpass, then passive, else not`?

In [3]:
# Import matcher 
from spacy.matcher import Matcher

### Read more about it [here](https://spacy.io/api/matcher)

In [4]:
# Parse the first passive example ('Eggs are laid by hens') and draw its
# dependency tree with displaCy. The resulting `doc` is reused by the
# matcher cells that follow.
doc = nlp(passive[0])
displacy.render(doc, style='dep')

### Create a rule with `Matcher`

In [7]:
# Warm-up rule: a one-token pattern that fires on any token whose
# part-of-speech attribute is NOUN.
matcher = Matcher(nlp.vocab)
rule = [{"POS": "NOUN"}]
matcher.add("Rule", [rule])

In [8]:
# Run the NOUN rule over the parsed sentence. Each result is a
# (match_id, start, end) tuple; start and end are token offsets into `doc`.
matcher(doc)

[(15740618714089435985, 0, 1), (15740618714089435985, 4, 5)]

In [10]:
# Slice `doc` with the (start, end) offsets reported by the matcher to see
# which tokens were matched.
doc[0:1], doc[4:5]

(Eggs, hens)

### Create a rule for `passive voice`

In [11]:
# Replace the NOUN matcher with one whose only pattern is a single token
# carrying the dependency label `nsubjpass`.
matcher = Matcher(nlp.vocab)
passive_rule = [{"DEP": "nsubjpass"}]
matcher.add("Rule", [passive_rule])

In [12]:
# Apply the nsubjpass rule to `doc`; a non-empty result means a token with
# that dependency label was found.
matcher(doc)

[(15740618714089435985, 0, 1)]

In [13]:
# Token span covered by the single match reported above (offsets 0 to 1).
doc[0:1]

Eggs

In [None]:
# Draw the dependency tree of every active example so its labels can be
# compared with the passive parse. `doc` is rebound on every iteration, so
# after the loop it holds the parse of the last active sentence.
for sent in active:
    doc = nlp(sent)
    displacy.render(doc, style='dep')

### Let's check how this rule works if we use it on a sentence with `active voice`

In [15]:
# Build the passive-voice matcher: one pattern, a single token labelled
# `nsubjpass`.
passive_rule = [{"DEP": "nsubjpass"}]
matcher = Matcher(nlp.vocab)
matcher.add("Rule", [passive_rule])

# Parse and draw the first active example, then run the matcher on it.
# The cell's displayed value is the list of matches.
doc = nlp(active[0])
displacy.render(doc, style="dep")
matcher(doc)

[]

### Now let's make a function that implements this logic

In [17]:
def is_passive(doc, passive_matcher=None):
    """Report whether the matcher finds a passive-voice pattern in `doc`.

    Parameters
    ----------
    doc
        The parsed text to inspect (in this notebook, the result of
        ``nlp(sentence)``). It is passed straight to the matcher.
    passive_matcher : callable, optional
        Matcher to apply, called as ``passive_matcher(doc)`` and expected to
        return a sized collection of matches. When omitted, the
        notebook-level ``matcher`` is looked up at call time, so existing
        ``is_passive(doc)`` calls keep working unchanged.

    Returns
    -------
    bool
        True if the matcher returns at least one match, otherwise False.
    """
    # `matcher` is rebound by several cells, so the implicit fallback uses
    # whichever rule was registered most recently. Pass `passive_matcher`
    # explicitly to avoid depending on that hidden notebook state.
    chosen_matcher = matcher if passive_matcher is None else passive_matcher
    return len(chosen_matcher(doc)) > 0

### Let's test this function on our small sample of sentences and see how the pipeline will work

In [18]:
# Sanity check on the active examples: each should print False, because
# `is_passive` only returns True when the matcher finds a match.
for sent in active:
    doc = nlp(sent)
    print(is_passive(doc))

False
False
False
False


In [19]:
# Sanity check on the passive examples: each should print True, meaning the
# matcher found at least one match in the parsed sentence.
for sent in passive:
    doc = nlp(sent)
    print(is_passive(doc))

True
True
True
True


### Summary
 - One can go a long way by observing patterns in linguistic data; you don't always need to know the details of the linguistics very well.
 - One can use the `matcher` object to find whether certain linguistic patterns exist in the data.

In [20]:
# Probe the limits of the nsubjpass rule with a passive question.
# In the recorded run the result is an empty list, i.e. the rule finds no
# `nsubjpass` token in this sentence.
rule = [{"DEP": "nsubjpass"}]
matcher = Matcher(nlp.vocab)
matcher.add("Rule", [rule])

s = "Is a book being bought by John?"
doc = nlp(s)
matcher(doc)

[]