In [1]:
# Import spaCy and its displaCy visualiser.
# NOTE(review): pandas is imported but not used in the visible cells.
import spacy
from spacy import displacy
import pandas as pd
# Load the "en_core_web_sm" pipeline; `nlp(text)` is used below to parse each sentence.
nlp = spacy.load("en_core_web_sm")

In [2]:
# Sample sentences for the experiment: `active` holds active-voice
# sentences, `passive` holds the same statements rewritten in passive voice.
active = [
    'Hens lay eggs.',
    'Birds build nests.',
    'The batter hit the ball.',
    'The computer transmitted a copy of the manual',
]
passive = [
    'Eggs are laid by hens',
    'Nests are built by birds',
    'The ball was hit by the batter',
    'A copy of the manual was transmitted by the computer.',
]

### How do we implement the rule `if dep nsubjpass, then passive else not`?

In [3]:
# Import matcher
from spacy.matcher import Matcher

### Read more about it [here](https://spacy.io/api/matcher)

In [4]:
# Visualise the dependency parse tree of every sentence in `passive`.
# After the loop, `doc` still holds the parse of the last passive sentence;
# the cells that follow run the matcher on that `doc`.
for sent in passive:
    doc = nlp(sent)
    displacy.render(doc, style='dep', jupyter=True)


### Create a rule with `Matcher`

In [6]:
# Build a matcher that holds one single-token pattern: any token whose
# part-of-speech tag is NOUN.
matcher = Matcher(nlp.vocab)
rule = [{'POS': 'NOUN'}]
# add() expects a list of patterns, so the one pattern is wrapped in a list.
matcher.add('Rule', [rule])


In [7]:
# Run the noun rule on `doc`; each result is a (match_id, start, end) tuple,
# where start/end are token indices into `doc`.
matcher(doc)

[(15740618714089435985, 1, 2),
 (15740618714089435985, 4, 5),
 (15740618714089435985, 9, 10)]

In [8]:
# NOTE(review): the matches above start at tokens 1, 4 and 9, so doc[0:1] is
# not one of the matched spans — doc[1:2] may have been intended; confirm.
doc[0:1]

A

In [10]:
# Span covered by the second match, (4, 5).
doc[4:5]

manual

In [11]:
# Span covered by the third match, (9, 10).
doc[9:10]

computer

### Create a rule for `passive voice`

In [12]:
# Passive-voice rule: a single-token pattern that matches any token whose
# dependency label is `nsubjpass`.
passive_rule = [{'DEP': 'nsubjpass'}]
# Use a fresh Matcher so the earlier noun pattern is no longer registered.
matcher = Matcher(nlp.vocab)
# add() expects a list of patterns, so the one pattern is wrapped in a list.
matcher.add('Rule', [passive_rule])

In [13]:
# Run the nsubjpass rule on the passive sentence still held in `doc`.
matcher(doc)

[(15740618714089435985, 1, 2)]

### Let's check how this rule works if we use it on a sentence with `active voice`

In [14]:
# First active-voice sample sentence.
active[0]

'Hens lay eggs.'

In [16]:
# Parse the first active sentence (this rebinds `doc`) and draw its dependency tree.
doc = nlp(active[0])
displacy.render(doc, style='dep')

In [17]:
# Run the nsubjpass rule on the active sentence; an empty list means no match.
matcher(doc)

[]

### Now let's make a function that implements this logic

In [25]:
def is_passive(doc, matcher) -> bool:
    """Return True if `matcher` finds at least one match in `doc`.

    Args:
        doc: The parsed document to inspect; it is passed straight to `matcher`.
        matcher: A callable that takes `doc` and returns a sized collection of
            matches (in this notebook, a spaCy `Matcher` holding the
            `nsubjpass` rule).

    Returns:
        True when the matcher reports one or more matches, False otherwise.
    """
    # The comparison already yields a bool, so no if/else branch is needed.
    return len(matcher(doc)) > 0

### Let's test this function on our small sample of sentences and see how the pipeline will work

In [26]:
# Parse each active sentence and print whether the passive rule matches it.
for sent in active:
  doc = nlp(sent)
  print(is_passive(doc, matcher))

False
False
False
False


In [27]:
# Parse each passive sentence and print whether the passive rule matches it.
for sent in passive:
  doc = nlp(sent)
  print(is_passive(doc, matcher))

True
True
True
True


### Summary
 - One can go a long way by observing patterns in linguistic data; you don't always need to know the details of the linguistics very well.
 - One can use the `matcher` object to find out whether certain linguistic patterns exist in data.

In [30]:
# Rebuild the nsubjpass matcher from scratch (a single-token pattern on the
# dependency label).
rule = [{'DEP':'nsubjpass'}]
matcher = Matcher(nlp.vocab)
matcher.add('Rule',[rule])

# Try the rule on a new sentence; an empty result means no nsubjpass token was found.
doc = nlp("I am learning NLP from upGrad.")
matcher(doc)

[]

In [33]:
# Parse another example sentence ("is being bought by ...") for inspection.
doc = nlp("A book is being bought by John")

In [34]:
# Draw the dependency tree of the sentence currently held in `doc`.
displacy.render(doc, style='dep')