In [1]:
# Import libraries
import spacy
from spacy import displacy
from spacy.matcher import Matcher
import pandas as pd
# Load the spaCy pipeline named "en_core_web_sm". The resulting `nlp` object is
# what every later cell calls to turn a sentence string into a parsed doc.
nlp = spacy.load("en_core_web_sm")

### Let's check our rule on a larger corpus

In [2]:
# Load the dataset CSV into a dataframe and preview its first rows.
# The path is relative, so this cell depends on the notebook's working directory.
active_passive = pd.read_csv("../data/active_passive.csv")
active_passive.head()

Unnamed: 0,Active,Passive
0,He reads a novel.,A novel is read.
1,He does not cook food.,Food is not cooked by him.
2,Does he purchase books?,Are books being purchased by him?
3,They grow plants.,Plants are grown by them.
4,She teaches me.,I am taught by her.


In [4]:
# Show the dataframe's dimensions as a (rows, columns) tuple.
active_passive.shape

(40, 2)

In [5]:
# Split the dataframe into its two columns. Each result is a pandas Series
# (not a plain array): `active` holds the 'Active' column and `passive` the
# 'Passive' column.
active = active_passive['Active']
passive = active_passive['Passive']

### Create the rule

In [15]:
# A one-token pattern: it matches any single token whose dependency label (DEP)
# is either 'nsubjpass' (passive nominal subject) or 'auxpass' (passive auxiliary).
passive_rule = [{'DEP':{"IN": ['nsubjpass', 'auxpass']}}]
# Build a matcher on the pipeline's vocab and register the pattern under the name 'Rule'.
matcher = Matcher(nlp.vocab)
matcher.add('Rule',[passive_rule])

In [16]:
def is_passive(doc, rule_matcher=None):
    """Report whether the matcher finds at least one match in ``doc``.

    Args:
        doc: The parsed document to test; it is passed straight to the matcher.
        rule_matcher: Optional callable that takes ``doc`` and returns a sized
            collection of matches. When omitted, the notebook-level ``matcher``
            is looked up at call time, so rebinding ``matcher`` in a later
            cell changes what this function reports.

    Returns:
        bool: ``True`` if the matcher returned one or more matches, else ``False``.
    """
    # Only touch the global when no explicit matcher was supplied.
    active_matcher = matcher if rule_matcher is None else rule_matcher
    # The comparison already yields a bool, so no conditional expression is needed.
    return len(active_matcher(doc)) > 0

### Check rule on active voice sentences

In [17]:
# Run the rule over every active-voice sentence and print "<position> <verdict>",
# with positions counted from 1. A False verdict means the rule did not fire.
for count, sent in enumerate(active, start=1):
    doc = nlp(sent)
    print(count, is_passive(doc))

1 False
2 False
3 False
4 False
5 False
6 False
7 False
8 False
9 False
10 False
11 False
12 False
13 False
14 False
15 False
16 False
17 False
18 False
19 False
20 False
21 False
22 False
23 False
24 False
25 False
26 False
27 False
28 False
29 False
30 False
31 False
32 False
33 False
34 False
35 False
36 False
37 False
38 False
39 False
40 False


### Check rule on passive voice sentences

In [18]:
# Run the rule over every passive-voice sentence and print "<position> <verdict>",
# with positions counted from 1. A True verdict means the rule fired.
for count, sent in enumerate(passive, start=1):
    doc = nlp(sent)
    print(count, is_passive(doc))

1 True
2 True
3 True
4 True
5 True
6 True
7 True
8 True
9 True
10 True
11 True
12 True
13 True
14 True
15 True
16 True
17 True
18 True
19 True
20 True
21 True
22 True
23 True
24 True
25 True
26 True
27 True
28 True
29 True
30 True
31 True
32 True
33 True
34 True
35 True
36 True
37 True
38 True
39 True
40 True


### Let's troubleshoot

In [19]:
# Pull out one passive sentence (entry 7) to examine on its own.
passive[7]

'Is a table being bought by Ritika?'

In [20]:
# Parse passive sentence 7 and draw its dependency tree so the label on each
# token can be read off the diagram.
doc = nlp(passive[7])
displacy.render(doc, style='dep')

In [25]:
# Narrow the rule to the 'nsubjpass' label alone. Rebinding `matcher` here also
# changes what is_passive() reports, because it reads the notebook-level `matcher`.
passive_rule = [{'DEP': {"IN": ['nsubjpass']}}]
matcher = Matcher(nlp.vocab)
matcher.add('Rule', [passive_rule])

# Hand-picked probes: for each one, print the verdict and draw its dependency tree.
sentences = [
    'What is your name?',
    'Is coffee serving here?',
    'Is she being promoted as new assistant?',
    'Women are said to live longer than men.',
]
for sent in sentences:
    doc = nlp(sent)
    print(is_passive(doc))
    displacy.render(doc, style="dep", jupyter=True)

False


False


True


True


### Let's visualize their dependency trees

In [26]:
# Rebuild the matcher with the two-label rule ('nsubjpass' or 'auxpass'), which
# replaces whatever `matcher` the previous cell left behind.
passive_rule = [{'DEP': {"IN": ['nsubjpass', 'auxpass']}}]
matcher = Matcher(nlp.vocab)
matcher.add('Rule', [passive_rule])

# Two active and two passive probes: print each verdict and draw the dependency tree.
sentences = [
    'Sofia is learning NLP',
    'Eggs are laid by Hens',
    'Mouse is eaten by a black cat',
    'She has done her job productively',
]
for sent in sentences:
    doc = nlp(sent)
    print(is_passive(doc))
    displacy.render(doc, style="dep", jupyter=True)

False


True


True


False


In [27]:
# Probe: an active main clause followed by a relative clause that contains "was".
# Checks that the extra "was" does not trip the rule (recorded verdict: False).
s = 'JetAirways cancelled the flight this morning which was already late.'
doc = nlp(s)
print(is_passive(doc))
displacy.render(doc, style="dep", jupyter = True)

False


In [28]:
# Probe: "was" used twice as a linking verb rather than as a passive auxiliary
# (recorded verdict: False).
s = 'It was the best of times and it was the worst of times.'
doc = nlp(s)
print(is_passive(doc))
displacy.render(doc, style="dep", jupyter = True)

False


In [29]:
# Probe: a plain passive with an explicit "by" agent (recorded verdict: True).
s="Dole was defeated by Clinton"
doc = nlp(s)
print(is_passive(doc))
displacy.render(doc, style="dep", jupyter = True)

True


[Dependencies](https://universaldependencies.org/docs/en/dep/)

### Update our rule
[Reference](https://spacy.io/usage/rule-based-matching)

## Summary
 - Always test your rules and heuristics on a larger corpus to see how effective they are
 - One can write intricate matching rules using spaCy's `Matcher` object