In [3]:
import pandas
import os
import re
from collections import Counter
import spacy
from spacy import displacy
import en_core_web_lg
import numpy as np

In [4]:
# Directories

IP_DIR = '../data/input/imperatives/ground_truth/'
OP_DIR = '../data/output/imperatives/ground_truth/'

In [5]:
# Read input
ip_file = 'imperatives_github.txt'
op_file = ''

In [6]:
def preprocess_text(text):
    # text = text.replace('@', '')
    text = text.replace('\n', ' ')
    # text = text.replace('#', '')
    text = text.replace('RT ', '')
    text = text.translate ({ord(c): ' ' for c in "!@#$%^&*()[]{};.:,/<>?\|`~-=_+"})
    
    removeSpecialChars = text.translate ({ord(c): " " for c in "!@#$%^&*()[]{};:,/<>?\|`~-=_+"})
    
    text = re.sub(r'http\S+', '', text) #Remove URLs
    text = re.sub('([^\x00-\x7F])+', '', text) # Remove non-english characters
    text = re.sub(' +', ' ', text) # Remove multiple spaces
    
    text = text.strip()
    
    return text

In [7]:
imperatives = []

ip_filepath = os.path.join(IP_DIR, ip_file)

with open(ip_filepath, 'r') as ip:
    for sentence in ip:
        sentence = preprocess_text(sentence)
        imperatives.append(sentence)

In [8]:
imperatives

['Remember the test date',
 'Live in peace',
 'Include the required attachement s',
 "Don't follow me",
 'Do not eat in class',
 'Kill the butterfly',
 'Take me to the library',
 'Find out what you like to do most and really give it a whirl',
 'Believe me',
 'Move around town',
 'Lets take a walk',
 'Dont stop me now',
 'Stop talking now',
 'Do not smoke in your room',
 "Don't you talk to me like that",
 "Don't worry be happy",
 'Eat your veggies',
 'Let them cool',
 'Take your coat off',
 'Power the world',
 'Stop and Go',
 'repair your camera',
 'Write read do what you need',
 'Dont touch me',
 'Mix together sugar and salt',
 'Lock the door',
 'Rise against the tyrannical government',
 'Make up your mind to pool your resources and get the moust out of your remaining years of life',
 'Driver carefully',
 'have a safe trip',
 'bring me a suitcase from my closet',
 "Don't wait for my answer get started right away",
 'give me your full name please',
 'You better be quit',
 'Eat them with

# Rule 1

In [9]:
nlp = en_core_web_lg.load()

In [10]:
#rule 3: Has a verb in its lemma (base) form and is the root and does not have any subject child in it's dependency structure
def root_verb_in_lemma_nosubj(sent):
    doc = nlp(sent)
    cond1 = False
    cond2 = True
    for token in doc:
        if token.dep_=='ROOT' and token.pos_=='VERB' and token.text.lower() == token.lemma_:
            cond1 = True
            #print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop)
        if token.dep_ == 'nsubj':
            cond2 = False   
    return (cond1 and cond2)

In [11]:
## Test for rule 1
rule1 = []
positives_rule1 = []
negatives_rule1 = []

for sentence in imperatives:
    if (root_verb_in_lemma_nosubj(sentence)):
        rule1.append(True)
        positives_rule1.append(sentence)
    else:
        rule1.append(False)
        negatives_rule1.append(sentence)

counter_rule1 = Counter(rule1)
print(counter_rule1)

Counter({True: 508, False: 196})


In [12]:
negatives_rule1

['Find out what you like to do most and really give it a whirl',
 'Lets take a walk',
 "Don't you talk to me like that",
 'Let them cool',
 'Power the world',
 'Write read do what you need',
 'Driver carefully',
 'have a safe trip',
 'You better be quit',
 "Give me a few weeks I'll get back to you when I have time",
 'Be nice to your brother',
 'be nice',
 'Be sure to clean your weapon',
 "speak up I can't hear you",
 "I'd be grateful if you didn't tell anyone about this",
 'You watch your mouth young man',
 'tell her I need her right now',
 'Make me a cup of tea',
 'Have a coke and a smile',
 'Have fun at the fair tonight',
 'have a break',
 'Get out of my way',
 'Save Ferris',
 'Speak what you think now in hard words and tomorrow speak what tomorrow thinks in hard words again though it contradict every thing you said today',
 'Sit be quite',
 'have fun tonight',
 'Get out',
 'Take if you must this little bag of dreams',
 'Report on what you find',
 'just do it',
 'Let this generation

# Rule 2

In [13]:
# Define the keywords that will be used to extract the imperative sentences
# Rules are identified from the paper - https://www.aclweb.org/anthology/W14-2117.pdf
# 1. "find those sentences with a verb (in its base form) as the root 
#     in the phrase structure and this particular verb has no 
#.    subject child in the dependency structure" 
#.    Example: You must first discuss the matter there, and you need to be specific”
# 2. "recognize the use of a personal pronoun or noun (e.g., “you”, “we”,
#.     or a username) followed by a modal verb (e.g., “should”, “must”, “need”) as an imperative"


# Source: https://www.myenglishpages.com/site_php_files/grammar-lesson-modals.php
modal_verbs = ['can', 'cant','could', 'couldnt', 'may', 'might', 'will', 'wont', 'would', 'wouldnt', 'shall','should', \
               'shouldnt', 'must', 'ought', 'dare', 'had better', 'ask']
# modal_verbs = ['can', 'may', 'might', 'will', 'would', 'shall','should', 'must', 'ought', 'dare', 'had better', 'ask']

# Source: https://grammar.yourdictionary.com/parts-of-speech/pronouns/list-of-personal-pronouns.html
# personal_pronoun = ['I', 'me', 'we', 'us', 'you', 'he', 'she', 'her', 'him', 'it', 'they', 'them']
personal_pronoun = ['us', 'you', 'he', 'she', 'her', 'him', 'it', 'they', 'them']

In [14]:
# Check for rule 2

# Create combinations of possibilities for a sentence to be imperative
combinations = []
# form combinations of words and put them as a list
for pronoun in personal_pronoun:
    for verb in modal_verbs:
        combinations.append(pronoun + ' ' + verb)
        
print('Number of combinations for rule 2 =', len(combinations))

Number of combinations for rule 2 = 162


In [15]:
# Using rule 2 to check imperative snetences
def check_imperative_r2(combinations, sentence):
    if any(combo in sentence.lower() for combo in combinations):
        return True
    return False

In [16]:
## Test for rule 2
rule2 = []
positives_rule2 = []
negatives_rule2 = []

for sentence in imperatives:
    if (check_imperative_r2(combinations, sentence)):
        rule2.append(True)
        positives_rule2.append(sentence)
    else:
        rule2.append(False)
        negatives_rule2.append(sentence)

counter_rule2 = Counter(rule2)
print(counter_rule2)

Counter({False: 683, True: 21})


In [17]:
negatives_rule2

['Remember the test date',
 'Live in peace',
 'Include the required attachement s',
 "Don't follow me",
 'Do not eat in class',
 'Kill the butterfly',
 'Take me to the library',
 'Find out what you like to do most and really give it a whirl',
 'Believe me',
 'Move around town',
 'Lets take a walk',
 'Dont stop me now',
 'Stop talking now',
 'Do not smoke in your room',
 "Don't you talk to me like that",
 "Don't worry be happy",
 'Eat your veggies',
 'Let them cool',
 'Take your coat off',
 'Power the world',
 'Stop and Go',
 'repair your camera',
 'Write read do what you need',
 'Dont touch me',
 'Mix together sugar and salt',
 'Lock the door',
 'Rise against the tyrannical government',
 'Make up your mind to pool your resources and get the moust out of your remaining years of life',
 'Driver carefully',
 'have a safe trip',
 'bring me a suitcase from my closet',
 "Don't wait for my answer get started right away",
 'give me your full name please',
 'You better be quit',
 'Eat them with

## Find the statements that were different

In [18]:
identified_by_rule1_only = np.setdiff1d(positives_rule2, positives_rule1, assume_unique=True).tolist()
print('Identified by rule 1 but not by rule 2', len(identified_by_rule1_only))

for sent in identified_by_rule1_only:
    print(sent)

Identified by rule 1 but not by rule 2 20
Take if you must this little bag of dreams
Unloose the cord and they will wrap you round
Promise that you will do what you need to do
Touch the great artery Feel it bound like a deer in the might of its lightness and know the thunderless boil of the blood Lean for a bit against this bone It is the only memento you will leave to this earth Its tacitness is everlasting In the hush of the tissue wait with me for the shaft of pronouncement Press your ear against this body the way you did when you were a child holding a seashell and heard faintly the half remembered longed for sea
when buying cotton to make yourself a nice blouse be sure that it doesnt have gum on it because that way it wont hold up well after a wash soak salt fish overnight before you cook it
Don't work alone on the assignment it will be much more difficult
Come and touch the things you cannot feel And close your fingertips and fly where I can't hold you Let the sun rain fall and l

In [19]:
len(identified_by_rule1_only)

20

In [20]:
all_posiitve = positives_rule1.extend(positives_rule2)
missed_by_rules = np.setdiff1d(imperatives, all_posiitve, assume_unique=True).tolist()

print(len(missed_by_rules))

for sent in missed_by_rules:
    print(sent)

704
Remember the test date
Live in peace
Include the required attachement s
Don't follow me
Do not eat in class
Kill the butterfly
Take me to the library
Find out what you like to do most and really give it a whirl
Believe me
Move around town
Lets take a walk
Dont stop me now
Stop talking now
Do not smoke in your room
Don't you talk to me like that
Don't worry be happy
Eat your veggies
Let them cool
Take your coat off
Power the world
Stop and Go
repair your camera
Write read do what you need
Dont touch me
Mix together sugar and salt
Lock the door
Rise against the tyrannical government
Make up your mind to pool your resources and get the moust out of your remaining years of life
Driver carefully
have a safe trip
bring me a suitcase from my closet
Don't wait for my answer get started right away
give me your full name please
You better be quit
Eat them with milk
meet me at the town square
buy now while supplies last
Run for cover
Buy a dog
Enjoy some fresh apples
Pass through the intersec