## Sentence Classifier 

### This project classifies a given sentence as imperative, interrogative, assertive, exclamatory, or negative with the help of the open-source NLP library spaCy.

## spaCy documentation

#### token.text: the verbatim text of each token (a word or punctuation mark) of the string.

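#### token.pos_: the coarse-grained part-of-speech tag, which assigns a word type such as verb or noun to each token. (The code below reads token.tag_, the fine-grained tag such as VB or NN.)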

#### token.dep_: the syntactic dependency label, which describes the relation between individual tokens, such as subject or object.

In [1]:
import spacy
from enum import Enum

In [2]:
# Load the spaCy pipeline named 'en_core_web_md'; getTaggedWords() calls it to
# tokenise and tag sentences.
nlp = spacy.load('en_core_web_md')

### Sentence classes

In [3]:
class SentenceType(Enum):
    """Categories that the classifier functions in this notebook can return."""
    # Returned by Imperative().
    IMPERATIVE = 1
    # Returned by Assertive().
    ASSERTIVE = 2
    # Returned by Interrogative().
    INTERROGATIVE = 3
    # Returned by Exclamatory().
    EXCLAMATORY = 4
    # Returned by Negative().
    NEGATIVE = 5

### The sentence to classify is hard-coded

In [4]:
# Sample sentence that the cells below tokenise and classify.
sentence = "Hop on one foot."

In [5]:
def getTaggedWords(sentence):
    """Run the spaCy pipeline on ``sentence`` and return its tokens as a list.

    Args:
        sentence: The text to analyse.

    Returns:
        A list holding every token produced by ``nlp(sentence)``, in order.
    """
    return list(nlp(sentence))

In [6]:
# Tokenise and tag the sample sentence.
tagged_words = getTaggedWords(sentence)

In [7]:
# Show the tag of the first token (prints "VB" for the sample sentence).
print(tagged_words[0].tag_)

VB


In [8]:
def isMatchingRegex(regex_array, tagged_words):
    """Check whether a sentence starts with a given sequence of tags.

    ``regex_array`` is a list of positions; each position is itself a list of
    acceptable tags.  The rule matches when the tag of the first token is in
    the first list, the tag of the second token is in the second list, and so
    on.  Tokens after the end of the rule are ignored (trailing wildcard).

    Args:
        regex_array: List of lists of tag strings, one inner list per
            leading token position.
        tagged_words: Sequence of tokens exposing a string ``tag_`` attribute.

    Returns:
        ``True`` if the leading tokens satisfy every position of the rule,
        ``False`` otherwise (including when the rule is empty or the sentence
        has fewer tokens than the rule has positions).
    """
    # An empty rule or a sentence shorter than the rule can never match.
    if not regex_array or len(tagged_words) < len(regex_array):
        return False

    # tag_ is a single string, so test it for membership in the alternatives
    # of its position; comparing individual characters of the tag with the
    # whole list of alternatives can never succeed.
    return all(
        word.tag_ in allowed_tags
        for allowed_tags, word in zip(regex_array, tagged_words)
    )
                   
                    
                    

### The tone of a sentence can often be inferred simply by looking at its end mark (for example '!')

In [9]:
# Text of the last token of the sample sentence, e.g. '.', '!' or '?' when the
# sentence ends with punctuation.
endmark = tagged_words[-1].text

In [10]:
def Exclamatory(tagged_words):
    """Return ``SentenceType.EXCLAMATORY`` for an exclamation, else ``None``.

    A sentence is treated as exclamatory when its last token is ``'!'`` or
    when its first token is tagged ``UH`` (interjection).

    Args:
        tagged_words: Sequence of tokens exposing ``text`` and ``tag_``.

    Returns:
        ``SentenceType.EXCLAMATORY`` or ``None`` (also for an empty sequence).
    """
    if not tagged_words:
        return None

    # Take the end mark from the tokens passed in, so the result always
    # describes the argument rather than a module-level sentence.
    ends_with_bang = tagged_words[-1].text == '!'
    # tag_ is a string: compare it with the string 'UH' (comparing it with a
    # list is always False).
    starts_with_interjection = tagged_words[0].tag_ == 'UH'

    if ends_with_bang or starts_with_interjection:
        return SentenceType.EXCLAMATORY
    return None
        

In [11]:
def Imperative(tagged_words):
    """Return ``SentenceType.IMPERATIVE`` for a command, else ``None``.

    The sentence is classified as imperative when any of these holds:

    1. one of the tag-pattern rules below is accepted by ``isMatchingRegex``;
    2. the first token is tagged ``VB`` or ``UH``;
    3. the sentence ends with ``'!'`` and its second token is not tagged
       ``NN`` or ``PRP``;
    4. it contains one of the words "must", "should" or "order".

    NOTE(review): some of the tag patterns (e.g. WRB/WP followed by a modal)
    look like question or statement openers rather than commands -- confirm
    that they are intended here.

    Args:
        tagged_words: Sequence of tokens exposing ``text`` and ``tag_``.

    Returns:
        ``SentenceType.IMPERATIVE`` or ``None`` (also for an empty sequence).
    """
    if not tagged_words:
        return None

    firstRegexRule = [["VB", "MD", "VBP", "VBZ", "VBD"],
                      ["PRP", "NN", "NNP", "NNS", "VBG"],
                      ["DT", "VB", "MD", "VBP", "VBZ", "VBD"],
                      ["DT"]]
    secondRegexRule = [["WRB", "WP"], ["MD", "VBP", "VBZ", "VBD"]]
    thirdRegexRule = [["MD", "VB", "VBP", "VBZ", "VBD"],
                      ["DT", "IN"],
                      ["PRP", "NN", "NNP", "NNS", "VBG"]]
    fourthRegexRule = [["PRP", "NNP"],
                       ["VB", "VBP", "VBZ", "VBD", "VBG"],
                       ["PRP", "NN", "NNP", "NNS", "VBG"],
                       ["DT"],
                       ["PRP", "NN", "NNP", "NNS", "VBG"]]
    fifthRegexRule = [["PRP", "NNP", "NN", "NNS", "VBG"],
                      ["VB", "VBP", "VBZ", "VBD", "VBG"]]
    rules = (firstRegexRule, secondRegexRule, thirdRegexRule,
             fourthRegexRule, fifthRegexRule)

    if any(isMatchingRegex(rule, tagged_words) for rule in rules):
        return SentenceType.IMPERATIVE

    starts_with_verb = tagged_words[0].tag_ in ("VB", "UH")
    # Explicit grouping: `and` binds tighter than `or`, so the '!' test and
    # the second-token test belong together.  The length check avoids an
    # IndexError on one-token input.
    exclaimed_without_subject = (
        tagged_words[-1].text == '!'
        and len(tagged_words) > 1
        and tagged_words[1].tag_ not in ('NN', 'PRP')
    )
    if starts_with_verb or exclaimed_without_subject:
        return SentenceType.IMPERATIVE

    # A single pass over the tokens is enough to look for obligation words.
    if any(word.text in ("must", "should", "order") for word in tagged_words):
        return SentenceType.IMPERATIVE
    return None
    
    


In [12]:
# Token texts that mark a sentence as negative; Negative() compares each
# token's text against these entries.
negativewords = [
    "no", "not", "never", "neither", "nobody", "none", "nor",
    "nothing", "nowhere", "few", "hardly", "little", "rarely",
    "scarcely", "seldom",
    # Whole contractions written with a typographic apostrophe.
    "hadn’t", "don’t", "doesn’t",
    "didn’t", "couldn’t", "can’t", "wouldn’t", "haven’t", "aren’t",
    "hasn’t", "won’t", "shouldn’t", "isn’t", "wasn’t", "weren’t",
    # spaCy's English tokenizer splits contractions ("don't" -> "do" + "n't"),
    # so the negation arrives as its own token; list both apostrophe forms.
    "n't", "n’t",
]


In [13]:
def Negative(tagged_words):
    """Return ``SentenceType.NEGATIVE`` if the sentence has a negative word.

    Args:
        tagged_words: Sequence of tokens exposing a ``text`` attribute.

    Returns:
        ``SentenceType.NEGATIVE`` when the text of any token, compared
        case-insensitively, is listed in ``negativewords``; otherwise ``None``.
    """
    for tagged_word in tagged_words:
        # Lower-case the token so that capitalised negatives such as a
        # sentence-initial "No" or "Never" are recognised as well.
        if tagged_word.text.lower() in negativewords:
            return SentenceType.NEGATIVE
    return None
       

In [14]:
def Assertive(tagged_words):
    """Return ``SentenceType.ASSERTIVE`` for a plain statement, else ``None``.

    A sentence is assertive when its last token is ``'.'`` and
    ``Imperative`` does not classify it as a command.

    Args:
        tagged_words: Sequence of tokens exposing ``text`` and ``tag_``.

    Returns:
        ``SentenceType.ASSERTIVE`` or ``None`` (also for an empty sequence).
    """
    if not tagged_words:
        return None

    # Read the end mark from the argument so the verdict always refers to the
    # sentence that was passed in.
    if tagged_words[-1].text == '.' and not Imperative(tagged_words):
        return SentenceType.ASSERTIVE
    return None

In [15]:
def Interrogative(tagged_words):
    """Return ``SentenceType.INTERROGATIVE`` for a question, else ``None``.

    A sentence is interrogative when it ends with ``'?'`` or when
    ``isMatchingRegex`` accepts one of these tag patterns:

    * MD/VBP/VBZ/VBD + PRP/NN/NNP/NNPS/NNS/VBG + *
    * WRB/WP + MD/VBP/VBZ/VBD + *
    * MD/VBP/VBZ/VBD + DT + PRP/NN/NNP/NNPS/NNS/VBG + *

    Args:
        tagged_words: Sequence of tokens exposing ``text`` and ``tag_``.

    Returns:
        ``SentenceType.INTERROGATIVE`` or ``None`` (also for an empty
        sequence).
    """
    if not tagged_words:
        return None

    # Read the end mark from the argument rather than from module state.
    if tagged_words[-1].text == '?':
        return SentenceType.INTERROGATIVE

    auxiliaries = ["MD", "VBP", "VBZ", "VBD"]
    # NNPS is part of the documented rule, so it is listed with the other
    # noun tags.
    nominals = ["PRP", "NN", "NNP", "NNPS", "NNS", "VBG"]
    rules = (
        [auxiliaries, nominals],
        [["WRB", "WP"], auxiliaries],
        [auxiliaries, ["DT"], nominals],
    )
    if any(isMatchingRegex(rule, tagged_words) for rule in rules):
        return SentenceType.INTERROGATIVE
    return None
  
   

In [16]:
# Classify once and reuse the result instead of running the classifier twice.
imperative_result = Imperative(tagged_words)
if imperative_result:
    print(imperative_result)

SentenceType.IMPERATIVE


In [17]:
# Classify once and reuse the result instead of running the classifier twice.
interrogative_result = Interrogative(tagged_words)
if interrogative_result:
    print(interrogative_result)

In [18]:
# Classify once and reuse the result instead of running the classifier twice.
exclamatory_result = Exclamatory(tagged_words)
if exclamatory_result:
    print(exclamatory_result)

In [19]:
# Classify once and reuse the result instead of running the classifier twice.
negative_result = Negative(tagged_words)
if negative_result:
    print(negative_result)

In [20]:
# Classify once and reuse the result instead of running the classifier twice.
assertive_result = Assertive(tagged_words)
if assertive_result:
    print(assertive_result)