In [1]:
import pickle as pkl
import re
from sklearn.linear_model import LogisticRegression

In [2]:
def preprocess_text(text):
    """Normalize raw text into lowercase words separated by single spaces.

    Steps, applied in order:
      1. Lowercase the text.
      2. Replace every non-word character (anything outside ``\\w``) with a space.
      3. Replace each run of digits that directly follows a space with a space.
      4. Collapse whitespace runs into a single space.
      5. Trim leading and trailing whitespace.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; empty if nothing but punctuation/whitespace
        was supplied.
    """
    text = text.lower()
    text = re.sub(r'\W', ' ', text)
    # Raw string: '\d' inside a plain literal is an invalid escape sequence
    # (SyntaxWarning on recent Python versions).
    # The pattern needs a preceding space, so a number at the very start of
    # the text is kept (e.g. '123 go' stays '123 go').
    # NOTE(review): left as-is on purpose; presumably the pickled vectorizer
    # was fitted on text cleaned the same way - confirm before changing.
    text = re.sub(r' \d+', ' ', text)
    text = re.sub(r'\s+', ' ', text)

    # After the collapse only single spaces separate the words, so one
    # strip() is all that is needed to finish the cleanup.
    return text.strip()

def text_to_speechact(text, estimator_file, embedding_file, corpus_file):
    """Predict the speech act of a sentence from pickled model artifacts.

    Args:
        text: Raw input sentence; it is cleaned with ``preprocess_text``.
        estimator_file: Path to a pickled object exposing ``predict``.
        embedding_file: Path to a pickled object exposing ``transform``.
        corpus_file: Accepted for interface compatibility; not used here.

    Returns:
        'statement', 'interrogative' or 'imperative' when the predicted
        value equals 0, 1 or 2 respectively. The string 'None' when the
        cleaned text is empty or the prediction is any other value.
    """
    cleaned = preprocess_text(text).strip()

    # Nothing left to classify once the text has been cleaned.
    if not cleaned:
        return 'None'

    # Both artifacts are read from disk on every call; loading them once at
    # application start-up would be more efficient.
    # NOTE(security): unpickling can execute arbitrary code, so these paths
    # must only ever point at trusted files.
    with open(embedding_file, 'rb') as embedding_fh:
        vectorizer = pkl.load(embedding_fh)
    with open(estimator_file, 'rb') as estimator_fh:
        estimator = pkl.load(estimator_fh)

    # The vectorizer is given a one-element batch; only the first
    # prediction is of interest.
    features = vectorizer.transform([cleaned])
    predicted = estimator.predict(features)[0]

    # The position in this tuple is the numeric value each label stands for.
    for code, label in enumerate(('statement', 'interrogative', 'imperative')):
        if predicted == code:
            return label

    return 'None'

In [3]:
def speechact_wrapper(sample_sentence):
    """Classify ``sample_sentence`` using the fixed artifact paths below.

    Args:
        sample_sentence: The sentence to classify.

    Returns:
        Whatever ``text_to_speechact`` returns for the sentence.
    """
    # Paths are relative, so they resolve against the current working
    # directory of the running kernel.
    artifact_args = {
        'estimator_file': '../models/estimator_speechact.pkl',
        'embedding_file': '../data/input/groundtruth/corpus/vector_countvector_stopword.pkl',
        'corpus_file': None,
    }

    return text_to_speechact(text=sample_sentence, **artifact_args)

# Smoke check: classify two sample sentences and print each result.
for sample in ('What is the name of the person', 'Go away'):
    print(speechact_wrapper(sample))

interrogative
imperative
