In [1]:
import numpy as np
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from textblob import TextBlob
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import subprocess
import tensorflow as tf

In [2]:
# Words that CorFilt drops before stemming. 'not' does not appear in this list
# (while 'no' and 'nor' do), so negation tokens are not removed by the stop-word check.
stopwords=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"]
# Punctuation that CorFilt discards. '!' and '?' are absent from this string;
# CorFilt turns those two into marker tokens instead. This is a plain string, so
# `token in punkts` is a substring test rather than a per-character set lookup.
punkts='''"#$%&\'()*+,-./:;<=>@[\\]^_`{|}~'''

In [3]:
def CorFilt(i):
    """Normalise a sentence into a space-separated string of stemmed tokens.

    Steps, in order:
      1. Lower-case the text, turn newlines into spaces and expand "n't"
         into " not".
      2. Tokenise and POS-tag; drop every token tagged 'RB' (adverb in the
         Penn Treebank tagset) except the word "not".
      3. Fuse each "not" with the token that follows it as "NOT<token>".
      4. Re-tokenise, discard tokens found in `punkts` or `stopwords`,
         replace "!" / "?" with the markers XXEXCLMARK / XXQUESMARK, skip
         "'s" and "``", and Porter-stem everything else.

    Parameters
    ----------
    i : str
        Raw input text.

    Returns
    -------
    str
        The surviving tokens joined by single spaces ("" when none survive).
    """
    ps = PorterStemmer()

    # A newline is replaced by a space (not removed) so the words on either
    # side of a line break are not glued into one token.
    text = i.lower().replace("\n"," ").replace("  "," ").replace("n't"," not")

    pieces = []
    for word, tag in pos_tag(word_tokenize(text)):
        if word == "not":
            # No trailing space: the marker attaches to the next kept token.
            # Matching the whole token (instead of a substring replace of
            # "not ") keeps words that merely end in "not", e.g. "knot", intact.
            pieces.append("NOT")
        elif tag != 'RB':
            pieces.append(word + " ")

    tokens = word_tokenize("".join(pieces).strip())

    kept = []
    for tok in tokens:
        # `punkts` is a string, so this is a substring test.
        if tok in punkts or tok in stopwords:
            continue
        if tok == "!":
            kept.append("XXEXCLMARK")
        elif tok == "?":
            kept.append("XXQUESMARK")
        elif tok != "'s" and tok != "``":
            kept.append(ps.stem(tok))
    return " ".join(kept).strip()

In [4]:
import pickle

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load "EmoVec" and "vectorizer" from a source you trust.

# `with` guarantees the handle is closed even if unpickling raises.
with open("EmoVec","rb") as f:
    EmoVec=pickle.load(f)

with open("vectorizer","rb") as f:
    vectorizer=pickle.load(f)

# Keras model used by EmopreD, loaded from the "models/" directory.
model=tf.keras.models.load_model("models/")

In [5]:
def EmowavE(sent,vectorizer=vectorizer,EmoVec=EmoVec,trans=True):
    """Pick an emotion for `sent` by cosine similarity against `EmoVec` rows.

    Parameters
    ----------
    sent : str
        Input text.
    vectorizer : object with ``transform``
        Turns the CorFilt-normalised text into a feature row.
        Defaults to the notebook-level `vectorizer`.
    EmoVec : indexable with ``.shape[0]`` rows
        One reference vector per emotion, row ``k`` matching ``EmoDict[k]``
        (assumes five rows in the order anger, disgust, fear, joy, sadness
        -- TODO confirm against how EmoVec was built).
    trans : bool
        When True, detect the language with TextBlob and, if it is not
        English, translate by running the external ``trans -b`` command
        (presumably translate-shell -- confirm it is installed).

    Returns
    -------
    (str, list)
        The winning emotion label and the similarities normalised to sum
        to 1. When every similarity is 0 the weights are all 0 and, because
        ``np.argmax`` then returns index 0, the label is 'anger'.

    Raises
    ------
    subprocess.CalledProcessError / FileNotFoundError
        Propagated from ``subprocess.check_output`` when translation fails.
    """
    transDict={'gu':'Gujarati',
               'hi':'Hindi'}
    # Translate from any language to english
    if trans:
        # Detect once and reuse the answer instead of querying again for
        # the message below.
        lang = TextBlob(sent).detect_language()
        if lang!='en':
            lang_name = transDict.get(lang)
            if lang_name is not None:
                print(f"\nInput text was in {lang_name}")
            else:
                print(f"\nInput text was not in English")
            print("\nTranslating...")
            # Argument list (no shell), so `sent` is not shell-interpreted.
            output=subprocess.check_output(['trans','-b',sent])
            sent=output.decode('utf-8').strip()
            print(f"\nTranslation in English: {sent}")

    EmoBuff=vectorizer.transform([CorFilt(sent)])
    EmoDict={0:'anger',
             1:'disgust',
             2:'fear',
             3:'joy',
             4:'sadness'}

    # Both operands are forced into a single-row 2-D layout, as in the
    # original code; the query row is built once outside the loop.
    query = EmoBuff.reshape(-1,1).T
    sims = [
        # cosine_similarity returns a 1x1 array here; index the element
        # explicitly rather than calling float() on the whole array.
        float(cosine_similarity(query, EmoVec[k].reshape(-1,1).T)[0, 0])
        for k in range(EmoVec.shape[0])
    ]

    total = sum(sims)
    if total==0:
        # No overlap with any emotion vector: one zero per emotion row.
        weights = [0 for _ in sims]
    else:
        weights = [s/total for s in sims]

    return EmoDict[np.argmax(weights)], weights

In [6]:
def EmopreD(sent,model=model,vectorizer=vectorizer):
    """Predict the emotion of `sent` with the loaded neural model.

    The text is normalised by CorFilt, vectorised, densified and reshaped
    to ``(1, 1, n_features)`` before being passed to ``model.predict``.

    Returns a tuple ``(label, weights)`` where `weights` is whatever
    ``model.predict`` returned and `label` is the emotion at its argmax.
    """
    labels = dict(enumerate(('anger', 'disgust', 'fear', 'joy', 'sadness')))

    features = vectorizer.transform([CorFilt(sent)]).toarray()
    n_features = features.shape[1]
    model_input = features.reshape(1, 1, n_features)

    weights = model.predict(model_input)
    winner = np.argmax(weights)

    return labels[winner], weights

In [7]:
# Run both classifiers on one sentence; each call returns (label, weights),
# and the whole tuple is interpolated into the printed line.
sentence = "a perfectly parceled dead body"
print(f"\n\t>>> Emotion from VSM: {EmowavE(sentence)}")
print(f"\n\t>>> Emotion from LSTM: {EmopreD(sentence)}")


	>>> Emotion from VSM: ('fear', [0.03509836056013032, 0.3937048827436725, 0.3991363370325183, 0.055455668944903565, 0.11660475071877525])

	>>> Emotion from LSTM: ('anger', array([[5.6652665e-01, 1.7838733e-08, 4.3347329e-01, 2.0179299e-12,
        2.0590299e-08]], dtype=float32))


In [8]:
# Second example; rebinding `sentence` here also changes what later cells
# that read the global `sentence` will see.
sentence = "i saw ghost"
print(f"\n\t>>> Emotion from VSM: {EmowavE(sentence)}")
print(f"\n\t>>> Emotion from LSTM: {EmopreD(sentence)}")


	>>> Emotion from VSM: ('disgust', [0.08509301201343152, 0.4714097394666888, 0.25766950923036036, 0.11222387153159293, 0.07360386775792635])

	>>> Emotion from LSTM: ('fear', array([[3.2668698e-11, 7.2406436e-14, 1.0000000e+00, 1.2614825e-09,
        1.9409908e-11]], dtype=float32))


In [9]:
# Do the two classifiers pick the same class index for the current `sentence`?
np.argmax(EmopreD(sentence)[1]) == np.argmax(EmowavE(sentence)[1])

False

In [10]:
def EnsemblE(sent):
    """Combine the VSM (EmowavE) and LSTM (EmopreD) predictions for `sent`.

    The prediction whose largest weight is higher wins; ties go to VSM.
    The chosen emotion is printed, followed by a warning line when the two
    methods disagree on the top class.

    Parameters
    ----------
    sent : str
        Text to classify.

    Returns
    -------
    (bool, str, str)
        ``sureFLAG`` (True when both methods agree on the top class index),
        the winning method name ("VSM" or "LSTM"), and the chosen emotion.
    """
    # Classify the argument itself, not the notebook-global `sentence`.
    EmoV, weightV = EmowavE(sent)
    EmoL, weightL = EmopreD(sent)

    sureFLAG = bool(np.argmax(weightV)==np.argmax(weightL))

    if np.max(weightV)>=np.max(weightL):
        method, Emo = "VSM", EmoV
    else:
        method, Emo = "LSTM", EmoL
    print(f"\n\t>>> Emotion from {method}: {Emo}")

    if not sureFLAG:
        print("EmowavE is not sure this time though!")
    return sureFLAG, method, Emo

In [11]:
# Try the ensemble; the cell's displayed value is the (sureFLAG, method, emotion) tuple.
sentence = "i saw a friendly ghost"
EnsemblE(sentence)


	>>> Emotion from LSTM: fear
EmowavE is not sure this time though!


(False, 'LSTM', 'fear')