In [1]:
import pandas as pd
import pickle
import nltk
from nltk.tokenize.toktok import ToktokTokenizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import text_preprocessor as tp
from cleantext import clean

Since the GPL-licensed package `unidecode` is not installed, using Python's `unicodedata` package which yields worse results.


In [3]:
sia = SentimentIntensityAnalyzer()

In [4]:
# import classifier
with open('linear_classifier.pickle', 'rb') as data:
    model = pickle.load(data)
    
# import tfidf
with open('tfidf.pickle', 'rb') as data:
     tfidf = pickle.load(data)

In [6]:
sentiment_map = {'Negative':0, 'Positive':1}

In [7]:
def get_sentiment(text):
    """
    Predicts the sentiment of text using the Multinomial Naive Bayes Model
    """
    sentiment_id = model.predict(tfidf.transform([text]))
    return get_name(sentiment_id)

In [8]:
def get_name(sentiment_id):
    """
    Gets sentiment name from sentiment_map using sentiment_id
    """
    for sentiment, id_ in sentiment_map.items():
        if id_ == sentiment_id:
            return sentiment

In [9]:
def get_noun(text):
    """
    Finds noun of the text
    """
    tokenizer = ToktokTokenizer()
    tokens = tokenizer.tokenize(text)    
    pos_tags = nltk.pos_tag(tokens)
    nouns = []
    for word, tag in pos_tags:
        if tag == "NN" or tag == "NNP" or tag == "NNS":
            nouns.append(word)
    return nouns
    

In [10]:
def top_pos_word(text):
    """
    Finds top positive word using nltk vader library
    """
    pos_polarity = dict()
    for word in nltk.word_tokenize(text):
        pos_score = sia.polarity_scores(word)['pos']
        if word not in pos_polarity:
            pos_polarity[word] = pos_score
        else:
            pos_polarity[word] += pos_score
    top_word = max(pos_polarity, key=pos_polarity.get)
    return top_word

In [11]:
def top_neg_word(text):
    """
    Finds top negative word using nltk vader library
    """
    neg_polarity = dict()
    for word in nltk.word_tokenize(text):
        neg_score = sia.polarity_scores(word)['neg']
        if word not in neg_polarity:
            neg_polarity[word] = neg_score
        else:
            neg_polarity[word] += neg_score
    top_word = max(neg_polarity, key=neg_polarity.get)
    return top_word

In [12]:
def sentiment_analysis(text):
    """
    Finds the sentiment of text, prints positive or negative word and 
    prints the causing words of positivity or negativity
    """
    text = clean(text)
    sentiment = get_sentiment(text)
    print(f'Sentiment: {sentiment}')
    if sentiment == 'Positive':
        nouns = get_noun(text)
        print(f'Positive word: {top_pos_word(text)}')
        print(f'Cause of positivity: {nouns}')
    elif sentiment == 'Negative':
        nouns = get_noun(text)
        print(f'Negative word: {top_neg_word(text)}')
        print(f'Cause of negativity: {nouns}')

### Test the model with some sample input texts

In [13]:
text1 = "You don’t expect a jacuzzi when you paying less then 20$ a night this hotel was amazing within its budget firstly I was glad seeing their COVID precaution as I am myself a doctor secondly the staff was really helpful although the rooms aren’t huge but good enough for two people with a space for you heavy luggage and other stuff also to mention the food was really amazing it was totally like home made food I wish them all the luck and hope they will keep up the good work"

In [14]:
sentiment_analysis(text1)

Sentiment: Positive
Positive word: amazing
Cause of positivity: ['jacuzzi', 'night', 'hotel', 'budget', 'i', 'precaution', 'i', 'doctor', 'staff', 'rooms', 'people', 'space', 'luggage', 'stuff', 'food', 'home', 'food', 'i', 'luck', 'hope', 'work']


In [15]:
text2 = "Location is nice for all travelers and Staff is also good and very welcoming staff. Hotel singh continental is a good option for stay for everyone and my experience was very good. Hotel facilities are good. Overall experience was good. This is a well furnished hotel with a well behaved and active staff. Overall a nice stay. It was comfortable stay with hotel . Food was very tasty. This hotel experience was amazing. It's a really nice hotel. I will definately come again and stay."

In [16]:
sentiment_analysis(text2)

Sentiment: Positive
Positive word: good
Cause of positivity: ['location', 'travelers', 'staff', 'hotel', 'singh', 'continental', 'option', 'stay', 'everyone', 'experience', 'hotel', 'facilities', 'experience', 'good.', 'hotel', 'well', 'staff.', 'stay.', 'stay', 'hotel', 'food', 'hotel', 'experience', 'hotel.', 'i']


In [17]:
text3 = "Quality Hospitality personal behaviour of all management staff was truly good and Everyone took good care of us.. we are very happy and wish to stay back any time in future thank you..buffet was extremely delicious.... special thanx to MR. UDAY . They are very helpful and awesome... I hope they get succes in hospitality industry... I wish them all the best... Will suggest others to visit here Breakfast has limited items..."

In [18]:
sentiment_analysis(text3)

Sentiment: Positive
Positive word: good
Cause of positivity: ['quality', 'hospitality', 'behaviour', 'management', 'staff', 'everyone', 'care', 'time', 'thank', '....', 'thanx', 'succes', 'hospitality', 'industry', 'others', 'breakfast', 'items']


In [19]:
text4 = "The condition of the rooms were very bad. Bed sheets, linens were dirty.Toilet was horrible. Ambience was very bad.Could not stay a single night but paid Rs, 1900/ advance whice was not repaid. Overall a horrible experience for two hours."

In [20]:
sentiment_analysis(text4)

Sentiment: Positive
Positive word: the
Cause of positivity: ['condition', 'rooms', 'bed', 'sheets', 'linens', 'dirty.toilet', 'ambience', 'night', 'rs', 'advance', 'whice', 'experience', 'hours']


In [21]:
text6 = "My product was damaged at the 7th day and I initiated the replacement option, the reverse pick up was scheduled however no delivery information is available still. Itz being 3 days now, when you go to boat website the tracking status button does not works on the website and even you cannot reach an representative via call. I don't know if I am gonna receive the product back or not!"

In [22]:
sentiment_analysis(text6)

Sentiment: Negative
Negative word: damaged
Cause of negativity: ['product', 'day', 'i', 'replacement', 'option', 'reverse', 'pick', 'delivery', 'information', 'still.', 'itz', 'days', 'tracking', 'status', 'button', 'website', 'call.', 'don', 't', 'product']
