In [1]:
import spacy
import pickle

In [2]:
from spacy.lang.en import STOP_WORDS

In [3]:
import string

In [4]:
class SentimentAnalyzer:
    def __init__ (self, reviews):
        self.nlp = spacy.load ('en_core_web_sm')
        self.neg_words = pickle.load (open ("neg_words.pkl", "rb"))
        self.neg_words.add ('not')
        
        self.reviews = [self.nlp(x.lower()) for x in reviews]
        
        self.stop_words = STOP_WORDS
        self.stop_words.update (string.punctuation)
        self.stop_words = set([x for x in self.stop_words if x not in self.neg_words])
        
        self.docs = []
    
    def drop_adverbs (self):
        for index in range(len(self.splits)):
            self.split_pattern = "([a-z]+)\s*[.,]+(\s*)"
            advbs = set()
            for t in self.splits[index]:
                if t.pos == 86 and t.text not in self.neg_words:
                    advbs.add (t.i)
            self.splits[index] = [x for x in self.splits[index] if x.i not in advbs]
            
    def drop_stopwords(self):
        # Remove all stop words that are present in the neg_words, TILL NOW: only 'not'
        for i in range (len (self.splits)):
            stpwrds = set ()
            for t in self.splits[i]:
                if t.text.lower() in self.stop_words:
                    stpwrds.add (t.i)
        self.split_pattern = re.compile (r"(\s|-)")
        self.splits[i] = [x for x in self.splits[i] if x.i not in stpwrds]

    def sentence_splitter (self):
        self.splits = []
        def splitter (sentence):
            start = 0
            counter = 0
            for token in sentence:
                if token.pos == 89 or token.text.strip() == ',':
                    if counter > start:
                        self.splits.append (sentence[start: counter])
                    start = counter + 1
                counter += 1
            #print (sentence[start: ])
            if len (sentence[start: counter]) > 0:
                self.splits.append (sentence[start: counter])
        
        for doc in self.reviews:
            for sent in doc.sents:
                #print ("Sentence: ", sent)
                splitter (sent)
        #return splits
        
    def feature_extraction (self):
        # To be done when splits have SPAN Objects
        # i.e. before any drop_*** methods()
        self.features = []
        for entry in self.splits:
            # For every entry, we need to pick out a noun and an adjective
            nouns=[]
            adjs=[]
            vbs = []
            for t in entry:
                if t.pos == 92 or t.pos == 96:
                    nouns.append (t)
                elif t.pos == 84:
                    adjs.append (t)
                elif t.pos == 100:
                    vbs.append (t)
            if len (adjs) == 0:
                adjs = vbs
            self.features.append ( 
                                        ( ', '.join(map(str, nouns)), ', '.join(map(str, adjs)) ),
                                  )
        
    def prepare(self):
        self.sentence_splitter()
        self.feature_extraction()
        self.drop_adverbs()
        
    def pprint (self):
        counter = 0
        for split in sa.splits:
            sent = ' '.join (map(str, split))
            print (sent)
            scores = sia.polarity_scores (sent)
            print (scores)
            print (f"ENTITY: {sa.features[counter][0]:<10} \tFEATURES: {sa.features[counter][1]:<10}", end="\t")
            if scores['compound'] > 0:
                print ("POSITIVE")
            elif scores['compound'] == 0:
                print ("NEUTRAL")
            else:
                print ("NEGATIVE");
            print()
            counter += 1
        print ()

In [5]:
sentences = [
    "Both Veg and Non-Veg Items were great"
]

In [6]:
sa = SentimentAnalyzer (sentences)

In [7]:
sa.prepare()

In [8]:
sa.sentence_splitter()

In [9]:
sa.splits

[veg, non-veg items were great]

In [10]:
type (sa.splits[0])

spacy.tokens.span.Span

In [11]:
# sa.drop_stopwords()

In [12]:
sa.splits

[veg, non-veg items were great]

In [13]:
sa.drop_adverbs()

In [14]:
sa.splits

[[veg], [non, -, veg, items, were, great]]

In [15]:
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/balor/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [16]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [17]:
sia = SentimentIntensityAnalyzer()

In [18]:
sa.feature_extraction()

In [19]:
sentences

['Both Veg and Non-Veg Items were great']

In [20]:
counter = 0
for split in sa.splits:
    sent = ' '.join (map(str, split))
    print (sent)
    scores = sia.polarity_scores (sent)
    print (scores)
    print (f"ENTITY: {sa.features[counter][0]:<10} \tFEATURES: {sa.features[counter][1]:<10}", end="\t")
    print (f"{'POSITIVE' if scores['compound'] > 0 else 'NEGATIVE'}")
    counter += 1
    print ()

veg
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: veg        	FEATURES:           	NEGATIVE

non - veg items were great
{'neg': 0.0, 'neu': 0.494, 'pos': 0.506, 'compound': 0.6249}
ENTITY: items      	FEATURES: non, -, veg, great	POSITIVE



In [21]:
"""
reviews = [
    "Movie was great but Cinema Hall was not cool.",
    "Buffet was great, but ambience not so good.",
]"""

reviews = [
    " First time I tried this outlet in c.p but got disappointed with their service. Marination and the taste of starters was really not good. This outlet maybe as per review stars are good just as they serve alcohol. But I will not opt this outlet as taste matters alcohol is secondary thing. Chef must see the taste and serve the food as per barbeque nation standards.I am giving 3 stars just for the hospitality of the staff which was really very good."
]

reviews = [
    """Yes they server really good food here. Barbeque nation faces stiff competition around but keeps up well with the competitors. Food variety was good, starters were good!
    Drinks were okay.
Its Barbeque Nation, it goes obvious that Staff was exceptional. We love the Service here.

Only issue is the Ambience here. I feel they need to up the Ambience. Can be better sitting arrangement or better local arch. We sat at corner seat which was not very comfortable and was not well placed. Rest enjoyed the food here. Recommended.
"""
]

In [22]:
reviews = [
    """
     Yes they server really good food here. Barbeque nation faces stiff competition around but keeps up well with the competitors. Food variety was good, starters were good!
Drinks were okay.
Its Barbeque Nation, it goes obvious that Staff was exceptional. We love the Service here.

Only issue is the Ambience here. I feel they need to up the Ambience. Can be better sitting arrangement or better local arch. We sat at corner seat which was not very comfortable and was not well placed. Rest enjoyed the food here. Recommended.

    """
]

In [23]:
# reviews = [
#    " both veg and non veg foods were fine."
#]

In [24]:
sa = SentimentAnalyzer(reviews)
sa.prepare()

In [25]:
counter = 0
for split in sa.splits:
    sent = ' '.join (map(str, split))
    print (sent)
    scores = sia.polarity_scores (sent)
    print (scores)
    print (f"ENTITY: {sa.features[counter][0]:<10} \tFEATURES: {sa.features[counter][1]:<10}", end="\t")
    if scores['compound'] > 0:
        print ("POSITIVE")
    elif scores['compound'] == 0:
        print ("NEUTRAL")
    else:
        print ("NEGATIVE");
    counter += 1
    print ()


      yes they server good food .
{'neg': 0.0, 'neu': 0.349, 'pos': 0.651, 'compound': 0.6808}
ENTITY: food       	FEATURES: good      	POSITIVE

barbeque nation faces stiff competition
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: barbeque, nation, competition 	FEATURES: stiff     	NEUTRAL

keeps up with the competitors .
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: competitors 	FEATURES: keeps     	NEUTRAL

food variety was good
{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}
ENTITY: food, variety 	FEATURES: good      	POSITIVE

starters were good ! 

{'neg': 0.0, 'neu': 0.385, 'pos': 0.615, 'compound': 0.4926}
ENTITY: starters   	FEATURES: good      	POSITIVE

drinks were okay . 

{'neg': 0.0, 'neu': 0.513, 'pos': 0.487, 'compound': 0.2263}
ENTITY: drinks     	FEATURES: okay      	POSITIVE

its barbeque nation
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: barbeque, nation 	FEATURES:           	NEUTRAL

it goes obvious th

In [26]:
sia.polarity_scores (reviews[0])

{'neg': 0.032, 'neu': 0.628, 'pos': 0.34, 'compound': 0.9889}

In [27]:
doc = sa.nlp(reviews[0])
for t in doc:
    print (t, t.pos_, t.tag_, spacy.explain (t.tag_))


      SPACE _SP None
Yes INTJ UH interjection
they PRON PRP pronoun, personal
server VERB VBP verb, non-3rd person singular present
really ADV RB adverb
good ADJ JJ adjective
food NOUN NN noun, singular or mass
here ADV RB adverb
. PUNCT . punctuation mark, sentence closer
Barbeque NOUN NN noun, singular or mass
nation NOUN NN noun, singular or mass
faces VERB VBZ verb, 3rd person singular present
stiff ADJ JJ adjective
competition NOUN NN noun, singular or mass
around ADV RB adverb
but CCONJ CC conjunction, coordinating
keeps VERB VBZ verb, 3rd person singular present
up ADP RP adverb, particle
well ADV RB adverb
with ADP IN conjunction, subordinating or preposition
the DET DT determiner
competitors NOUN NNS noun, plural
. PUNCT . punctuation mark, sentence closer
Food NOUN NN noun, singular or mass
variety NOUN NN noun, singular or mass
was AUX VBD verb, past tense
good ADJ JJ adjective
, PUNCT , punctuation mark, comma
starters NOUN NNS noun, plural
were AUX VBD verb, past tense
go

In [28]:
sia.polarity_scores ("There is so much of variety in stuffs")

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

In [29]:
doc = sa.nlp ("Varities of Veg and non Veg foods was there.")
for t in doc:
    print (t, t.pos_, t.tag_, spacy.explain (t.tag_))

Varities NOUN NNS noun, plural
of ADP IN conjunction, subordinating or preposition
Veg PROPN NNP noun, proper singular
and CCONJ CC conjunction, coordinating
non ADJ AFX affix
Veg PROPN NNP noun, proper singular
foods NOUN NNS noun, plural
was AUX VBD verb, past tense
there ADV RB adverb
. PUNCT . punctuation mark, sentence closer


In [30]:
r = 'Varities of Veg and non-veg foods was there.'
print (sia.polarity_scores (r))

doc = sa.nlp (r)
for t in doc:
    print (t, t.pos_, t.tag_, spacy.explain (t.tag_))

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Varities NOUN NNS noun, plural
of ADP IN conjunction, subordinating or preposition
Veg PROPN NNP noun, proper singular
and CCONJ CC conjunction, coordinating
non ADJ JJ adjective
- ADJ JJ adjective
veg ADJ JJ adjective
foods NOUN NNS noun, plural
was AUX VBD verb, past tense
there ADV RB adverb
. PUNCT . punctuation mark, sentence closer


In [31]:
reviews = [
    " First time I tried this outlet in c.p but got disappointed with their service. Marination and the taste of starters was really not good. This outlet maybe as per review stars are good just as they serve alcohol. But I will not opt this outlet as taste matters alcohol is secondary thing. Chef must see the taste and serve the food as per barbeque nation standards.I am giving 3 stars just for the hospitality of the staff which was really very good."
]

In [32]:
sa = SentimentAnalyzer (reviews)
sa.prepare()

In [33]:
counter = 0
for split in sa.splits:
    sent = ' '.join (map(str, split))
    print (sent)
    scores = sia.polarity_scores (sent)
    print (scores)
    print (f"ENTITY: {sa.features[counter][0]:<10} \tFEATURES: {sa.features[counter][1]:<10}", end="\t")
    if scores['compound'] > 0:
        print ("POSITIVE")
    elif scores['compound'] == 0:
        print ("NEUTRAL")
    else:
        print ("NEGATIVE");
    counter += 1
    print ()

  first time i tried this outlet in c.p
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: time, outlet, c.p 	FEATURES: first     	NEUTRAL

got disappointed with their service .
{'neg': 0.437, 'neu': 0.563, 'pos': 0.0, 'compound': -0.4767}
ENTITY: service    	FEATURES: disappointed	NEGATIVE

marination
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: marination 	FEATURES:           	NEUTRAL

the taste of starters was not good .
{'neg': 0.286, 'neu': 0.714, 'pos': 0.0, 'compound': -0.3412}
ENTITY: taste, starters 	FEATURES: good      	NEGATIVE

this outlet as per review stars are good as they serve alcohol .
{'neg': 0.0, 'neu': 0.791, 'pos': 0.209, 'compound': 0.4404}
ENTITY: outlet, review, stars, alcohol 	FEATURES: good      	POSITIVE

i will not opt this outlet as taste matters alcohol is secondary thing .
{'neg': 0.0, 'neu': 0.909, 'pos': 0.091, 'compound': 0.0258}
ENTITY: outlet, taste, alcohol, thing 	FEATURES: secondary 	POSITIVE

chef must see the taste
{

In [34]:
sia.polarity_scores (reviews[0])

{'neg': 0.16, 'neu': 0.794, 'pos': 0.046, 'compound': -0.9096}

In [35]:
reviews = [
    "If you are about to throw birthday parties with family and close frenzz then barbeque nation has it all. From gogappe to veg n non-veg starters to veg n non- veg main course to deserts, kulfies, gulab jamun , rasmalai everything. What else you could demand for than having served all on ur plate. All you need to do is sit back with empty tummy and enjoy each n every ounce of its food..."
]

In [36]:
sa = SentimentAnalyzer (reviews)
sa.prepare ()

In [37]:
counter = 0
for split in sa.splits:
    sent = ' '.join (map(str, split))
    print (sent)
    scores = sia.polarity_scores (sent)
    print (scores)
    print (f"ENTITY: {sa.features[counter][0]:<10} \tFEATURES: {sa.features[counter][1]:<10}", end="\t")
    if scores['compound'] > 0:
        print ("POSITIVE")
    elif scores['compound'] == 0:
        print ("NEUTRAL")
    else:
        print ("NEGATIVE");
    counter += 1
    print ()

if you are about to throw birthday parties with family
{'neg': 0.0, 'neu': 0.769, 'pos': 0.231, 'compound': 0.4019}
ENTITY: birthday, parties, family 	FEATURES: about     	POSITIVE

close frenzz
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: frenzz     	FEATURES: close     	NEUTRAL

barbeque nation has it all .
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: barbeque, nation 	FEATURES:           	NEUTRAL

from gogappe to veg n non - veg starters to veg n
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: gogappe, n, starters, veg, n 	FEATURES: non, -, veg	NEUTRAL

veg main course to deserts
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: course, deserts 	FEATURES: main      	NEUTRAL

kulfies
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: kulfies    	FEATURES:           	NEUTRAL

gulab jamun
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: gulab, jamun 	FEATURES:           	NEUTRAL

rasmalai everything .
{

In [38]:
sia.polarity_scores (reviews[0])

{'neg': 0.044, 'neu': 0.878, 'pos': 0.078, 'compound': 0.5574}

In [39]:
reviews = [
    "Easily one of the worst places that I have been to in recent times.We ordered classic martini along with dahi kebabs. Their martinis were so bad we had to cancel them. Dahi kebabs had just a pea size layer of dahi in a sort of bread and alu tikki. I am sorry but that is not how you make dahi kebabs. The look and feel of the place is also quite a turn off.Would definitely advise to give this place a miss."
]

In [40]:
sa = SentimentAnalyzer (reviews)

In [41]:
sa.prepare()

In [42]:
counter = 0
for split in sa.splits:
    sent = ' '.join (map(str, split))
    print (sent)
    scores = sia.polarity_scores (sent)
    print (scores)
    print (f"ENTITY: {sa.features[counter][0]:<10} \tFEATURES: {sa.features[counter][1]:<10}", end="\t")
    if scores['compound'] > 0:
        print ("POSITIVE")
    elif scores['compound'] == 0:
        print ("NEUTRAL")
    else:
        print ("NEGATIVE");
    counter += 1
    print ()

one of the worst places that i have been to in recent times.we ordered classic martini along with dahi kebabs .
{'neg': 0.186, 'neu': 0.814, 'pos': 0.0, 'compound': -0.6249}
ENTITY: places, martini, dahi, kebabs 	FEATURES: worst, recent, classic	NEGATIVE

their martinis were bad we had to cancel them .
{'neg': 0.44, 'neu': 0.56, 'pos': 0.0, 'compound': -0.6705}
ENTITY: martinis   	FEATURES: bad       	NEGATIVE

dahi kebabs had a pea size layer of dahi in a sort of bread
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: dahi, kebabs, pea, size, layer, dahi, sort, bread 	FEATURES:           	NEUTRAL

alu tikki .
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: alu, tikki 	FEATURES:           	NEUTRAL

i am sorry
{'neg': 0.565, 'neu': 0.435, 'pos': 0.0, 'compound': -0.0772}
ENTITY:            	FEATURES: sorry     	NEGATIVE

that is not you make dahi kebabs .
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: dahi, kebabs 	FEATURES: make      	NEUTRAL

t

In [43]:
sia.polarity_scores ("that is not how you make dahi kababs")

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

In [44]:
sia.polarity_scores ("You do not how to cook food")

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

In [45]:
x = sia.polarity_scores

In [46]:
x('The servers overthere are fools')

{'neg': 0.444, 'neu': 0.556, 'pos': 0.0, 'compound': -0.4939}

In [47]:
x ("The waiters were idiot")

{'neg': 0.524, 'neu': 0.476, 'pos': 0.0, 'compound': -0.5106}

In [48]:
x ("idiot")

{'neg': 1.0, 'neu': 0.0, 'pos': 0.0, 'compound': -0.5106}

In [49]:
x ("I like the benetton tshirts but their pants are bad")

{'neg': 0.352, 'neu': 0.519, 'pos': 0.13, 'compound': -0.6124}

In [50]:
x ("Benetton tshirts are bad but i like their pants")

{'neg': 0.196, 'neu': 0.522, 'pos': 0.283, 'compound': 0.25}

In [51]:
r = [ "I like the benetton tshirts but their pants were not okay" ]

In [52]:
sa = SentimentAnalyzer(r)
sa.prepare()
counter = 0
for split in sa.splits:
    sent = ' '.join (map(str, split))
    print (sent)
    scores = sia.polarity_scores (sent)
    print (scores)
    print (f"ENTITY: {sa.features[counter][0]:<10} \tFEATURES: {sa.features[counter][1]:<10}", end="\t")
    if scores['compound'] > 0:
        print ("POSITIVE")
    elif scores['compound'] == 0:
        print ("NEUTRAL")
    else:
        print ("NEGATIVE");
    counter += 1
    print ()

i like the benetton tshirts
{'neg': 0.0, 'neu': 0.545, 'pos': 0.455, 'compound': 0.3612}
ENTITY: benetton, tshirts 	FEATURES: like      	POSITIVE

their pants were not okay
{'neg': 0.294, 'neu': 0.706, 'pos': 0.0, 'compound': -0.1695}
ENTITY: pants      	FEATURES: okay      	NEGATIVE



In [53]:
doc = sa.nlp ("i am balor.i am Anshuman")
for t in doc:
    print (t, t.pos, t.pos_, t.tag, t.tag_)

i 95 PRON 13656873538139661788 PRP
am 87 AUX 9188597074677201817 VBP
balor.i 92 NOUN 783433942507015291 NNS
am 87 AUX 9188597074677201817 VBP
Anshuman 96 PROPN 15794550382381185553 NNP


In [54]:
list(doc.sents)

[i am balor.i am Anshuman]

In [55]:
sa = SentimentAnalyzer(['i am balor.i am Anshuman'])
sa.prepare()
print (sa.splits)
sa.pprint()

[[i, am, balor.i, am, anshuman]]
i am balor.i am anshuman
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
ENTITY: balor.i, anshuman 	FEATURES:           	NEUTRAL




In [56]:
sa = SentimentAnalyzer()
doc = sa.nlp ("I am going home.")
for token in doc:
    print (token, token.pos, token.pos_)

TypeError: __init__() missing 1 required positional argument: 'reviews'