# Aspect-Based Sentiment Analysis

Code taken from: https://medium.com/analytics-vidhya/aspect-based-sentiment-analysis-a-practical-approach-8f51029bbc4a

In [1]:
import stanza
import nltk
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer

In [2]:
# Fetch every external resource the notebook relies on. Re-running this cell
# is cheap: already-present packages are reported as existing / up-to-date.
stanza.download('en')  # default English models loaded by stanza.Pipeline('en')
nltk.download('stopwords')  # corpus read via stopwords.words('english')
nltk.download('punkt')  # presumably backs nltk.sent_tokenize / nltk.word_tokenize
nltk.download('averaged_perceptron_tagger')  # presumably backs nltk.pos_tag
nltk.download('vader_lexicon')  # presumably the lexicon behind SentimentIntensityAnalyzer

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/master/resources_1.2.0.json: 128kB [00:00, 22.2MB/s]                    
2021-04-12 06:34:59 INFO: Downloading default packages for language: en (English)...
2021-04-12 06:35:00 INFO: File exists: /Users/TL/stanza_resources/en/default.zip.
2021-04-12 06:35:03 INFO: Finished downloading models and saved to /Users/TL/stanza_resources.
[nltk_data] Downloading package stopwords to /Users/TL/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/TL/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/TL/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/TL/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [3]:
# Default English stanza pipeline; verbose=True logs which processors are loaded.
# absa() only reads the dependency parse (`sentence.dependencies`) from its output.
nlp = stanza.Pipeline('en', verbose=True)

2021-04-12 06:35:04 INFO: Loading these models for language: en (English):
| Processor | Package   |
-------------------------
| tokenize  | combined  |
| pos       | combined  |
| lemma     | combined  |
| depparse  | combined  |
| sentiment | sstplus   |
| ner       | ontonotes |

2021-04-12 06:35:04 INFO: Use device: cpu
2021-04-12 06:35:04 INFO: Loading: tokenize
2021-04-12 06:35:04 INFO: Loading: pos
2021-04-12 06:35:04 INFO: Loading: lemma
2021-04-12 06:35:04 INFO: Loading: depparse
2021-04-12 06:35:05 INFO: Loading: sentiment
2021-04-12 06:35:05 INFO: Loading: ner
2021-04-12 06:35:06 INFO: Done loading processors!


In [4]:
# Sentiment scorer handed to absa(), which reads the 'compound' entry of
# sia.polarity_scores(...) for each aspect's opinion words.
sia = SentimentIntensityAnalyzer()

In [5]:
# Sample review mentioning two aspects ("service", "food") with opposite opinions.
text = "I hate the service, but the food is great!"

In [6]:
def absa(txt, nlp, stopwords, sia):
    """Run a simple aspect-based sentiment analysis over ``txt``.

    The text is lower-cased and split into sentences. For each sentence,
    consecutive nouns are merged (see ``join_nouns``), the result is
    POS-tagged with NLTK and dependency-parsed with ``nlp``. Every noun,
    adjective or adverb token is paired with the words it is connected to
    through a fixed set of dependency relations. Tokens tagged 'NN' are
    reported as aspects, scored by running ``sia`` on their connected words.

    Args:
        txt: the raw text to analyse.
        nlp: callable returning a parsed document whose ``sentences`` expose
            ``dependencies`` as (head, relation, word) triples with a
            ``.text`` attribute on head and word (e.g. a ``stanza.Pipeline``).
        stopwords: iterable of words to ignore as features.
        sia: object with ``polarity_scores(str) -> dict`` containing a
            'compound' key (e.g. NLTK's ``SentimentIntensityAnalyzer``).

    Returns:
        A list of ``(aspect, compound_score)`` tuples, one per occurrence of
        an aspect token, in order of appearance.
    """
    # POS tags of tokens kept as candidate features (aspects and opinion words).
    feature_tags = {"JJ", "JJR", "NN", "NNS", "RB"}
    # Dependency relations considered to link an aspect with an opinion word.
    linking_relations = {
        "nsubj", "acl:relcl", "obj", "dobj", "agent", "advmod",
        "amod", "neg", "prep_of", "acomp", "xcomp", "compound",
    }
    # Set membership keeps stopword filtering O(1) per token.
    stopword_set = set(stopwords)

    clusters = []      # (feature word, [connected words]) for every feature occurrence
    tag_by_word = {}   # feature word -> POS tag (last occurrence in the text wins)

    for sent in nltk.sent_tokenize(txt.lower()):
        tags = nltk.pos_tag(nltk.word_tokenize(sent))

        # Get new tokens and pos tags after joining nouns
        new_sent, new_wordlist = join_nouns(tags)
        new_tags = [
            (word, tag)
            for word, tag in nltk.pos_tag(new_wordlist)
            if word not in stopword_set
        ]

        doc = nlp(new_sent)

        # Collect (dependent, head, relation) as plain strings. The head text is
        # taken from the parser's own head word rather than by indexing the NLTK
        # token list with the parser's word id: the two tokenizations are not
        # guaranteed to line up, and indexing could pick the wrong word or run
        # past the end of the list. All parsed sentences are read so nothing is
        # lost (and nothing crashes) if the parser returns zero or several.
        edges = [
            (word.text, head.text, relation)
            for sentence in doc.sentences
            for head, relation, word in sentence.dependencies
        ]

        for word, tag in new_tags:
            if tag not in feature_tags:
                continue
            tag_by_word[word] = tag

            related = []
            for dependent, head, relation in edges:
                if relation not in linking_relations:
                    continue
                if dependent == word:
                    related.append(head)
                elif head == word:
                    related.append(dependent)
            clusters.append((word, related))

    # Sentiment Analysis
    # NOTE(review): only singular nouns ('NN') are reported as aspects; tokens
    # tagged 'NNS' are collected as features above but filtered out here.
    # Confirm whether plural aspects should be reported too.
    results = []
    for aspect, opinion_words in clusters:
        if tag_by_word[aspect] != "NN":
            continue
        sentiment = sia.polarity_scores(" ".join(opinion_words))['compound']
        results.append((aspect, sentiment))

    return results

In [7]:
def join_nouns(pos_tag_list):
    """Merge runs of consecutive nouns into single tokens and rebuild the sentence.

    Given a list of 2-tuples (word, pos_tag), every maximal run of
    consecutive 'NN' / 'NNS' words is concatenated (with no separator)
    into one token; every other word is kept unchanged.

    Returns:
     -- final_text: the tokens joined by single spaces and terminated by '.'.
        A trailing token made only of '.', '!' or '?' is replaced by that
        final '.'; any other trailing token is kept in the sentence.
     -- new_wordlist: the tokens for the new sentence (still including the
        original trailing punctuation token, if there was one).
    """
    new_wordlist = []
    noun_buffer = []
    for word, pos in pos_tag_list:
        if pos in ("NN", "NNS"):
            noun_buffer.append(word)
            continue
        if noun_buffer:
            new_wordlist.append(''.join(noun_buffer))
            noun_buffer.clear()
        new_wordlist.append(word)

    # Flush a noun run that reaches the end of the sentence.
    if noun_buffer:
        new_wordlist.append(''.join(noun_buffer))

    # Only strip the last token when it really is sentence-final punctuation;
    # dropping it unconditionally would lose the last word of a sentence that
    # has no terminal punctuation.
    sentence_tokens = new_wordlist
    if new_wordlist and all(ch in '.!?' for ch in new_wordlist[-1]):
        sentence_tokens = new_wordlist[:-1]
    final_text = ' '.join(sentence_tokens) + '.'
    return final_text, new_wordlist

In [8]:
# Run the analysis on the sample text, ignoring NLTK's English stopwords.
# The result is a list of (aspect, compound sentiment score) tuples.
stop = stopwords.words('english')
absa(text, nlp, stop, sia)

[('service', -0.5719), ('food', 0.6249)]