# Aspect-Based Sentiment Analysis

Code modified from: https://medium.com/analytics-vidhya/aspect-based-sentiment-analysis-a-practical-approach-8f51029bbc4a

In [1]:
import stanza
import nltk
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer

In [2]:
stanza.download('en')
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('vader_lexicon')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/master/resources_1.2.0.json: 128kB [00:00, 13.5MB/s]                    
2021-05-04 23:12:05 INFO: Downloading default packages for language: en (English)...
2021-05-04 23:12:05 INFO: File exists: /Users/TL/stanza_resources/en/default.zip.
2021-05-04 23:12:08 INFO: Finished downloading models and saved to /Users/TL/stanza_resources.
[nltk_data] Downloading package stopwords to /Users/TL/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/TL/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/TL/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/TL/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [3]:
def absa(txt, nlp, stopwords, sia):
    txt = txt.lower()
    sent_list = nltk.sent_tokenize(txt)
    
    fcluster = []
    totalfeatureList = []
    finalcluster = []
    dic = {}
    
    for sent in sent_list:
        wordlist = nltk.word_tokenize(sent)
        tags = nltk.pos_tag(wordlist)
        
        # Get new tokens and pos tags after joining nouns
        new_wordlist = join_nouns(tags)
        new_tags = nltk.pos_tag(new_wordlist)
        
        # Remove stopwords
        new_tags = [i for i in new_tags if i[0] not in stopwords]
        doc = nlp([new_wordlist])
    
        # Getting the dependency relations between the words
        dep_node = []
        for dep_edge in doc.sentences[0].dependencies:
            dep_node.append([dep_edge[2].text, dep_edge[0].id, dep_edge[1]])
        
        # Convert to appropriate format
        for node in dep_node:
            if (int(node[1]) != 0):
                node[1] = new_wordlist[(int(node[1]) - 1)]
        
        featureList = []
        categories = []
        for i in new_tags:
            if(i[1]=='JJ' or i[1]=='NN' or i[1]=='JJR' or i[1]=='NNS' or i[1]=='RB'):
                featureList.append(list(i)) # For features for each sentence
                totalfeatureList.append(list(i)) # Stores the features of all the sentences in the text
                categories.append(i[0])

        for i in featureList:
            filist = []
            for j in dep_node:
                if((j[0]==i[0] or j[1]==i[0]) and (j[2] in ["nsubj", "acl:relcl", "obj", "dobj", "agent", "advmod", "amod", "neg", "prep_of", "acomp", "xcomp", "compound"])):
                    if(j[0]==i[0]):
                        filist.append(j[1])
                    else:
                        filist.append(j[0])
            fcluster.append([i[0], filist])
            
    for i in totalfeatureList:
        dic[i[0]] = i[1]
    
    for i in fcluster:
        if(dic[i[0]]=="NN"):
            finalcluster.append(i)
            
    
    # Sentiment Analysis
    results = []
    for aspect, opinion_words in finalcluster:
        sentiment = sia.polarity_scores(" ".join(opinion_words))['compound']
        results.append((aspect, sentiment))
        
    return(results)

In [4]:
def join_nouns(pos_tag_list):
    """Given a list of 2-tuples (word, pos_tag),
    join all consecutive 'NN' tags to create a new sentence.
    
    Returns:
     -- final_text: the new sentence with joined nouns.
     -- new_wordlist: the tokens for the new sentence.
    """
    new_wordlist = []
    noun_buffer = []
    for idx, elem in enumerate(pos_tag_list):
        word, pos = elem
        if pos in ("NN", "NNS"):
            noun_buffer.append(word)
        else:
            if noun_buffer:
                new_wordlist.append(''.join(noun_buffer))
                noun_buffer.clear()
            new_wordlist.append(word)
        
        if idx == len(pos_tag_list)-1:
            if noun_buffer:
                new_wordlist.append(''.join(noun_buffer))
#     final_text = ' '.join(new_wordlist[:-1]) + '.'
    return new_wordlist

In [5]:
# Retrieve text from subtitle file
with open("../resources/test_sub.vtt") as f:
    sub = f.readlines()

sub = [line if line!='\n' else '<BREAK>' for line in sub]
text = ""
for line in sub[3:]:
    if line != "<BREAK>" and "-->" not in line:
        text += line
text = text.replace("\n", " ")

In [6]:
nlp = stanza.Pipeline(lang='en', tokenize_pretokenized=True)
sia = SentimentIntensityAnalyzer()
stop = stopwords.words('english')

2021-05-04 23:12:10 INFO: Loading these models for language: en (English):
| Processor | Package   |
-------------------------
| tokenize  | combined  |
| pos       | combined  |
| lemma     | combined  |
| depparse  | combined  |
| sentiment | sstplus   |
| ner       | ontonotes |

2021-05-04 23:12:10 INFO: Use device: cpu
2021-05-04 23:12:10 INFO: Loading: tokenize
2021-05-04 23:12:10 INFO: Loading: pos
2021-05-04 23:12:10 INFO: Loading: lemma
2021-05-04 23:12:10 INFO: Loading: depparse
2021-05-04 23:12:11 INFO: Loading: sentiment
2021-05-04 23:12:11 INFO: Loading: ner
2021-05-04 23:12:11 INFO: Done loading processors!


In [7]:
res = sorted(absa(text[2:], nlp, stop, sia), key=lambda x: x[1], reverse=True)

In [8]:
final_res = {}
counts = {}

for aspect, polarity in res:
    # Calculate running average polarity for each aspect
    class_ = round(polarity)
    counts[aspect] = counts.get(aspect, 0) + 1
    final_res[aspect] = (final_res.get(aspect, 0) * (counts.get(aspect, 0)-1) + polarity) / (counts.get(aspect))
    final_res[aspect] = round(final_res[aspect], 2)

# Remove neutral words from final_res
final_res = {k:v for k,v in final_res.items() if abs(v) > 0.1}

In [9]:
sorted(final_res.items(), key=lambda x: x[1], reverse=True)

[('color', 0.76),
 ('hdrcontentconsumptiondisplay', 0.64),
 ('microphone', 0.62),
 ('smoothness', 0.53),
 ('buck', 0.49),
 ('power', 0.49),
 ('darnloud', 0.49),
 ('environment', 0.49),
 ('someone', 0.46),
 ('spot', 0.46),
 ('job', 0.44),
 ('feel', 0.42),
 ('fluidity', 0.42),
 ('hertzdisplay', 0.42),
 ('everything', 0.4),
 ('approach', 0.4),
 ('tag', 0.4),
 ('step', 0.36),
 ('megapixelselfiecamera', 0.36),
 ('reaction', 0.32),
 ('leap', 0.32),
 ('stabilization', 0.3),
 ('s21name', 0.3),
 ('test', 0.2),
 ('battery', 0.19),
 ('touch', 0.15),
 ('point', 0.15),
 ('phone', 0.15),
 ('apple', 0.13),
 ('thing', -0.11),
 ('shooter', -0.11),
 ('couple', -0.12),
 ('speedadvantage', -0.24),
 ('awesome', -0.42),
 ('glass', -0.48),
 ('notch', -0.48),
 ('cpus', -0.51),
 ('beautifying', -0.53)]