# Sentiment Analysis

Commonly used libraries.

In [4]:
from pprint import pprint
from termcolor import colored, cprint

Global variables.

In [5]:
# Shared termcolor colours: separator rules first, section headings second.
line_color, text_color = 'green', 'magenta'

## Textblob

In [6]:
import textblob as tb
from textblob import TextBlob
from typing import Callable

In [7]:
# Build a TextBlob for a sample sentence and dump its sentiment assessment.
text = 'Artificial intelligence will change the world, in a good way of course.'
tb_obj = TextBlob(text=text)

cprint(text='-' * 100, color=line_color)
cprint(text='Sentiment assessments:', color=text_color)
cprint(text='-' * 100, color=line_color)

# Resolve the sentiment object once, then read each attribute with getattr()
# rather than eval() on a formatted string: same lookup, no code evaluation.
sentiment = tb_obj.sentiment_assessments
for attr in dir(sentiment):
  # str.isalpha() rejects any name containing '_', which filters out the
  # dunder and private attributes returned by dir().
  if attr.isalpha():
    pprint(f'{attr}: {getattr(sentiment, attr)}')

[32m----------------------------------------------------------------------------------------------------[0m
[35mSentiment assessments:[0m
[32m----------------------------------------------------------------------------------------------------[0m
("assessments: [(['artificial'], -0.6, 1.0, None), (['good'], 0.7, "
 '0.6000000000000001, None)]')
'count: <built-in method count of Sentiment object at 0x7f6e5cefcfb0>'
'index: <built-in method index of Sentiment object at 0x7f6e5cefcfb0>'
'polarity: 0.04999999999999999'
'subjectivity: 0.8'


Sentiments of Synonyms

In [8]:
import nltk
# Download the 'wordnet' NLTK package; presumably required by the WordNet-backed
# definition/synset lookups in the following cell (TODO confirm).
# As the last expression of the cell, the call's return value is displayed.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

In [9]:
from textblob import Word
from textblob.wordnet import Synset

# Look up a single word and show its definitions and synsets.
word = 'intelligence'
word_obj = Word(string=word)

# Definitions of the word
cprint(text='-' * 100, color=line_color)
cprint(text='Definitions: ', color=text_color)
cprint(text='-' * 100, color=line_color)
pprint(word_obj.define())

# Synsets of the word. The heading previously read 'Sentiment assessments:',
# a copy-paste leftover that did not describe what is printed below.
cprint(text='-' * 100, color=line_color)
cprint(text='Synsets:', color=text_color)
cprint(text='-' * 100, color=line_color)
pprint(word_obj.get_synsets())

[32m----------------------------------------------------------------------------------------------------[0m
[35mDefinitions: [0m
[32m----------------------------------------------------------------------------------------------------[0m
['the ability to comprehend; to understand and profit from experience',
 'a unit responsible for gathering and interpreting information about an enemy',
 'secret information about an enemy (or potential enemy)',
 'information about recent and important events',
 'the operation of gathering information about an enemy']
[32m----------------------------------------------------------------------------------------------------[0m
[35mSentiment assessments:[0m
[32m----------------------------------------------------------------------------------------------------[0m
[Synset('intelligence.n.01'),
 Synset('intelligence.n.02'),
 Synset('intelligence.n.03'),
 Synset('news.n.01'),
 Synset('intelligence.n.05')]


## Domains of Sentiment Analysis

1. Knowledge-Based: Classifies text based on the emotional words it contains.
  - Lexicon-based analysis is a more specific subtype that assigns polarity and sentiment scores to each word in a text and sums them into a total score per category.

2. Statistical: Uses statistical and machine learning techniques to classify sentiment. 
  - With this approach, sentiment analysis overlaps with emotion detection.

3. Hybrid: Applies aspects of both knowledge-based and statistical techniques.

## Types of Sentiment Analysis by Application

1. aspect-based
2. intent analysis

### Aspect-Based Sentiment Analysis

In [1]:
!pip install aspect_based_sentiment_analysis

Collecting aspect_based_sentiment_analysis
  Downloading aspect_based_sentiment_analysis-2.0.3-py3-none-any.whl (35 kB)
Collecting tensorflow==2.5
  Downloading tensorflow-2.5.0-cp37-cp37m-manylinux2010_x86_64.whl (454.3 MB)
[K     |████████████████████████████████| 454.3 MB 16 kB/s 
[?25hCollecting testfixtures
  Downloading testfixtures-6.18.2-py2.py3-none-any.whl (95 kB)
[K     |████████████████████████████████| 95 kB 5.1 MB/s 
Collecting optuna
  Downloading optuna-2.9.1-py3-none-any.whl (302 kB)
[K     |████████████████████████████████| 302 kB 67.7 MB/s 
[?25hCollecting transformers==4.8
  Downloading transformers-4.8.0-py3-none-any.whl (2.5 MB)
[K     |████████████████████████████████| 2.5 MB 56.7 MB/s 
Collecting grpcio~=1.34.0
  Downloading grpcio-1.34.1-cp37-cp37m-manylinux2014_x86_64.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 54.6 MB/s 
Collecting keras-nightly~=2.5.0.dev
  Downloading keras_nightly-2.5.0.dev2021032900-py2.py3-none-any.whl (1.2 MB)


In [10]:
import aspect_based_sentiment_analysis as absa

# Load the ABSA pipeline with its default settings and print the full result
# of classifying the sample text against the two aspects.
nlp = absa.load()
text = "I was so lucky today. I won the grand prize."
pprint(
    nlp(
        text=text,
        aspects=['I', 'prize'],
    )
)

Some layers from the model checkpoint at absa/classifier-rest-0.2 were not used when initializing BertABSClassifier: ['dropout_379']
- This IS expected if you are initializing BertABSClassifier from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertABSClassifier from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of BertABSClassifier were not initialized from the model checkpoint at absa/classifier-rest-0.2 and are newly initialized: ['dropout_113']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


CompletedTask(text='I was so lucky today. I won the grand prize.', aspects=['I', 'prize'], subtasks=OrderedDict([('I', CompletedSubTask(text='I was so lucky today. I won the grand prize.', aspect='I', examples=[PredictedExample(text='I was so lucky today. I won the grand prize.', aspect='I', sentiment=<Sentiment.positive: 2>, text_tokens=['i', 'was', 'so', 'lucky', 'today', '.', 'i', 'won', 'the', 'grand', 'prize', '.'], text_subtokens=['i', 'was', 'so', 'lucky', 'today', '.', 'i', 'won', 'the', 'grand', 'prize', '.'], aspect_tokens=['i'], aspect_subtokens=['i'], tokens=['[CLS]', 'i', 'was', 'so', 'lucky', 'today', '.', 'i', 'won', 'the', 'grand', 'prize', '.', '[SEP]', 'i', '[SEP]'], subtokens=['[CLS]', 'i', 'was', 'so', 'lucky', 'today', '.', 'i', 'won', 'the', 'grand', 'prize', '.', '[SEP]', 'i', '[SEP]'], alignment=[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15]], scores=[0.0028238886, 0.0012040298, 0.9959721], review=Review(is_reference=None, 

In [12]:
# Reload the pipeline, this time passing a BasicPatternRecognizer as the
# pattern_recognizer, and rerun it on the same text and aspects.
recognizer = absa.aux_models.BasicPatternRecognizer()
nlp = absa.load(pattern_recognizer=recognizer)
completed_task = nlp(text=text, aspects=['I', 'prize'])
# Unpack one example per aspect; presumably returned in the same order as the
# `aspects` list (TODO confirm against the library docs).
I, prize = completed_task.examples

Some layers from the model checkpoint at absa/classifier-rest-0.2 were not used when initializing BertABSClassifier: ['dropout_379']
- This IS expected if you are initializing BertABSClassifier from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertABSClassifier from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of BertABSClassifier were not initialized from the model checkpoint at absa/classifier-rest-0.2 and are newly initialized: ['dropout_189']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# Print the predicted sentiment and class scores for the "I" aspect, then
# display the review attached to that example.
absa.summary(I)
absa.display(I.review)

Sentiment.positive for "I"
Scores (neutral/negative/positive): [0.003 0.001 0.996]


In [14]:
# Print the predicted sentiment and class scores for the "prize" aspect, then
# display the review attached to that example.
absa.summary(prize)
absa.display(prize.review)

Sentiment.positive for "prize"
Scores (neutral/negative/positive): [0.001 0.001 0.998]
