#  1. Data Loading and Text Preprocessing

## 1.1 Data Loading and Preprocessing

#### 1.1.1 Import Libraries

In [1]:
import xmltodict
import nltk
nltk.download('omw-1.4')
from nltk.tokenize import word_tokenize
# !pip install TextBlob
from textblob import TextBlob
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\manho\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [2]:
# function to remove stopwords
def remove_stopwords(text):
    text_token= [word for word in word_tokenize(text) if not word in Stopwords]
    text = ' '.join(text_token)
    return text

#### 1.1.2 Data Loading and Exploring

In [3]:
with open("Restaurants.xml") as fd:
    doc = xmltodict.parse(fd.read())

In [4]:
doc = doc['sentences']
doc

OrderedDict([('sentence',
              [OrderedDict([('@id', '3121'),
                            ('text', 'But the staff was so horrible to us.'),
                            ('aspectTerms',
                             OrderedDict([('aspectTerm',
                                           OrderedDict([('@term', 'staff'),
                                                        ('@polarity',
                                                         'negative'),
                                                        ('@from', '8'),
                                                        ('@to', '13')]))])),
                            ('aspectCategories',
                             OrderedDict([('aspectCategory',
                                           OrderedDict([('@category',
                                                         'service'),
                                                        ('@polarity',
                                                         'negative

In [5]:
len(doc['sentence'])

3041

#### 1.1.3 Data Preprocessing

In [6]:
# delete no aspect terms
for i in range(len(doc['sentence'])-1, 0, -1):
    if 'aspectTerms' not in doc['sentence'][i].keys():
        del doc['sentence'][i]

In [7]:
# create dictionary for holding necessary data
data_dict = {}
original_data_dict = {}
term_dict = {}
polarity_dict = {}
from_dict = {}
to_dict = {}

In [8]:
for i in range(len(doc['sentence'])):
    term_list = []
    polarity_list = []
    from_list = []
    to_list = []
    
    try:
            term_list.append(doc['sentence'][i]['aspectTerms']['aspectTerm']['@term'].lower()) # lower case for aspect terms
            polarity_list.append(doc['sentence'][i]['aspectTerms']['aspectTerm']['@polarity'])
            
    except:
        for j in range(len(doc['sentence'][i]['aspectTerms']['aspectTerm'])):
                term_list.append(doc['sentence'][i]['aspectTerms']['aspectTerm'][j]['@term'].lower()) # lower case for aspect terms
                polarity_list.append(doc['sentence'][i]['aspectTerms']['aspectTerm'][j]['@polarity'])

    term_dict[i] = term_list
    polarity_dict[i] = polarity_list

In [9]:
# create the dictionary that hold all data needed
for i in range(len(doc['sentence'])):
    data_dict[i] = {'id': doc['sentence'][i]['@id'], 
                    'text': doc['sentence'][i]['text'].lower(),
                    'term': term_dict[i],
                    'polarity': polarity_dict[i],
                    }
    
    original_data_dict[i] = {'id': doc['sentence'][i]['@id'], 
                'text': doc['sentence'][i]['text'].lower(),
                'term': term_dict[i],
                'polarity': polarity_dict[i],
                }

In [10]:
# combine aspect term with more than 1 word into one word in both sentence and aspect term
for i in range(len(data_dict)):
    for j in range(len(data_dict[i]['term'])):
        if len(data_dict[i]['term'][j].split())>1:
            old_term = data_dict[i]['term'][j]
            new_term = data_dict[i]['term'][j].replace(' ', '')
            data_dict[i]['term'][j] = new_term
            data_dict[i]['text'] = data_dict[i]['text'].replace(old_term, new_term)

In [11]:
from nltk.corpus import stopwords
Stopwords = stopwords.words("english")
Stopwords

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [12]:
# remove neglet words in stopwords
Stopwords.remove('no')
Stopwords.remove('not')
Stopwords.remove('don\'t')
Stopwords.remove('aren\'t')
Stopwords.remove('couldn\'t')
Stopwords.remove('didn\'t')
Stopwords.remove('hadn\'t')
Stopwords.remove('hasn\'t')
Stopwords.remove('haven\'t')
Stopwords.remove('doesn\'t')
Stopwords.remove('isn\'t')
Stopwords.remove('mightn\'t')
Stopwords.remove('mustn\'t')     
Stopwords.remove('needn\'t') 
Stopwords.remove('shan\'t') 
Stopwords.remove('shouldn\'t')
Stopwords.remove('wasn\'t')
Stopwords.remove('weren\'t')
Stopwords.remove('won\'t')
Stopwords.remove('wouldn\'t')

In [13]:
# remove stopwords in sentence
for i in range(len(data_dict)):
    data_dict[i]['text'] = data_dict[i]['text'].lower()
    data_dict[i]['text'] = remove_stopwords(data_dict[i]['text'])   

In [14]:
data_dict

{0: {'id': '3121',
  'text': 'staff horrible us .',
  'term': ['staff'],
  'polarity': ['negative']},
 1: {'id': '2777',
  'text': "completely fair , redeeming factor food , average , could n't make deficiencies teodora .",
  'term': ['food'],
  'polarity': ['positive']},
 2: {'id': '1634',
  'text': "food uniformly exceptional , capable kitchen proudly whip whatever feel like eating , whether 's menu not .",
  'term': ['food', 'kitchen', 'menu'],
  'polarity': ['positive', 'positive', 'neutral']},
 3: {'id': '2846',
  'text': "not food outstanding , little 'perks ' great .",
  'term': ['food', 'perks'],
  'polarity': ['positive', 'positive']},
 4: {'id': '1458',
  'text': 'agreed favorite orrechietewithsausageandchicken ( usually waiters kind enough split dish half get sample meats ) .',
  'term': ['orrechietewithsausageandchicken', 'waiters', 'meats', 'dish'],
  'polarity': ['positive', 'positive', 'neutral', 'neutral']},
 5: {'id': '3161',
  'text': 'bagels outstanding taste terrifi

In [15]:
len(data_dict)

2021

In [16]:
# Create ground truth list
ground_truth_list = []
for i in range(len(data_dict)):
    for j in range(len(data_dict[i]['polarity'])):
        if(data_dict[i]['polarity'][j]!='conflict'):
            ground_truth_list.append(data_dict[i]['polarity'][j])

In [17]:
print('Actual Number in Ground Truth:')
print('Number of Positive: ' + str(ground_truth_list.count('positive')))
print('Number of Neutral: ' + str(ground_truth_list.count('neutral')))
print('Number of Negative: ' + str(ground_truth_list.count('negative')))

Actual Number in Ground Truth:
Number of Positive: 2164
Number of Neutral: 633
Number of Negative: 805


In [18]:
TextBlob('overripe').sentiment.polarity 

0.0

# 2. Dependency Parsing and Aspect Term Based Rules

In [19]:
# definition of polarity according to their value from Textblob
# Positive: 0.1 - 1.0
# Netural: 0.0 - 0.1
# Negative: -1.0 - 0.0

def polarity_define(value):
    if value <0:
        polarity = 'negative'
    elif value < 0.1:
        polarity = 'neutral'
    else:
        polarity = 'positive'
    return polarity

In [20]:
# function to calculate Recall and Precision
def Recall_Precision_Cal(predicted, actual, label):
    TP = 0
    FN = 0
    FP = 0
    
    for i in range(len(predicted)):
        if ((actual[i]== label) & (predicted[i]==label)):
            TP +=1
        if ((actual[i]== label) & (predicted[i]!=label)):
            FN +=1
        if ((actual[i]!= label) & (predicted[i]==label)):
            FP +=1
            
    Recall = TP/(TP+FN)
    Precision = TP/(TP+FP)
    return (float(format(Recall, ".4f")) , float(format(Precision, ".4f")))

### Poisitive Rule 1 (General rule)

In [21]:
adjective_list = []
num = 0

for i in range(len(data_dict)):
    sentence = data_dict[i]['text']
    sen = nlp(sentence)
    for j in range(len(data_dict[i]['term'])):
        if(data_dict[i]['polarity'][j]!='conflict'):
            num += 1
            for token in sen:
                if str(token) == data_dict[i]['term'][j]:
                    head = token.head
                    children = [child for child in token.children]
            
            
            # Use aspect term's Head as an adjective
            if(num > len(adjective_list)):
                for token in sen:
                    if str(token) == str(head):
                        if token.dep_ == 'amod' or token.dep_ =='acomp' or token.dep_=='advmod' or token.pos_ =='ADJ':
                            adjective_list.append(token)
                            
                            
            # Use aspect term's children as an adjective
            if(num > len(adjective_list)):
                for child in children:
                    for token in sen:
                        if(num > len(adjective_list)):
                            if(str(token)==str(child)):
                                if token.dep_ == 'amod' or token.dep_ =='acomp' or token.dep_=='advmod' or token.pos_ =='ADJ':
                                    adjective_list.append(token)
                                    break
            
            # Use aspect term's Head's children as an adjective
            if(num > len(adjective_list)):
                for token in sen:
                    if str(token) == str(head):
                        head_children = [child for child in token.children]

                        for child in head_children:
                            for token in sen:
                                if(num > len(adjective_list)):
                                    if(str(token)==str(child)):
                                        if token.dep_ == 'amod' or token.dep_ =='acomp' or token.dep_=='advmod' or token.pos_ =='ADJ':
                                            adjective_list.append(token)
                                            break
            
                                                                                     
            # Find the adjective near the aspect term in sentence as adjective
            if(num > len(adjective_list)):
                fufill = False
                try:
                    index = sentence.split().index(str(data_dict[i]['term'][j]))
                    for k in range(index - 4, index + 4, 1):
                        if k >= 0 and k < len(sentence.split()):
                            if fufill == False:
                                if sen[k].dep =='amod' or sen[k].dep_ =='acomp' or sen[k].dep_=='advmod' or sen[k].pos_ == 'ADJ':
                                    adjective_list.append(sen[k])
                                    fufill = True
                except:
                    None
            
            # Others are treated it as Neutral
            if(num > len(adjective_list)):
                adjective_list.append('neutral')
                
pos1_score_list = [TextBlob(str(element)).sentiment.polarity for element in adjective_list]
pos1_predicted_list = [polarity_define(element) for element in pos1_score_list]
Recall_Precision_Cal(pos1_predicted_list, ground_truth_list, 'positive')

(0.5434, 0.8082)

In [22]:
print('Number of Positive: ' + str(pos1_predicted_list.count('positive')))
print('Number of Neutral: ' + str(pos1_predicted_list.count('neutral')))
print('Number of Negative: ' + str(pos1_predicted_list.count('negative')))

Number of Positive: 1455
Number of Neutral: 1824
Number of Negative: 323


In [23]:
adjective_list

[horrible,
 redeeming,
 exceptional,
 capable,
 menu,
 outstanding,
 outstanding,
 favorite,
 orrechietewithsausageandchicken,
 half,
 enough,
 outstanding,
 nevertheless,
 mayonnaise,
 mayonnaise,
 cooked,
 'neutral',
 forgot,
 cooked,
 hot,
 'neutral',
 perfect,
 good,
 good,
 expert,
 best,
 best,
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 enough,
 whole,
 'neutral',
 'neutral',
 best,
 friendly,
 friendly,
 little,
 incredible,
 warm,
 friendly,
 great,
 great,
 cute,
 cute,
 free,
 poor,
 poor,
 good,
 serious,
 closer,
 amazing,
 excellent,
 puffpastrygoatcheese,
 puffpastrygoatcheese,
 hangersteakaupoivre,
 'neutral',
 perfect,
 curtious,
 'neutral',
 mild,
 delicious,
 little,
 winelist,
 best,
 even,
 detailed,
 consistently,
 great,
 consistently,
 nice,
 pleasant,
 pleasant,
 really,
 really,
 'neutral',
 fabulous,
 fantastic,
 chilled,
 chilled,
 'neutral',
 'neutral',
 great,
 great,
 great,
 unpretensious,
 romantic,
 romantic,
 'neutral',
 generously,
 generously,


### Poisitive Rule 2 (if the sentence contains some positive words, then sentiment of aspect terms will become positive)

In [24]:
# Create a positive word list (polarity of these words are zero under Textblob)
positive_word_list = ['accomplishes', 'admire', 'best','affordable','amazed', 'amuse','relax','relaxed','relaxing','award','like','chill', 'diverse','chilled','complementary','entertain' ,'entertaining', 'entertainment', 'timely', 'toasted', 'complete', 'completed', 'amazing', 'wonderful', 'fantastic', 'incredible', 'yummy']

# additional score for having above positive words (most of them are non-adjective and have zero score under Textblob)
pos_rule_1_score_add = []
for i in range(len(data_dict)):
    sentence = data_dict[i]['text']
    add_score = 0
    for j in range(len(data_dict[i]['term'])):
        for k in range(len(positive_word_list)):
            if sentence.split().count(positive_word_list[k]):
                add_score += 0.1
        pos_rule_1_score_add.append(add_score)
        

zipped_lists = zip(pos1_score_list, pos_rule_1_score_add)
pos2_score_list = [x + y for (x, y) in zipped_lists]
pos2_predicted_list = [polarity_define(element) for element in pos2_score_list]
Recall_Precision_Cal(pos2_predicted_list, ground_truth_list, 'positive')

(0.6109, 0.7507)

In [25]:
print('Number of Positive: ' + str(pos2_predicted_list.count('positive')))
print('Number of Neutral: ' + str(pos2_predicted_list.count('neutral')))
print('Number of Negative: ' + str(pos2_predicted_list.count('negative')))

Number of Positive: 1761
Number of Neutral: 1533
Number of Negative: 308


### Poisitive Rule 3 (Negate words around negative words)

In [26]:
num = -1
negate_list = []
for i in range(len(data_dict)):
    sentence = original_data_dict[i]['text']
    sen = nlp(sentence)
    for j in range(len(data_dict[i]['term'])):
        negate_flag = 0
        if(data_dict[i]['polarity'][j]!='conflict'):
            num += 1
            
            # detect if the adjective is a negative word or a neutral word
            if TextBlob(str(adjective_list[num])).sentiment.polarity <= 0:
                try:
                    index = sentence.split().index(str(adjective_list[num]))
                    for k in range(index- 5, index + 5 , 1):
                        if k >= 0 and k < len(sentence.split()):
                            if sen[k].dep_=='neg': 
                                negate_flag = 1
                except:
                    None
                
                negate_list.append(negate_flag)
            else:
                
                negate_list.append(negate_flag)
                
zipped_lists = zip(pos2_score_list, negate_list)
pos3_score_list = [x + y for (x, y) in zipped_lists]
pos3_predicted_list = [polarity_define(element) for element in pos3_score_list]
Recall_Precision_Cal(pos3_predicted_list, ground_truth_list, 'positive')

(0.622, 0.7299)

In [27]:
print('Number of Positive: ' + str(pos3_predicted_list.count('positive')))
print('Number of Neutral: ' + str(pos3_predicted_list.count('neutral')))
print('Number of Negative: ' + str(pos3_predicted_list.count('negative')))

Number of Positive: 1844
Number of Neutral: 1484
Number of Negative: 274


### Neutral Rule #1 (General rule)

In [28]:
adjective_list = []
num = 0

for i in range(len(data_dict)):
    sentence = data_dict[i]['text']
    sen = nlp(sentence)
    for j in range(len(data_dict[i]['term'])):
        if(data_dict[i]['polarity'][j]!='conflict'):
            num += 1
            for token in sen:
                if str(token) == data_dict[i]['term'][j]:
                    head = token.head
                    children = [child for child in token.children]
            
            
            # Use aspect term's Head as an adjective
            if(num > len(adjective_list)):
                for token in sen:
                    if str(token) == str(head):
                        if token.dep_ == 'amod' or token.dep_ =='acomp' or token.dep_=='advmod' or token.pos_ =='ADJ':
                            adjective_list.append(token)
                            
                            
            # Use aspect term's children as an adjective
            if(num > len(adjective_list)):
                for child in children:
                    for token in sen:
                        if(num > len(adjective_list)):
                            if(str(token)==str(child)):
                                if token.dep_ == 'amod' or token.dep_ =='acomp' or token.dep_=='advmod' or token.pos_ =='ADJ':
                                    adjective_list.append(token)
                                    break
            
            # Use aspect term's Head's children as an adjective
            if(num > len(adjective_list)):
                for token in sen:
                    if str(token) == str(head):
                        head_children = [child for child in token.children]

                        for child in head_children:
                            for token in sen:
                                if(num > len(adjective_list)):
                                    if(str(token)==str(child)):
                                        if token.dep_ == 'amod' or token.dep_ =='acomp' or token.dep_=='advmod' or token.pos_ =='ADJ':
                                            adjective_list.append(token)
                                            break
            
                                                                                     
            # Find the adjective near the aspect term in sentence as adjective
            if(num > len(adjective_list)):
                fufill = False
                try:
                    index = sentence.split().index(str(data_dict[i]['term'][j]))
                    for k in range(index - 4, index + 4, 1):
                        if k >= 0 and k < len(sentence.split()):
                            if fufill == False:
                                if sen[k].dep =='amod' or sen[k].dep_ =='acomp' or sen[k].dep_=='advmod' or sen[k].pos_ == 'ADJ':
                                    adjective_list.append(sen[k])
                                    fufill = True
                except:
                    None
            
            # Others are treated it as Neutral
            if(num > len(adjective_list)):
                adjective_list.append('neutral')

                
neu1_score_list = [TextBlob(str(element)).sentiment.polarity for element in adjective_list]
neu1_predicted_list = [polarity_define(element) for element in neu1_score_list]
Recall_Precision_Cal(neu1_predicted_list, ground_truth_list, 'neutral')

(0.7251, 0.2516)

In [29]:
print('Number of Positive: ' + str(neu1_predicted_list.count('positive')))
print('Number of Neutral: ' + str(neu1_predicted_list.count('neutral')))
print('Number of Negative: ' + str(neu1_predicted_list.count('negative')))

Number of Positive: 1455
Number of Neutral: 1824
Number of Negative: 323


### Neutral Rule #2 (if the sentence contains some neutral words, then sentiment of aspect terms will become neutral)

In [30]:
# Create a neutral word list
neutral_word_list = ['ok','neutral', 'average', 'okay', 'fair']

# additional score for having above positive words (most of them are non-adjective and have zero score under Textblob)
neutral_rule_2_score_add = []
for i in range(len(data_dict)):
    sentence = data_dict[i]['text']
    neutral_score = 1
    for j in range(len(data_dict[i]['term'])):
        if(data_dict[i]['polarity'][j]!='conflict'):
            for k in range(len(neutral_word_list)):
                if sentence.split().count(neutral_word_list[k]):
                    neutral_score = 0
            neutral_rule_2_score_add.append(neutral_score)
        

zipped_lists = zip(neu1_score_list, neutral_rule_2_score_add)
neu2_score_list = [x * y for (x, y) in zipped_lists]
neu2_predicted_list = [polarity_define(element) for element in neu2_score_list]
Recall_Precision_Cal(neu2_predicted_list, ground_truth_list, 'neutral')

(0.7393, 0.2515)

In [31]:
print('Number of Positive: ' + str(neu2_predicted_list.count('positive')))
print('Number of Neutral: ' + str(neu2_predicted_list.count('neutral')))
print('Number of Negative: ' + str(neu2_predicted_list.count('negative')))

Number of Positive: 1435
Number of Neutral: 1861
Number of Negative: 306


### Neutral Rule #3 (Collect 2 words around aspect terms, and use them to predict sentiment)

In [32]:
neutral_rule_3_part_sentence = []
for i in range(len(data_dict)):
    sentence = data_dict[i]['text']
    for j in range(len(data_dict[i]['term'])):
        part_sentence = []
        if(data_dict[i]['polarity'][j]!='conflict'):
            try:
                index = sentence.split().index(str(data_dict[i]['term'][j]))
                for k in range(index - 1, index + 1, 1):
                    if k >= 0 and k < len(sentence.split()):
                        part_sentence.append(sentence.split()[k])
            except:
                part_sentence.append('')
        
        text = ' '.join(part_sentence)
        neutral_rule_3_part_sentence.append(text)
          
neutral_rule_3_score_add = [TextBlob(str(element)).sentiment.polarity for element in neutral_rule_3_part_sentence]

neutral_rule_3_score = []
for i in range(len(neu1_score_list)):
    if neu1_score_list[i] ==0:
        neutral_rule_3_score.append(neu1_score_list[i] + neutral_rule_3_score_add[i])
    else:
        neutral_rule_3_score.append(neu1_score_list[i])
            
neu3_predicted_list = [polarity_define(element) for element in neutral_rule_3_score]
Recall_Precision_Cal(neu3_predicted_list, ground_truth_list, 'neutral')

(0.5624, 0.25)

In [33]:
print('Number of Positive: ' + str(neu3_predicted_list.count('positive')))
print('Number of Neutral: ' + str(neu3_predicted_list.count('neutral')))
print('Number of Negative: ' + str(neu3_predicted_list.count('negative')))

Number of Positive: 1788
Number of Neutral: 1424
Number of Negative: 390


### Negative Rule #1 (General Rule)

In [34]:
adjective_list = []
num = 0

for i in range(len(data_dict)):
    sentence = data_dict[i]['text']
    sen = nlp(sentence)
    for j in range(len(data_dict[i]['term'])):
        if(data_dict[i]['polarity'][j]!='conflict'):
            num += 1
            for token in sen:
                if str(token) == data_dict[i]['term'][j]:
                    head = token.head
                    children = [child for child in token.children]
            
            
            # Use aspect term's Head as an adjective
            if(num > len(adjective_list)):
                for token in sen:
                    if str(token) == str(head):
                        if token.dep_ == 'amod' or token.dep_ =='acomp' or token.dep_=='advmod' or token.pos_ =='ADJ':
                            adjective_list.append(token)
                            
                            
            # Use aspect term's children as an adjective
            if(num > len(adjective_list)):
                for child in children:
                    for token in sen:
                        if(num > len(adjective_list)):
                            if(str(token)==str(child)):
                                if token.dep_ == 'amod' or token.dep_ =='acomp' or token.dep_=='advmod' or token.pos_ =='ADJ':
                                    adjective_list.append(token)
                                    break
            
            # Use aspect term's Head's children as an adjective
            if(num > len(adjective_list)):
                for token in sen:
                    if str(token) == str(head):
                        head_children = [child for child in token.children]

                        for child in head_children:
                            for token in sen:
                                if(num > len(adjective_list)):
                                    if(str(token)==str(child)):
                                        if token.dep_ == 'amod' or token.dep_ =='acomp' or token.dep_=='advmod' or token.pos_ =='ADJ':
                                            adjective_list.append(token)
                                            break
            
                                                                                     
            # Find the adjective near the aspect term in sentence as adjective
            if(num > len(adjective_list)):
                fufill = False
                try:
                    index = sentence.split().index(str(data_dict[i]['term'][j]))
                    for k in range(index - 4, index + 4, 1):
                        if k >= 0 and k < len(sentence.split()):
                            if fufill == False:
                                if sen[k].dep =='amod' or sen[k].dep_ =='acomp' or sen[k].dep_=='advmod' or sen[k].pos_ == 'ADJ':
                                    adjective_list.append(sen[k])
                                    fufill = True
                except:
                    None
            
            # Others are treated it as Neutral
            if(num > len(adjective_list)):
                adjective_list.append('neutral')
                
neg1_score_list = [TextBlob(str(element)).sentiment.polarity for element in adjective_list]
neg1_predicted_list = [polarity_define(element) for element in neg1_score_list]
Recall_Precision_Cal(neg1_predicted_list, ground_truth_list, 'negative')

(0.2398, 0.5975)

In [35]:
print('Number of Positive: ' + str(neg1_predicted_list.count('positive')))
print('Number of Neutral: ' + str(neg1_predicted_list.count('neutral')))
print('Number of Negative: ' + str(neg1_predicted_list.count('negative')))

Number of Positive: 1455
Number of Neutral: 1824
Number of Negative: 323


### Negative Rule #2  (if the sentence contains some negative words, then sentiment of aspect terms will become negative)

In [36]:
# Create a negative word list (polarity score of below words are zero under textblov=b)
negative_word_list = ['worst', 'waterbug', 'waterbugs', 'concern','limited', 'crowded', 'forget', 'forgot','sticky', 'overpriced', 'salty', 'skipped', 'skip', 'overcooked', 'undercooked', 'oily']

# additional score for having above negative words (all of them have zero score under Textblob)
neg_rule_1_score_add = []
for i in range(len(data_dict)):
    sentence = data_dict[i]['text']
    add_score = 0
    for j in range(len(data_dict[i]['term'])):
        for k in range(len(negative_word_list)):
            if sentence.split().count(negative_word_list[k]):
                add_score -= 0.1
        neg_rule_1_score_add.append(add_score)
        

zipped_lists = zip(neg1_score_list, neg_rule_1_score_add)
neg2_score_list = [x + y for (x, y) in zipped_lists]
neg2_predicted_list = [polarity_define(element) for element in neg2_score_list]
Recall_Precision_Cal(neg2_predicted_list, ground_truth_list, 'negative')

(0.2683, 0.532)

In [37]:
print('Number of Positive: ' + str(neg2_predicted_list.count('positive')))
print('Number of Neutral: ' + str(neg2_predicted_list.count('neutral')))
print('Number of Negative: ' + str(neg2_predicted_list.count('negative')))

Number of Positive: 1445
Number of Neutral: 1751
Number of Negative: 406


### Negative Rule 3 (Negate words around negative words)

In [38]:
num = -1
negate_list = []
for i in range(len(data_dict)):
    sentence = original_data_dict[i]['text']
    sen = nlp(sentence)
    for j in range(len(data_dict[i]['term'])):
        negate_flag = 0
        if(data_dict[i]['polarity'][j]!='conflict'):
            num += 1
            
            # detect if the adjective is a positive word or a neutral word
            if TextBlob(str(adjective_list[num])).sentiment.polarity >= 0:
                try:
                    index = sentence.split().index(str(adjective_list[num]))
                    for k in range(index- 5, index + 5 , 1):
                        if k >= 0 and k < len(sentence.split()):
                            if sen[k].dep_=='neg': 
                                negate_flag = -1
                except:
                    None
                
                negate_list.append(negate_flag)
            else:
                
                negate_list.append(negate_flag)
                
zipped_lists = zip(neg2_score_list, negate_list)
neg3_score_list = [x + y for (x, y) in zipped_lists]
neg3_predicted_list = [polarity_define(element) for element in neg3_score_list]
Recall_Precision_Cal(neg3_predicted_list, ground_truth_list, 'negative')

(0.3429, 0.5227)

In [39]:
print('Number of Positive: ' + str(neg3_predicted_list.count('positive')))
print('Number of Neutral: ' + str(neg3_predicted_list.count('neutral')))
print('Number of Negative: ' + str(neg3_predicted_list.count('negative')))

Number of Positive: 1382
Number of Neutral: 1692
Number of Negative: 528


## Example

In [40]:
sentence = data_dict[0]['text']
sentence = remove_stopwords("The design and atmosphere is just as good.")

sen = nlp(sentence)

# title head
print ("{:<13} | {:<9} | {:<9} |{:<13} | {:<22}".format('Token','Relation', 'Pos', 'Head', 'Children'))
print ("-" * 70)

for token in sen:
    # Print the token, relation dependency, part of speech, head and all dependents of the token
    print ("{:<13} | {:<9} | {:<9} | {:<13} | {:<22}"
         .format(str(token.text), str(token.dep_), str(token.pos_), str(token.head.text), str([child for child in token.children])))
  
# Display the graph
displacy.render(sen, style='dep', jupyter=True, options={'distance': 110})

Token         | Relation  | Pos       |Head          | Children              
----------------------------------------------------------------------
The           | det       | DET       | atmosphere    | []                    
design        | compound  | NOUN      | atmosphere    | []                    
atmosphere    | nsubj     | NOUN      | good          | [The, design]         
good          | ROOT      | ADJ       | good          | [atmosphere, .]       
.             | punct     | PUNCT     | good          | []                    
