In [274]:
import pandas as pd
#import numpy as np
from datetime import datetime
from textblob import TextBlob
import spacy

# Pin spaCy to the CPU before loading the en_core_web_trf pipeline.
spacy.require_cpu()
nlp = spacy.load("en_core_web_trf")

# Load the processed reviews (with 'date' parsed as datetimes) and keep only
# the rows whose review_length is at least 10.
df = (
    pd.read_csv('data/processed_review_data.csv', parse_dates=['date'])
    .loc[lambda reviews: reviews['review_length'] >= 10]
)

In [276]:
# clean review text
from functions import lower_case,expandContractions,alpha_num,consec_dup,lemma
import re
def clean(text):
    """Normalise punctuation and whitespace in a review, then spell-correct it.

    The substitutions run in this order:
      1. '?', '!' and ':' are replaced by '.', so every sentence ends with '.'.
      2. Decimal numbers (optional digits, a '.', one or more digits) are removed.
      3. Every character other than ASCII letters, digits, '.' and space is removed.
      4. Runs of two or more '.' collapse to '. '.
      5. Runs of spaces collapse to a single space.

    The result is then passed through TextBlob's spelling correction.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    str
        The cleaned, spell-corrected text. `TextBlob.correct()` yields a
        TextBlob, so it is converted back to a plain string here; that way the
        value can be handed to ordinary string/regex functions afterwards.
    """
    # Raw-string patterns: '\d' and '\.' are not valid escapes in a normal
    # string literal and trigger warnings on current Python versions.
    text = re.sub(r'[?!:]', '.', text)  # all sentences end with '.'
    text = re.sub(r'\d*\.\d+', '', text)  # remove all floats
    text = re.sub(r'[^a-zA-Z0-9. ]', '', text)  # remove all chars not listed
    text = re.sub(r'\.\.+', '. ', text)  # collapse repeated full stops
    text = re.sub(r' +', ' ', text)  # collapse extra spaces
    corrected = TextBlob(text).correct()  # correct spellings
    return str(corrected)

# Run each cleaning stage over the whole review_text column, one stage at a
# time, in the order listed; each stage sees the output of the previous one.
for func in (expandContractions, clean, consec_dup, lemma):
    df['review_text'] = df['review_text'].map(func)

In [268]:
# Split every review on '.' and flatten the pieces into one list of sentences.
# Each piece is stripped of surrounding whitespace, and pieces that end up
# blank (e.g. the empty string produced by a trailing full stop) are dropped
# so they are never passed on for parsing.
sentences = [
    fragment.strip()
    for review in df.review_text
    for fragment in review.split('.')
    if fragment.strip()
]

In [270]:
# Extract aspects and descriptors
def _aspect_and_descriptor(doc):
    """Return the (aspect, description) pair found in one parsed sentence.

    Parameters
    ----------
    doc : iterable of spaCy tokens
        A parsed sentence; each token exposes `dep_`, `pos_`, `text` and
        `children`.

    Returns
    -------
    tuple of (str, str)
        aspect: text of the last token that is a NOUN with dependency
        'nsubj', or '' if there is none.
        description: text of the last ADJ token, prefixed by the texts of its
        ADV children (space separated), or '' if there is no ADJ token.
    """
    target = ''
    descriptive_term = ''
    for token in doc:
        if token.dep_ == 'nsubj' and token.pos_ == 'NOUN':
            target = token.text
        if token.pos_ == 'ADJ':
            # adverbial modifiers attached to the adjective, e.g. "even easy"
            adverbs = [child.text for child in token.children
                       if child.pos_ == 'ADV']
            descriptive_term = ' '.join(adverbs + [token.text])
    return target, descriptive_term


aspects = []
# nlp.pipe processes the sentences as a batched stream and yields the parsed
# docs in input order, instead of invoking the pipeline once per sentence.
for doc in nlp.pipe(sentences):
    target, descriptive_term = _aspect_and_descriptor(doc)
    # skip sentences that lack either an aspect or a descriptor
    if not target or not descriptive_term:
        continue
    aspects.append({
        'aspect': target,
        'description': descriptive_term,
        # sentiment polarity score of the descriptor text alone
        'sentiment': TextBlob(descriptive_term).sentiment.polarity,
    })

# Explicit columns keep the frame's schema intact even when no aspect was
# found, so later column lookups such as sent_df['sentiment'] still work.
sent_df = pd.DataFrame(aspects, columns=['aspect', 'description', 'sentiment'])
sent_df

[{'aspect': 'work', 'description': 'meaningless'},
 {'aspect': 'story', 'description': 'modern'},
 {'aspect': 'COD', 'description': 'fresh'},
 {'aspect': 'battle', 'description': 'different'},
 {'aspect': 'war', 'description': 'cold'},
 {'aspect': 'multiplayer', 'description': 'smart'},
 {'aspect': 'gunsmith', 'description': 'less'},
 {'aspect': 'style', 'description': 'actual'},
 {'aspect': 'CODs', 'description': 'previous'},
 {'aspect': 'onion', 'description': 'hard'},
 {'aspect': 'patch', 'description': 'annoying'},
 {'aspect': 'launch', 'description': 'bad'},
 {'aspect': 'way', 'description': 'high'},
 {'aspect': 'patch', 'description': 'last'},
 {'aspect': 'pass', 'description': 'well'},
 {'aspect': 'COD', 'description': 'next'},
 {'aspect': 'number', 'description': 'possible'},
 {'aspect': 'matchmaking', 'description': 'certain'},
 {'aspect': 'SBMM', 'description': 'least'},
 {'aspect': 'challenge', 'description': 'even easy'},
 {'aspect': 'cover', 'description': 'many'},
 {'aspe

In [272]:
# Show the aspect/descriptor pairs whose sentiment polarity is exactly 0.0.
sent_df.loc[sent_df['sentiment'].eq(0.0)]

Unnamed: 0,aspect,description,sentiment
29,sense,entire,0.0
26,lobby,back,0.0
25,gun,red,0.0
15,COD,next,0.0
16,number,possible,0.0
14,pass,well,0.0
13,patch,last,0.0
7,style,actual,0.0
3,battle,different,0.0
