In [2]:
# FUNCTION DECLARATIONS

# getAspectDescription(text: string) => [{aspect: string[], description: string}]
# getSentiment(raw_text: string) => (output: string, prediction: model class label, 1 means negative)

# getAspectDescription

In [3]:
# Import spaCy and load the large English pipeline; `nlp` is the shared parser that getAspectDescription calls.
import spacy
# NOTE(review): presumably the en_core_web_lg package must already be installed in the kernel's environment — confirm for fresh setups.
nlp = spacy.load("en_core_web_lg")

In [4]:
# getAspectDescription(text: string) => [{aspect: string[], description: string}]

def getAspectDescription(text):
    """Extract the noun subjects and the last adjective phrase from ``text``.

    The text is parsed with the module-level spaCy pipeline ``nlp`` and every
    token is visited once, in document order.

    Args:
        text: The raw text to analyse.

    Returns:
        A list holding exactly one dict:

        * ``'aspect'``: list with the text of every token whose dependency
          label is ``nsubj`` and whose part of speech is ``NOUN``.
        * ``'description'``: the text of the last ``ADJ`` token seen, preceded
          by the text of its ``ADV`` children (space separated), or ``''`` when
          the text contains no adjective.

        Only the last adjective is kept; earlier ones are overwritten, so the
        description is not necessarily about any of the listed aspects.
    """
    parsed = nlp(text)

    subjects = []
    last_description = ''
    for tok in parsed:
        is_noun_subject = tok.pos_ == 'NOUN' and tok.dep_ == 'nsubj'
        if is_noun_subject:
            subjects.append(tok.text)

        if tok.pos_ != 'ADJ':
            continue

        # Adverb children (e.g. "very") are placed in front of the adjective.
        adverbs = [kid.text for kid in tok.children if kid.pos_ == 'ADV']
        last_description = ' '.join(adverbs + [tok.text])

    return [{'aspect': subjects, 'description': last_description}]


In [5]:
# Demo call. In the saved output the description 'Poor' appears next to the aspect
# 'product': the function keeps only the last adjective it encounters.
getAspectDescription('The product is strong, but it was not the right color. Poor packaging')

[{'aspect': ['product'], 'description': 'Poor'}]

# getSentiment

In [7]:
# getSentiment(raw_text: string) => (output: string, prediction: model class label, 1 means negative)
import pandas as pd
import re
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import pickle

# Load the pickled classifier used by getSentiment. The context manager closes
# the file handle as soon as the model has been read.
# NOTE: unpickling can execute arbitrary code; only load a modelNB.pkl that
# comes from a trusted source.
with open('modelNB.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

def getSentiment(raw_text: str):
    """Classify a piece of review text with the module-level ``model``.

    The text is cleaned before prediction: HTML markup is stripped, every
    non-letter character is replaced by a space, words are lower-cased, stop
    words are removed and the remaining words are Porter-stemmed.

    Args:
        raw_text: Review text; may contain HTML markup.

    Returns:
        A tuple ``(output, pred)`` where ``pred`` is the first element returned
        by ``model.predict`` and ``output`` is ``"Negative"`` when ``pred == 1``
        and ``"Positive"`` otherwise.
    """
    # NOTE(review): presumably these steps must mirror the preprocessing used
    # when the model was trained — confirm before changing any of them.
    p_stemmer = PorterStemmer()

    # Strip HTML. The parser is named explicitly so the result does not depend
    # on which optional parsers are installed and bs4 does not warn about
    # guessing one.
    review_text = BeautifulSoup(raw_text, "html.parser").get_text()

    # Replace every non-letter with a space, then lower-case and tokenise.
    letters_only = re.sub("[^a-zA-Z]", " ", review_text)
    words = letters_only.lower().split()

    # English stop words, plus words that appeared frequently in both positive
    # and negative reviews.
    stops = set(stopwords.words('english'))
    stops.update(['app', 'shopee', 'shoppee', 'item', 'items', 'seller', 'sellers', 'bad'])

    # Drop stop words and stem what is left.
    meaningful_words = [p_stemmer.stem(w) for w in words if w not in stops]

    # The model receives a one-element Series holding the cleaned text.
    final_text = pd.Series(" ".join(meaningful_words))
    pred = model.predict(final_text)[0]

    # Label 1 is the negative class; anything else is reported as positive.
    output = "Negative" if pred == 1 else "Positive"

    return output, pred


# text = """
# anc work in the first 3 days but the proximity sensor on the left bud stopped working
# """
# result = getSentiment(text)
# print(f'RAW_TEXT: \t{text}')
# print(F'\tSENTIMENT: {result}')

In [12]:
# Sample review used by the sentiment demo in the next cell.
text = 'The product is strong, but it was not the right color. Poor packaging'

In [13]:
# Show the sample text and its (label, prediction) tuple, separated by a tab.
print(f"Text: {text}\t{getSentiment(text)}")

Text: The product is strong, but it was not the right color. Poor packaging	('Negative', 1)


# Aspect Sentiment Classification

In [17]:
# Attach a sentiment to each extracted aspect by classifying its description.
# NOTE(review): the saved output under this cell lists aspects that do not occur
# in this sentence, so it looks stale — re-run the cell to refresh it.
text = 'The food we had yesterday was delicious'
aspects = getAspectDescription(text)

aspectSentiment = [
    {
        'aspect': entry['aspect'],
        'description': entry['description'],
        'sentiment': getSentiment(entry['description']),
    }
    for entry in aspects
]

print(aspectSentiment)

[{'aspect': ['effect', 'cats'], 'description': 'very lovey', 'sentiment': ('Postive', 0)}]
