In [16]:
import spacy
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [20]:
import string

In [21]:
nlp = spacy.load ("en_core_web_lg")
sia = SentimentIntensityAnalyzer()

In [74]:
stop_words = set()
stop_words.add ('the')
stop_words.update (string.punctuation)

In [94]:
# Remove all adverbs
def clean_tokens (doc):
    for token in doc:
        print (token, token.pos_, token.tag_, spacy.explain (token.tag_))
        if token.pos == 86 or token.text.lower() in stop_words:
            pass
        else:
            yield token

In [95]:
doc = nlp("The food was pretty bad but service was okay")

In [96]:
list(clean_tokens(doc))

The DET DT determiner
food NOUN NN noun, singular or mass
was VERB VBD verb, past tense
pretty ADV RB adverb
bad ADJ JJ adjective
but CCONJ CC conjunction, coordinating
service NOUN NN noun, singular or mass
was VERB VBD verb, past tense
okay ADJ JJ adjective


[food, was, bad, but, service, was, okay]

In [97]:
def split_on_conjunctions (tokens):
    splits = []
    part = []
    for token in tokens:
        #print (token)
        if token.pos == 89:
            splits.append (" ".join (part))
            part.clear()
        else:
            part.append (token.text)
    if len(part) > 0:
        splits.append (" ".join(part))
    return splits

In [98]:
splits = split_on_conjunctions (clean_tokens(doc))

The DET DT determiner
food NOUN NN noun, singular or mass
was VERB VBD verb, past tense
pretty ADV RB adverb
bad ADJ JJ adjective
but CCONJ CC conjunction, coordinating
service NOUN NN noun, singular or mass
was VERB VBD verb, past tense
okay ADJ JJ adjective


In [89]:
splits

['food was bad', 'service was okay']

In [90]:
for entry in splits:
    print (entry, f"[{', '.join(map(str, nlp(entry).noun_chunks))}]")
    print (sia.polarity_scores (entry))
    print()

food was bad [food]
{'neg': 0.636, 'neu': 0.364, 'pos': 0.0, 'compound': -0.5423}

service was okay [service]
{'neg': 0.0, 'neu': 0.513, 'pos': 0.487, 'compound': 0.2263}



In [91]:
review = "I liked the story of the movie, but the actors did not justified their roles"

In [99]:
sia.polarity_scores (review)

{'neg': 0.172, 'neu': 0.715, 'pos': 0.113, 'compound': -0.2469}

In [101]:
list (clean_tokens (nlp(review)))

I PRON PRP pronoun, personal
liked VERB VBD verb, past tense
the DET DT determiner
story NOUN NN noun, singular or mass
of ADP IN conjunction, subordinating or preposition
the DET DT determiner
movie NOUN NN noun, singular or mass
, PUNCT , punctuation mark, comma
but CCONJ CC conjunction, coordinating
the DET DT determiner
actors NOUN NNS noun, plural
did VERB VBD verb, past tense
not ADV RB adverb
justified VERB VBN verb, past participle
their DET PRP$ pronoun, possessive
roles NOUN NNS noun, plural


[I, liked, story, of, movie, but, actors, did, justified, their, roles]

In [93]:
splits = split_on_conjunctions (clean_tokens(nlp(review)))

I PRON PRP pronoun, personal
liked VERB VBD verb, past tense
story NOUN NN noun, singular or mass
of ADP IN conjunction, subordinating or preposition
movie NOUN NN noun, singular or mass
but CCONJ CC conjunction, coordinating
actors NOUN NNS noun, plural
did VERB VBD verb, past tense
justified VERB VBN verb, past participle
their DET PRP$ pronoun, possessive
roles NOUN NNS noun, plural


In [86]:
splits

['I liked story of movie', 'actors did justified their roles']