In [2]:
import spacy

In [3]:
nlp = spacy.load ("en_core_web_sm")

In [4]:
sentences = [
    "I like the food but service was not good",
    "I liked the food but service was pretty bad",
    "Food was awfully bad",
    "Nice Food, nice service",
]

In [5]:
spacy.displacy.render(nlp(sentences[0]), style='dep')

In [16]:
from spacy.lang.en import STOP_WORDS as stop_words

In [17]:
len (stop_words)

326

In [19]:
print (",".join(stop_words))

am,upon,put,other,via,namely,whereafter,what,toward,first,hereafter,thereby,against,her,give,seems,on,about,using,every,twenty,again,no,own,becoming,latter,next,some,their,around,further,always,that,moreover,’m,keep,’ve,towards,we,yet,nor,below,everywhere,due,or,thence,rather,it,out,regarding,into,hundred,much,whether,anyone,something,ever,its,eleven,two,down,whence,fifty,n’t,our,above,from,seemed,nine,there,as,neither,itself,should,still,last,whereupon,thereupon,since,'d,amount,sometimes,themselves,them,becomes,only,ourselves,ours,nothing,are,‘d,therefore,to,each,meanwhile,‘re,could,latterly,may,although,fifteen,hereby,someone,thru,hers,us,get,bottom,herein,seeming,while,well,none,thereafter,less,eight,third,until,over,’d,many,‘ve,really,whither,almost,yourself,twelve,being,’re,’ll,make,where,doing,his,empty,side,all,onto,whole,must,either,those,with,before,others,had,they,also,unless,whose,of,do,whenever,whereby,an,then,this,yours,hence,least,forty,here,whoever,mostly,himself,often,a

In [21]:
stop_doc = nlp (" ".join(stop_words))

In [22]:
len(stop_doc)

336

In [23]:
import pickle

In [25]:
# Load the negative-word lexicon. The context manager closes the file handle
# as soon as the data has been read.
# NOTE(review): unpickling can execute arbitrary code; only load
# "neg_words.pkl" from a trusted source.
with open ("neg_words.pkl", "rb") as neg_words_file:
    neg_words = pickle.load (neg_words_file)

In [27]:
# Build a new set rather than calling the list-only `.append`: this works
# whether the pickle holds a list or a set, and re-running the cell cannot
# introduce a duplicate 'not' entry.
neg_words = set (neg_words) | {'not'}

In [28]:
assert 'not' in neg_words

In [29]:
sentences

['I like the food but service was not good',
 'I liked the food but service was pretty bad',
 'Food was awfully bad',
 'Nice Food, nice service']

In [30]:
doc = nlp (sentences[0])

In [33]:
for t in doc:
    print (t, t.pos, t.pos_, t.tag_, spacy.explain (t.tag_))

I 95 PRON PRP pronoun, personal
like 100 VERB VBP verb, non-3rd person singular present
the 90 DET DT determiner
food 92 NOUN NN noun, singular or mass
but 89 CCONJ CC conjunction, coordinating
service 92 NOUN NN noun, singular or mass
was 100 VERB VBD verb, past tense
not 86 ADV RB adverb
good 84 ADJ JJ adjective


TypeError: Argument 'vocab' has incorrect type (expected spacy.vocab.Vocab, got str)

In [39]:
def drop_adverbs (doc):
    """Return the tokens of ``doc`` with non-negating adverbs removed.

    A token is dropped when its coarse part-of-speech label is ``ADV`` and
    neither its text nor its lower-cased text appears in the module-level
    ``neg_words`` collection. Adverbs listed in ``neg_words`` are kept.

    Args:
        doc: Iterable of spaCy tokens (for example a ``Doc`` or ``Span``).

    Returns:
        list: The remaining tokens, in their original order.
    """
    def is_negation (token):
        # Also try the lower-cased form so a capitalised "Not" is still
        # recognised, in line with the lower-casing done in drop_stopwords.
        return token.text in neg_words or token.text.lower() in neg_words

    # Compare the POS label instead of the raw symbol id (86) so the check
    # is readable and does not depend on spaCy's internal numbering.
    return [t for t in doc if t.pos_ != "ADV" or is_negation (t)]

In [44]:
doc_ex = nlp("Food was pretty amazing")
drop_adverbs (doc_ex)

[Food, was, amazing]

In [45]:
doc_ex2 = nlp("The service was not good")
drop_adverbs (doc_ex2)

[The, service, was, not, good]

In [46]:
doc_ex3 = nlp ("The food was pretty awful")
drop_adverbs (doc_ex3)

[The, food, was, awful]

In [47]:
doc_ex4 = nlp ("The food was too bad")
drop_adverbs (doc_ex4)

[The, food, was, bad]

In [48]:
# Reload the negative-word lexicon from disk. The context manager closes the
# file handle as soon as the data has been read.
# NOTE(review): unpickling can execute arbitrary code; only load
# "neg_words.pkl" from a trusted source.
with open ("neg_words.pkl", "rb") as neg_words_file:
    neg_words = pickle.load (neg_words_file)

In [49]:
type (neg_words)

set

In [50]:
neg_words.add ('not')

In [51]:
len (neg_words)

4784

In [53]:
from spacy.lang.en import STOP_WORDS as stop_words

In [55]:
import string

In [59]:
type(string.punctuation)

str

In [57]:
# `stop_words` is the very set object exported by spacy.lang.en, so an
# in-place `.update` would alter it for every other user of STOP_WORDS.
# Build a notebook-local set that adds each punctuation character instead.
stop_words = set (stop_words).union (string.punctuation)

In [64]:
len(stop_words)

358

In [66]:
# Keep negative words out of the stop list so they are not filtered away later.
stop_words = {word for word in stop_words if word not in neg_words}

In [67]:
len (stop_words)

357

In [68]:
def drop_stopwords(doc):
    """Return the tokens of ``doc`` that are not stop words.

    A token counts as a stop word when its lower-cased text is a member of
    the module-level ``stop_words`` collection.

    Args:
        doc: Iterable of spaCy tokens (for example a ``Doc`` or ``Span``).

    Returns:
        list: The surviving tokens, in their original order.
    """
    # First pass: positions of every token recognised as a stop word.
    stop_positions = {tok.i for tok in doc if tok.text.lower() in stop_words}
    # Second pass: keep only the tokens whose position was not flagged.
    return list(filter(lambda tok: tok.i not in stop_positions, doc))

In [71]:
drop_stopwords (doc_ex2)

[service, not, good]

In [72]:
doc_ex5 = nlp ("Food was very poor.")
for t in doc_ex5:
    print (t, t.pos, t.pos_, t.tag_)

Food 92 NOUN NN
was 100 VERB VBD
very 86 ADV RB
poor 84 ADJ JJ
. 97 PUNCT .


In [70]:
sentences[0]

'I like the food but service was not good'

In [175]:
def sentence_splitter (doc):
    """Split each sentence of ``doc`` into clauses at coordinating conjunctions.

    Every sentence yielded by ``doc.sents`` is cut at each token whose coarse
    part-of-speech label is ``CCONJ``. The conjunction tokens themselves are
    left out, and empty clauses (a conjunction at the very start or end of a
    sentence, or two conjunctions in a row) are skipped.

    Args:
        doc: Object exposing a ``sents`` iterable of sliceable token
            sequences (for example a parsed spaCy ``Doc``).

    Returns:
        list: The clause slices, in document order.
    """
    splits = []
    for sent in doc.sents:
        start = 0
        for idx, token in enumerate (sent):
            # Compare the POS label instead of the raw symbol id (89) so the
            # check is readable and independent of spaCy's numbering.
            if token.pos_ == "CCONJ":
                if idx > start:
                    splits.append (sent[start: idx])
                # The next clause begins right after the conjunction.
                start = idx + 1
        # Emit the trailing clause only when tokens remain; comparing indices
        # avoids slicing an empty or out-of-range span.
        if start < len (sent):
            splits.append (sent[start:])
    return splits

In [176]:
sentence_splitter (nlp(sentences[0]))

[I like the food, service was not good]

In [177]:
doc_ex6 = nlp("Location was good. Ambience was perfect. Service was not that great but food was awesome")

In [178]:
x = sentence_splitter (doc_ex6)
x

[Location was good.,
 Ambience was perfect.,
 Service was not that great,
 food was awesome]

In [179]:
len (x)

4

In [132]:
sentence_splitter (nlp("Service was average and food was cool."))

Sentence:  Service was average and food was cool.


[, was average and food was cool.]

In [180]:
for t in nlp("Hello, I am balor"):
    print (t)

Hello
,
I
am
balor


In [182]:
x = ['A', 'B', 'C']
# Experiment: assigning to the loop variable only rebinds the local name
# `entry`; the elements stored in `x` are not modified by this loop.
for entry in x:
    entry = entry.lower()

In [183]:
x

['A', 'B', 'C']