In [16]:
# %load spacy-workshop/utils/single_sentence.py
import json 


from dataclasses import dataclass
from typing import Callable, Set, List, Iterable

import spacy
from spacy.tokens.token import Token
from spacy.tokens.doc import Doc
from spacy.tokens.span import Span


@dataclass
class SingleSentence(object):
    root: Token
    subtree: Set[spacy.tokens.token.Token]
    
    @property
    def start_idx(self):
        """
        Returns position (index) in original document where the single sentence begins.
        """
        return min(self.subtree, key=lambda token: token.i).i
    
    @property
    def end_index(self):
        """
        Returns last position (index) of the single sentence in original document.
        """
        return max(self.subtree, key=lambda token: token.i).i
    
    @property
    def tokens(self):
        """
        Return list of tokens.
        """
        return sorted(list(self.subtree), key=lambda token: token.idx)


def default_root_finding_strategy(token: Token) -> bool:
    return (token.pos_ in {'VERB', 'ADJ'} and token.dep_ in {'ccomp', 'conj'}) or token.dep_=='ROOT'


class SingleSentenceSplitter(object):
    """
    Splits complex sentences into single-verb sentences using provided root-finding strategy.
    Strategy is a function that, given a Token, shou
    ld return True if this token is a root of a sentence.
    """
    def __init__(self, root_finding_strategy: Callable[[Token], bool]=default_root_finding_strategy):
        self.root_finding_strategy = root_finding_strategy

    def _get_single_sentences(self, sent: Span) -> List[SingleSentence]:
        """
        Find roots of possible single sentences given the root finding strategy.
        """
        return [
            SingleSentence(token, set(token.subtree))
            for token in sent 
            if self.root_finding_strategy(token)
        ]

    def _make_unique(self, single_sents: List[SingleSentence]) -> List[SingleSentence]:
        """
        Remove subsentences from their parent sentences so that each word is mapped 
        to one and only one single sentence.
        """
        for i, s1 in enumerate(single_sents):
            for j, s2 in enumerate(single_sents[:i]):
                assert(j<i)
                #if s1.subtree.issubset(s2.subtree):
                if s1.root in s2.subtree:
                    single_sents[j].subtree = single_sents[j].subtree - s1.subtree
                #if s2.subtree.issubset(s1.subtree):
                if s2.root in s1.subtree:
                    single_sents[i].subtree = single_sents[i].subtree - s2.subtree
        return single_sents

    def __call__(self, doc: Doc) -> Iterable[Span]:
        """
        Perform sentence splitting on a given document.
        """
        for sent in doc.sents:
            separated_sents = self._make_unique(self._get_single_sentences(doc))
            for single_sent in separated_sents:
                #yield doc[single_sent.start_idx:single_sent.end_index]
                yield single_sent.tokens

In [17]:
data = json.load(open('data.json'))

In [18]:
len(data)

2000

In [19]:
import pandas as pd

df=pd.DataFrame(data)

In [20]:
df.head()

Unnamed: 0,opinions,text
0,"[{'target': 'place', 'category': 'RESTAURANT#G...",Judging from previous posts this used to be a ...
1,"[{'target': 'staff', 'category': 'SERVICE#GENE...","We, there were four of us, arrived at noon - t..."
2,"[{'target': 'NULL', 'category': 'SERVICE#GENER...","They never brought us complimentary noodles, i..."
3,"[{'target': 'food', 'category': 'FOOD#QUALITY'...",The food was lousy - too sweet or too salty an...
4,"[{'target': 'NULL', 'category': 'SERVICE#GENER...","After all that, they complained to me about th..."


In [21]:
import spacy

In [22]:
nlp=spacy.load('en')

In [23]:
#smaller sample
dfs=df.iloc[:100]

# Single sentence analysis

In [24]:
text=df.text[3]
opinions=df.opinions[3]

In [25]:
doc=nlp(text)

doc

The food was lousy - too sweet or too salty and the portions tiny.

In [26]:
opinions

[{'category': 'FOOD#QUALITY',
  'from': 4,
  'polarity': 'negative',
  'target': 'food',
  'to': 8},
 {'category': 'FOOD#STYLE_OPTIONS',
  'from': 52,
  'polarity': 'negative',
  'target': 'portions',
  'to': 60}]

### Entities

In [27]:
[nc for nc in doc.noun_chunks]

[The food, the portions]

In [28]:
[nc.root for nc in doc.noun_chunks]

[food, portions]

In [29]:
[token for token in doc if token.pos_=='NOUN']

[food, portions]

In [30]:
doc.ents

()

### Sentiment terms

In [31]:
[token for token in doc
        if not token.is_stop
        if not token.is_punct
        if token.pos_ == 'ADJ' or token.pos_ == 'VERB']

[lousy, sweet, salty, tiny]

## Splitting

In [32]:
def dep_vis(doc):
    spacy.displacy.render(doc, style='dep', jupyter=True, options={'distance': 100})

## Back to task

In [33]:
doc=nlp(text)
sss=SingleSentenceSplitter()

In [34]:
dep_vis(doc)

In [35]:
ss=list(sss(doc))

In [36]:
list(doc.noun_chunks)

[The food, the portions]

In [37]:
t=ss[2][2]

In [38]:
def strategy(token: Token) -> bool:
    return (token.pos_ in {'VERB', 'ADJ'}
            and token.dep_ in {'ccomp', 'conj'}
            and 'NOUN' in [t.pos_ for t in token.subtree]) or token.dep_=='ROOT'

In [39]:
sss=SingleSentenceSplitter(strategy)

In [40]:
subsents=list(sss(doc))

In [41]:
subsents

[[The, food, was, lousy, -, too, sweet, or, too, salty, and],
 [the, portions, tiny, .]]

In [42]:
'food' in [token.lemma_ for token in subsents[0]]

True

In [43]:
def stringize(subsent):
    return ' '.join([token.orth_ for token in subsent])

In [44]:
'food' in stringize(subsents[0])

True

In [45]:
def get_subsents(text):
    doc=nlp(text)
    return list(sss(doc))

In [46]:
dfs['subsents']=dfs.text.apply(get_subsents)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [47]:
dfs['sub_number']=dfs['subsents'].str.len()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [48]:
(dfs['opinions'].str.len()==dfs['sub_number']).head()

0     True
1    False
2    False
3     True
4     True
dtype: bool

In [49]:
dfs.iloc[1].opinions

[{'category': 'SERVICE#GENERAL',
  'from': 75,
  'polarity': 'negative',
  'target': 'staff',
  'to': 80}]

In [50]:
dfs.iloc[1].subsents

[[We, ,, there, were, four, of, us, ,],
 [arrived, at, noon, -],
 [the, place, was, empty, -, and],
 [the,
  staff,
  acted,
  like,
  we,
  were,
  imposing,
  on,
  them,
  and,
  they,
  were,
  very,
  rude,
  .]]

In [51]:
(dfs['opinions'].str.len()>dfs['sub_number']).sum()

13

In [52]:
dfs[dfs['opinions'].str.len()>dfs['sub_number']]

Unnamed: 0,opinions,text,subsents,sub_number
10,"[{'target': 'NULL', 'category': 'RESTAURANT#PR...","For the price, you cannot eat this well in Man...","[[For, the, price, ,, you, can, not, eat, this...",1
18,"[{'target': 'Service', 'category': 'SERVICE#GE...","Service was devine, oysters where a sensual as...","[[Service, was, devine, ,, oysters, where, a, ...",1
57,"[{'target': 'sushi', 'category': 'FOOD#QUALITY...",The sushi seemed pretty fresh and was adequate...,"[[The, sushi, seemed, pretty, fresh, and, was,...",1
59,"[{'target': 'half price sushi deal', 'category...",We took advanatage of the half price sushi dea...,"[[We, took, advanatage, of, the, half, price, ...",1
69,"[{'target': 'Prix Fixe menu', 'category': 'FOO...",The Prix Fixe menu is worth every penny and yo...,"[[The, Prix, Fixe, menu, is, worth, every, pen...",2
72,"[{'target': 'NULL', 'category': 'FOOD#QUALITY'...","It is terrific, as is the value.","[[It, is, terrific, ,, as, is, the, value, .]]",1
73,"[{'target': 'food', 'category': 'FOOD#STYLE_OP...","$6 and there is much tasty food, all of it fre...","[[$, 6, and, there, is, much, tasty, food, ,, ...",1
77,"[{'target': 'food', 'category': 'FOOD#QUALITY'...","The food here is rather good, but only if you ...","[[The, food, here, is, rather, good, ,, but, o...",1
78,"[{'target': 'somosas', 'category': 'FOOD#QUALI...","I like the somosas, chai, and the chole, but t...","[[I, like, the, somosas, ,, chai, ,, and, the,...",2
85,"[{'target': 'service', 'category': 'SERVICE#GE...",Add to that great service and great food at a ...,"[[Add, to, that, great, service, and, great, f...",2


In [53]:
dfs.iloc[89].opinions

[{'category': 'FOOD#QUALITY',
  'from': 4,
  'polarity': 'positive',
  'target': 'food',
  'to': 8},
 {'category': 'FOOD#QUALITY',
  'from': 43,
  'polarity': 'negative',
  'target': 'French Onion soup',
  'to': 60},
 {'category': 'FOOD#STYLE_OPTIONS',
  'from': 43,
  'polarity': 'positive',
  'target': 'French Onion soup',
  'to': 60},
 {'category': 'FOOD#QUALITY',
  'from': 104,
  'polarity': 'negative',
  'target': 'desserts',
  'to': 112}]

In [54]:
dfs.iloc[89].text

'The food was average to above-average; the French Onion soup filling yet not overly impressive, and the desserts not brilliant in any way.'

In [55]:
dfs.iloc[89].subsents

[[The,
  food,
  was,
  average,
  to,
  above,
  -,
  average,
  ;,
  the,
  French,
  Onion,
  soup,
  filling,
  yet,
  not,
  overly,
  impressive,
  ,,
  and],
 [the, desserts, not, brilliant, in, any, way, .]]

In [56]:
doc=nlp(dfs.iloc[89].text)

In [57]:
dep_vis(doc)

In [58]:
# add 'advcl'
def strategy2(token: Token) -> bool:
    return (token.pos_ in {'VERB', 'ADJ'}
            and token.dep_ in {'ccomp', 'conj', 'advcl'}
            and 'NOUN' in [t.pos_ for t in token.subtree]) or token.dep_=='ROOT'

In [59]:
sss=SingleSentenceSplitter(strategy2)
def get_subsents(text):
    doc=nlp(text)
    return list(sss(doc))

In [60]:
dfs['subsents']=dfs.text.apply(get_subsents)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [61]:
dfs['sub_number']=dfs['subsents'].str.len()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [62]:
(dfs['opinions'].str.len()>dfs['sub_number']).sum()

11

In [63]:
dfs[dfs['opinions'].str.len()>dfs['sub_number']]

Unnamed: 0,opinions,text,subsents,sub_number
10,"[{'target': 'NULL', 'category': 'RESTAURANT#PR...","For the price, you cannot eat this well in Man...","[[For, the, price, ,, you, can, not, eat, this...",1
18,"[{'target': 'Service', 'category': 'SERVICE#GE...","Service was devine, oysters where a sensual as...","[[Service, was, devine, ,, oysters, where, a, ...",1
57,"[{'target': 'sushi', 'category': 'FOOD#QUALITY...",The sushi seemed pretty fresh and was adequate...,"[[The, sushi, seemed, pretty, fresh, and, was,...",1
59,"[{'target': 'half price sushi deal', 'category...",We took advanatage of the half price sushi dea...,"[[We, took, advanatage, of, the, half, price, ...",1
69,"[{'target': 'Prix Fixe menu', 'category': 'FOO...",The Prix Fixe menu is worth every penny and yo...,"[[The, Prix, Fixe, menu, is, worth, every, pen...",2
73,"[{'target': 'food', 'category': 'FOOD#STYLE_OP...","$6 and there is much tasty food, all of it fre...","[[$, 6, and, there, is, much, tasty, food, ,, ...",1
77,"[{'target': 'food', 'category': 'FOOD#QUALITY'...","The food here is rather good, but only if you ...","[[The, food, here, is, rather, good, ,, but, o...",1
78,"[{'target': 'somosas', 'category': 'FOOD#QUALI...","I like the somosas, chai, and the chole, but t...","[[I, like, the, somosas, ,, chai, ,, and, the,...",2
85,"[{'target': 'service', 'category': 'SERVICE#GE...",Add to that great service and great food at a ...,"[[Add, to, that, great, service, and, great, f...",2
89,"[{'target': 'food', 'category': 'FOOD#QUALITY'...",The food was average to above-average; the Fre...,"[[The, food, was, average, to, above, -, avera...",3


In [64]:
dfs.iloc[69].opinions

[{'category': 'FOOD#QUALITY',
  'from': 4,
  'polarity': 'positive',
  'target': 'Prix Fixe menu',
  'to': 18},
 {'category': 'FOOD#STYLE_OPTIONS',
  'from': 4,
  'polarity': 'positive',
  'target': 'Prix Fixe menu',
  'to': 18},
 {'category': 'FOOD#PRICES',
  'from': 4,
  'polarity': 'positive',
  'target': 'Prix Fixe menu',
  'to': 18}]

In [65]:
dfs.iloc[69].subsents

[[The, Prix, Fixe, menu, is, worth, every, penny, and],
 [you, get, more, than, enough, (, both, in, quantity, AND, quality, ), .]]

In [66]:
dep_vis(nlp(dfs.iloc[69].text))

In [67]:
dfs.iloc[89].opinions, dfs.iloc[89].subsents

([{'category': 'FOOD#QUALITY',
   'from': 4,
   'polarity': 'positive',
   'target': 'food',
   'to': 8},
  {'category': 'FOOD#QUALITY',
   'from': 43,
   'polarity': 'negative',
   'target': 'French Onion soup',
   'to': 60},
  {'category': 'FOOD#STYLE_OPTIONS',
   'from': 43,
   'polarity': 'positive',
   'target': 'French Onion soup',
   'to': 60},
  {'category': 'FOOD#QUALITY',
   'from': 104,
   'polarity': 'negative',
   'target': 'desserts',
   'to': 112}],
 [[The, food, was, average, to, above, -, average, ;],
  [the, French, Onion, soup, filling, yet, not, overly, impressive, ,, and],
  [the, desserts, not, brilliant, in, any, way, .]])

In [68]:
doc = nlp(dfs.iloc[89].text)

In [69]:
list(doc.noun_chunks)

[The food, above-average, the French Onion soup, the desserts, any way]

In [70]:
def get_matches(full_sent, subsents, opinions):
    matches=[]

    subsent_strings=[stringize(subsent) for subsent in subsents]
    for opinion in opinions:
        if opinion['from']==opinion['to']==0:
            matches.append((full_sent, opinion))
        else:
            for i, subsent in enumerate(subsent_strings):
                if opinion['target'] in subsent:
                    matches.append((subsent, opinion))
                
    return matches

In [71]:
dfs.subsents[3], dfs.opinions[3]

([[The, food, was, lousy, -, too, sweet, or, too, salty, and],
  [the, portions, tiny, .]],
 [{'category': 'FOOD#QUALITY',
   'from': 4,
   'polarity': 'negative',
   'target': 'food',
   'to': 8},
  {'category': 'FOOD#STYLE_OPTIONS',
   'from': 52,
   'polarity': 'negative',
   'target': 'portions',
   'to': 60}])

In [72]:
get_matches(dfs.text[3], dfs.subsents[3], dfs.opinions[3])

[('The food was lousy - too sweet or too salty and',
  {'category': 'FOOD#QUALITY',
   'from': 4,
   'polarity': 'negative',
   'target': 'food',
   'to': 8}),
 ('the portions tiny .',
  {'category': 'FOOD#STYLE_OPTIONS',
   'from': 52,
   'polarity': 'negative',
   'target': 'portions',
   'to': 60})]

In [73]:
dfs.subsents[3][0][0]

The

In [74]:
aligned=dfs.apply(lambda row: get_matches(row.text, row.subsents, row.opinions), axis=1)

In [75]:
dfs.iloc[24]

opinions      [{'category': None, 'from': 0, 'polarity': 'ne...
text          I had my eyes on this place, promising myself ...
subsents      [[I, had, my, eyes, on, this, place, ,, ., '],...
sub_number                                                    3
Name: 24, dtype: object

In [76]:
pairs=[pair for pairs in list(aligned)
for pair in pairs]

In [77]:
len(pairs)

148

In [78]:
len([opinion for opinions in list(dfs['opinions'])
for opinion in opinions])

141

In [79]:
len(pairs)

148

In [80]:
pairs=[(pair[0], pair[1]['category']) for pair in pairs]

In [81]:
data=pd.DataFrame(pairs, columns=['text', 'label'])

In [82]:
data.fillna('NONE', inplace=True)

In [83]:
labels=list(data['label'].unique())
labels

['RESTAURANT#GENERAL',
 'SERVICE#GENERAL',
 'FOOD#QUALITY',
 'FOOD#STYLE_OPTIONS',
 'DRINKS#STYLE_OPTIONS',
 'DRINKS#PRICES',
 'RESTAURANT#PRICES',
 'RESTAURANT#MISCELLANEOUS',
 'AMBIENCE#GENERAL',
 'NONE',
 'FOOD#PRICES',
 'LOCATION#GENERAL']

In [84]:
textcat = nlp.create_pipe(
            "textcat",
            config={
                "exclusive_classes": True,
                "architecture": "bow",
            }
        )

In [85]:
nlp.remove_pipe('textcat')

ValueError: [E001] No component 'textcat' found in pipeline. Available names: ['tagger', 'parser', 'ner']

In [86]:
nlp.add_pipe(textcat, last=True)

In [87]:
for label in labels:
    textcat.add_label(label)

In [88]:
textcat.labels

['RESTAURANT#GENERAL',
 'SERVICE#GENERAL',
 'FOOD#QUALITY',
 'FOOD#STYLE_OPTIONS',
 'DRINKS#STYLE_OPTIONS',
 'DRINKS#PRICES',
 'RESTAURANT#PRICES',
 'RESTAURANT#MISCELLANEOUS',
 'AMBIENCE#GENERAL',
 'NONE',
 'FOOD#PRICES',
 'LOCATION#GENERAL']

In [89]:
data.head()

Unnamed: 0,text,label
0,"this used to be a good place , but not any lon...",RESTAURANT#GENERAL
1,the staff acted like we were imposing on them ...,SERVICE#GENERAL
2,"They never brought us complimentary noodles, i...",SERVICE#GENERAL
3,The food was lousy - too sweet or too salty and,FOOD#QUALITY
4,the portions tiny .,FOOD#STYLE_OPTIONS


In [90]:
def convert_label(label):
    return {'cats': {l: l==label for l in labels}}

In [91]:
convert_label('RESTAURANT#GENERAL')

{'cats': {'AMBIENCE#GENERAL': False,
  'DRINKS#PRICES': False,
  'DRINKS#STYLE_OPTIONS': False,
  'FOOD#PRICES': False,
  'FOOD#QUALITY': False,
  'FOOD#STYLE_OPTIONS': False,
  'LOCATION#GENERAL': False,
  'NONE': False,
  'RESTAURANT#GENERAL': True,
  'RESTAURANT#MISCELLANEOUS': False,
  'RESTAURANT#PRICES': False,
  'SERVICE#GENERAL': False}}

In [92]:
converted_data = list(data.apply(lambda row: (row.text, convert_label(row.label)), axis=1))

## Model training

In [93]:
import random

def split(converted_data, split=0.8, random_seed=10):
    random.seed(random_seed)
    random.shuffle(converted_data)
    n = int(len(converted_data) * split)
    return (converted_data[:n], converted_data[n:])

In [94]:
train, validate = split(converted_data)

In [95]:
len(train), len(validate)

(118, 30)

In [97]:
def evaluate(tokenizer, textcat, valid_data):
    texts, labels = zip(*valid_data)
    docs = (tokenizer(text) for text in texts)
    tp = 0.0  # True positives
    fp = 1e-8  # False positives
    fn = 1e-8  # False negatives
    tn = 0.0  # True negatives
    
    for i, doc in enumerate(textcat.pipe(docs)):
        y_true = labels[i]
        for label, score in doc.cats.items():
            if label not in y_true:
                continue
            if label == "NEGATIVE":
                continue
            if score >= 0.5 and y_true[label] >= 0.5:
                tp += 1.0
            elif score >= 0.5 and y_true[label] < 0.5:
                fp += 1.0
            elif score < 0.5 and y_true[label] < 0.5:
                tn += 1
            elif score < 0.5 and y_true[label] >= 0.5:
                fn += 1
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    if (precision + recall) == 0:
        f_score = 0.0
    else:
        f_score = 2 * (precision * recall) / (precision + recall)
    return {"textcat_p": precision, "textcat_r": recall, "textcat_f": f_score}


In [98]:
from spacy.util import minibatch, compounding

def train_model(textcat, train_data, val_data, n_iter):
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
    with nlp.disable_pipes(*other_pipes):  # only train textcat
        optimizer = nlp.begin_training()
        
        print("Training the model...")
        print("{:^5}\t{:^5}\t{:^5}\t{:^5}".format("LOSS", "P", "R", "F"))
        batch_sizes = compounding(4.0, 32.0, 1.001)
        for i in range(n_iter):
            losses = {}
            # batch up the examples using spaCy's minibatch
            random.shuffle(train_data)
            batches = minibatch(train_data, size=batch_sizes)
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, drop=0.2, losses=losses)
            with textcat.model.use_params(optimizer.averages):
                # evaluate on the dev data split off in load_data()
                scores = evaluate(nlp.tokenizer, textcat, val_data)
            print(
                "{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}".format(  
                    losses["textcat"],
                    scores["textcat_p"],
                    scores["textcat_r"],
                    scores["textcat_f"],
                )
            )

In [152]:
train_model(textcat, train, validate, 10)

Training the model...
LOSS 	  P  	  R  	  F  
17.872	0.000	0.000	0.000
17.594	0.000	0.000	0.000
17.826	0.000	0.000	0.000
16.250	0.000	0.000	0.000
14.718	0.000	0.000	0.000
14.840	0.000	0.000	0.000
14.127	0.000	0.000	0.000
10.852	0.000	0.000	0.000
10.162	0.000	0.000	0.000
9.594	0.000	0.000	0.000


In [99]:
from textblob.sentiments import PatternAnalyzer

In [100]:
analyzer=PatternAnalyzer()

In [3]:
analyzer.analyze(doc.text).polarity

NameError: name 'doc' is not defined

In [108]:
def get_sentiment(text):
    return analyzer.analyze(doc.text).polarity

In [109]:
data.text.apply(get_sentiment)

0      0.1
1      0.1
2      0.1
3      0.1
4      0.1
5      0.1
6      0.1
7      0.1
8      0.1
9      0.1
10     0.1
11     0.1
12     0.1
13     0.1
14     0.1
15     0.1
16     0.1
17     0.1
18     0.1
19     0.1
20     0.1
21     0.1
22     0.1
23     0.1
24     0.1
25     0.1
26     0.1
27     0.1
28     0.1
29     0.1
      ... 
118    0.1
119    0.1
120    0.1
121    0.1
122    0.1
123    0.1
124    0.1
125    0.1
126    0.1
127    0.1
128    0.1
129    0.1
130    0.1
131    0.1
132    0.1
133    0.1
134    0.1
135    0.1
136    0.1
137    0.1
138    0.1
139    0.1
140    0.1
141    0.1
142    0.1
143    0.1
144    0.1
145    0.1
146    0.1
147    0.1
Name: text, Length: 148, dtype: float64

In [None]:
#We need to use other lexicon method - see in the_guardian for method in textacy, or use nltk.vader

In [None]:
# or - better option get all ADJs and VERBs and build our own lexicon