In [340]:
import csv
import re

import pandas as pd
from afinn import Afinn
from nltk.corpus import sentiwordnet as swn
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob

import spacy
# Load the spaCy 'en_core_web_sm' pipeline once and share it across the notebook.
# The 'ner' component is disabled; grammatical_collocates only reads
# token.head and token.children from the parsed documents.
nlp = spacy.load('en_core_web_sm', disable=['ner'])

In [341]:
def import_data(data, sep, col_name):
    """Read a single column of a delimited file into a Python list.

    Args:
        data: Path (or file-like object) handed to ``pd.read_csv``.
        sep: Field delimiter of the file.
        col_name: Name of the only column to load.

    Returns:
        The values of ``col_name`` as a plain list, in file order.
    """
    # usecols keeps every other column of the file from being loaded.
    frame = pd.read_csv(data, sep=sep, usecols=[col_name])
    column = frame[col_name]
    return column.tolist()

In [342]:
def grammatical_collocates(ls, keyword, **kwargs):
    """Collect "<token> <related word>" pairs for tokens matching ``keyword``.

    Every string in ``ls`` is parsed with the module-level spaCy pipeline
    ``nlp``. For each token whose text matches ``keyword``, one pair is
    emitted with the token's syntactic head, followed by one pair for each of
    the token's syntactic children.

    Args:
        ls: Iterable of text strings to parse.
        keyword: Regular-expression pattern. It is applied with ``re.match``,
            which anchors at the start of the token only, so ``'he'`` also
            matches tokens such as ``'her'`` or ``'help'``. Pass ``r'he$'``
            to match the exact word.
        **kwargs: ``return_type`` selects the result container: ``'ls'`` for
            a list of strings, ``'df'`` for a one-column DataFrame named
            ``'grammatical_collocates'``. When omitted (or ``None``) the list
            is returned.

    Returns:
        A list of ``"token related"`` strings, or a DataFrame holding them.

    Raises:
        ValueError: If ``return_type`` is neither ``'ls'`` nor ``'df'``.
    """
    return_type = kwargs.get('return_type')
    if return_type is None:
        return_type = 'ls'
    # Reject bad options before the (expensive) parse instead of silently
    # returning None after all the work has been done.
    if return_type not in ('ls', 'df'):
        raise ValueError(
            "return_type must be 'ls' or 'df', got {!r}".format(return_type)
        )

    regex = re.compile(keyword)
    collocates = []

    # nlp.pipe streams the texts through the pipeline in batches and yields
    # the parsed documents in input order.
    for doc in nlp.pipe(ls):
        for token in doc:
            if not regex.match(token.text):
                continue
            collocates.append(token.text + ' ' + token.head.text)
            collocates.extend(
                token.text + ' ' + child.text for child in token.children
            )

    if return_type == 'df':
        return pd.DataFrame(collocates, columns=['grammatical_collocates'])
    return collocates

In [343]:
# Shared Afinn instance, created on first use so the scorer is not rebuilt
# for every row when this function is used with DataFrame.apply.
_AFINN_SCORER = None


def afinn_sentiment(text):
    """Return the AFINN score of ``text`` (the value of ``Afinn.score``).

    Args:
        text: The string to score.

    Returns:
        The numeric score reported by the shared ``Afinn`` instance.
    """
    global _AFINN_SCORER
    if _AFINN_SCORER is None:
        _AFINN_SCORER = Afinn()
    return _AFINN_SCORER.score(text)

def textblob_sentiment(text):
    """Return the TextBlob polarity of ``text``.

    Args:
        text: The string to analyse.

    Returns:
        The ``polarity`` field of the blob's ``sentiment`` result.
    """
    blob = TextBlob(text)
    sentiment = blob.sentiment
    return sentiment.polarity

# Shared VADER analyzer, created on first use so it is not rebuilt for every
# row when this function is used with DataFrame.apply.
_VADER_ANALYZER = None


def vader_sentiment(text):
    """Return the VADER polarity scores of ``text``.

    Args:
        text: The string to analyse.

    Returns:
        The result of ``SentimentIntensityAnalyzer.polarity_scores(text)``;
        sentiment_score reads its ``'compound'`` entry.
    """
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    return _VADER_ANALYZER.polarity_scores(text)

In [344]:
def sentiment_score(df, col_name):
    """Add AFINN, TextBlob and VADER score columns to ``df``.

    The columns ``'afinn'``, ``'textblob'`` and ``'vader'`` are written onto
    the DataFrame that was passed in; the same object is returned.

    Args:
        df: DataFrame holding the texts to score.
        col_name: Name of the column in ``df`` that contains the text.

    Returns:
        ``df`` itself, with the three score columns added.
    """
    single_value_scorers = (
        ('afinn', afinn_sentiment),
        ('textblob', textblob_sentiment),
    )
    for column, scorer in single_value_scorers:
        df[column] = df[col_name].apply(scorer)

    # VADER yields a dict of scores per text; only 'compound' is kept.
    vader_dicts = df[col_name].apply(vader_sentiment)
    df['vader'] = vader_dicts.apply(lambda score_dict: score_dict['compound'])
    return df

In [345]:
# Load only the 'text' column of the CSV as a plain list.
# NOTE(review): absolute, user-specific path — this cell fails on any other
# machine; consider building it from a configurable data directory.
out = import_data('/users/sbuongiorno/hansard_justnine_w_year.csv', ',', 'text')

In [328]:
# Build a one-column DataFrame of collocate pairs for tokens matching 'he'.
# NOTE(review): grammatical_collocates applies the pattern with re.match,
# which anchors at the start of the token only, so 'he' also matches tokens
# such as 'her' or 'help'; use r'he$' if only the exact word is wanted.
# NOTE(review): this cell's execution count (328) is lower than the cells
# above it (up to 345) — confirm the notebook still runs top to bottom.
save = grammatical_collocates(out, 'he', return_type='df')

In [333]:
# Add the 'afinn', 'textblob' and 'vader' score columns. sentiment_score
# writes them onto the DataFrame it receives and returns that same object,
# so `test` and `save` refer to one DataFrame after this cell.
test = sentiment_score(save, 'grammatical_collocates')

In [334]:
# NOTE(review): the saved output below shows columns 'Unnamed: 0',
# 'afinn_score' and 'textblob_score', whereas sentiment_score creates
# 'afinn' and 'textblob' — the output looks stale; re-run to refresh it.
test

Unnamed: 0,grammatical_collocates,afinn_score,textblob_score,vader
0,he was,0.0,0.0,0.0
1,he avoid,-1.0,0.0,-0.296
2,he trusted,2.0,0.0,0.4767
3,he persevere,0.0,0.0,0.0
4,he avail,0.0,0.0,0.0
5,he mentioned,0.0,0.0,0.0
6,he take,0.0,0.0,0.0
7,he begged,0.0,0.0,0.0
8,he knew,0.0,0.0,0.0
9,he wished,0.0,0.0,0.0
