In [None]:
from collections import Counter

import en_core_web_sm
import pandas as pd
import textstat
from nltk.sentiment import SentimentIntensityAnalyzer


# Load the en_core_web_sm pipeline once at import time; get_features() calls
# this object on every text instead of reloading the model.
nlp = en_core_web_sm.load()

def get_readability(text):
    """Return the Flesch Reading Ease score of ``text``.

    Thin wrapper around ``textstat.flesch_reading_ease``; the value is
    passed through unchanged.
    """
    score = textstat.flesch_reading_ease(text)
    return score

# Single shared analyzer instance, created at import time and reused by
# sentiment_analysis() for every call.
analyzer = SentimentIntensityAnalyzer()
def sentiment_analysis(text):
    """Return the 'compound' polarity score of ``text``.

    The score is read from the dictionary produced by the module-level
    ``analyzer.polarity_scores``.
    """
    return analyzer.polarity_scores(text)['compound']


# Word-level emotion lexicon, read as tab-separated (word, emotion, association)
# rows. The first 45 lines of the file are skipped, and keep_default_na=False
# stops pandas from turning entries such as "NA" or "null" into NaN.
filepath = "./NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt"
emolex_df = pd.read_csv(
    filepath,
    sep='\t',
    skiprows=45,
    names=["word", "emotion", "association"],
    keep_default_na=False,
)
def emotion_analyzer(text, emotion_df=None):
    """Return per-emotion association totals for the words in ``text``.

    ``text`` is split on whitespace and every token is looked up in the
    lexicon by exact, case-sensitive match (tokens carrying punctuation or
    capital letters therefore only match if the lexicon contains them in
    that exact form). Each occurrence of a matching word adds the word's
    association value to the corresponding emotion.

    Args:
        text: The text to score.
        emotion_df: Lexicon DataFrame with ``word``, ``emotion`` and
            ``association`` columns. When omitted, the module-level
            ``emolex_df`` is used.

    Returns:
        A dict mapping every emotion present in the lexicon to its integer
        score; emotions with no matching word map to 0.

    Raises:
        ValueError: If an association value cannot be converted to ``int``.
    """
    if emotion_df is None:
        # Resolved at call time so the function follows whatever lexicon is
        # currently loaded at module level.
        emotion_df = emolex_df

    # Keys come from the lexicon actually in use, so a custom ``emotion_df``
    # with its own emotion labels is scored correctly.
    emo_score = {emo: 0 for emo in emotion_df.emotion.unique()}

    word_counts = Counter(text.split())
    if not word_counts:
        return emo_score

    # Filter the lexicon once for all distinct tokens instead of scanning the
    # whole DataFrame once per token.
    matches = emotion_df[emotion_df.word.isin(list(word_counts))]
    for word, emotion, association in zip(
        matches.word, matches.emotion, matches.association
    ):
        # Repeated words contribute once per occurrence.
        emo_score[emotion] += int(association) * word_counts[word]
    return emo_score


def get_features(text):
    """Collect surface, readability, sentiment, entity and emotion features.

    Args:
        text: The text to analyse.

    Returns:
        A dict with the keys:
            ``num_of_chars``: ``len(text)``.
            ``num_of_words``: number of whitespace-separated tokens.
            ``num_of_sentences``: number of non-empty segments obtained by
                splitting on ``'.'`` (a period-based heuristic).
            ``readability``: result of ``get_readability(text)``.
            ``sentiment``: result of ``sentiment_analysis(text)``.
            ``ner``: the ``ents`` of the document produced by ``nlp(text)``.
            ``emotions``: result of ``emotion_analyzer(text)``.
    """
    num_chars = len(text)
    num_words = len(text.split())

    # Count only segments that contain text. A bare len(text.split('.'))
    # reports one sentence too many whenever the text ends with a period,
    # counts every dot of "..." as a boundary, and yields 1 for empty text.
    num_sentences = sum(1 for segment in text.split('.') if segment.strip())

    readability = get_readability(text)
    sentiment = sentiment_analysis(text)

    # Entities found by the module-level pipeline.
    entities = nlp(text).ents

    emotions = emotion_analyzer(text)

    return {
        "num_of_chars": num_chars,
        "num_of_words": num_words,
        "num_of_sentences": num_sentences,
        "readability": readability,
        "sentiment": sentiment,
        "ner": entities,
        "emotions": emotions,
    }




