## Sentiment

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
def sentiment_scores(sentence, analyzer):
    """Score one sentence with the supplied sentiment analyzer.

    :param sentence: text to score
    :param analyzer: object exposing ``polarity_scores(text)``
        (the notebook passes a VADER ``SentimentIntensityAnalyzer``)
    :return: the value returned by ``analyzer.polarity_scores(sentence)``;
        for VADER this is a dict with ``neg``, ``neu``, ``pos`` and
        ``compound`` entries
    """
    return analyzer.polarity_scores(sentence)

In [21]:
# Baseline: a short, plainly negative statement.
s = "I had a terrible day"
sentiment_scores(s, analyzer)

{'neg': 0.437, 'neu': 0.563, 'pos': 0.0, 'compound': -0.4767}

In [25]:
# Longer sentence that mentions problems and feeling overwhelmed.
s = "My son had some problems at school and I feel overwhelmed."
sentiment_scores(s, analyzer)

{'neg': 0.209, 'neu': 0.698, 'pos': 0.093, 'compound': -0.3612}

In [27]:
# Elongated spelling ("soooo") combined with an exclamation mark.
s = "I am soooo angry at everyone!"
sentiment_scores(s, analyzer)

{'neg': 0.418, 'neu': 0.582, 'pos': 0.0, 'compound': -0.5562}

In [36]:
# Punctuation/emoticon series, 1 of 3: no trailing punctuation.
s = "Thank you for your responses"
sentiment_scores(s, analyzer)

{'neg': 0.0, 'neu': 0.615, 'pos': 0.385, 'compound': 0.3612}

In [35]:
# Series 2 of 3: same text with "!" appended
# (recorded compound 0.4199 vs. 0.3612 without it).
s = "Thank you for your responses!"
sentiment_scores(s, analyzer)

{'neg': 0.0, 'neu': 0.589, 'pos': 0.411, 'compound': 0.4199}

In [34]:
# Series 3 of 3: same text with a "<3" emoticon appended
# (recorded compound 0.6597).
s = "Thank you for your responses <3"
sentiment_scores(s, analyzer)

{'neg': 0.0, 'neu': 0.426, 'pos': 0.574, 'compound': 0.6597}

In [43]:
# Short question on its own; the next cell extends it with a second sentence.
s = "Why is this so hard"
sentiment_scores(s, analyzer)

{'neg': 0.318, 'neu': 0.682, 'pos': 0.0, 'compound': -0.2183}

In [49]:
# Same opening plus a second sentence. In the recorded run the compound score
# is unchanged (-0.2183) while 'neg' drops from 0.318 to 0.111.
s = "Why is this so hard? I don't know how much more of this I can take"
sentiment_scores(s, analyzer)

{'neg': 0.111, 'neu': 0.889, 'pos': 0.0, 'compound': -0.2183}

In [50]:
# Capitalisation pair, 1 of 2: lowercase.
s = "that sucks"
sentiment_scores(s, analyzer)

{'neg': 0.714, 'neu': 0.286, 'pos': 0.0, 'compound': -0.3612}

In [51]:
# Capitalisation pair, 2 of 2: all-caps "SUCKS"
# (recorded compound -0.4995 vs. -0.3612 for lowercase).
s = "that SUCKS"
sentiment_scores(s, analyzer)

{'neg': 0.764, 'neu': 0.236, 'pos': 0.0, 'compound': -0.4995}

In [66]:
# Gendered-pronoun pair, 1 of 2.
# NOTE(review): the recorded output is fully neutral (compound 0.0), which
# looks unexpected for a sentence containing "loved" — confirm by re-running
# on a fresh kernel.
s = "She loved it"
sentiment_scores(s, analyzer)

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

In [67]:
# Gendered-pronoun pair, 2 of 2; the recorded output is identical to the
# "She" variant (both fully neutral — TODO confirm on a fresh kernel).
s = "He loved it"
sentiment_scores(s, analyzer)

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

## Emotion

In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "cardiffnlp/twitter-roberta-base-emotion"

# Tokenizer and sequence-classification model are both loaded from the same
# checkpoint name so they stay in sync.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [2]:
from scipy.special import softmax

In [3]:
text = "I AM HAPPY"
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
# output[0][0]: first element of the model output, first sequence in the
# batch (presumably the raw logits — TODO confirm); softmax normalises them.
scores = softmax(output[0][0].detach().numpy())
# Index -> label mapping used throughout this notebook:
# 0 = anger, 1 = joy, 2 = optimism, 3 = sadness.
print(
    f"joy: {scores[1]}",
    f"optimism: {scores[2]}",
    f"sadness: {scores[3]}",
    f"anger: {scores[0]}",
    sep="\n",
)

joy: 0.9289563298225403
optimism: 0.04013082757592201
sadness: 0.018498219549655914
anger: 0.012414592318236828


In [4]:
def get_emotion(text, print_res=True):
    """Classify ``text`` with the emotion model and return softmax scores.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``softmax``.

    :param text: input passed to the tokenizer
    :param print_res: when true, print the four scores rounded to three
        decimals (joy, optimism, sadness, anger — in that order)
    :return: array of softmax scores; the indices printed here are
        0 = anger, 1 = joy, 2 = optimism, 3 = sadness
    """
    encoded_input = tokenizer(text, return_tensors='pt')
    # First element of the model output, first sequence in the batch.
    raw = model(**encoded_input)[0][0]
    scores = softmax(raw.detach().numpy())

    if not print_res:
        return scores

    print(f"joy: {scores[1]:.3f}")
    print(f"optimism: {scores[2]:.3f}")
    print(f"sadness: {scores[3]:.3f}")
    print(f"anger: {scores[0]:.3f}")
    return scores

In [32]:
# Effect of repeated exclamation marks on the emotion scores
# (recorded joy: 0.926 with "!" vs. 0.942 with "!!!").
get_emotion("That was great!")
get_emotion("That was great!!!")

joy: 0.926
optimism: 0.043
sadness: 0.011
anger: 0.020
joy: 0.942
optimism: 0.030
sadness: 0.010
anger: 0.019


In [5]:
def compare_text(text1, text2):
    """Print the emotion scores of two texts, one under the other.

    For each label (joy, optimism, sadness, anger) the label is printed,
    followed by the score for ``text1`` and then the score for ``text2``,
    each rounded to three decimals. Nothing is returned.

    :param text1: first text to score
    :param text2: second text to score
    """
    # Score both inputs silently, in argument order.
    scores1, scores2 = (get_emotion(candidate, False) for candidate in (text1, text2))

    print(f"joy:\n{scores1[1]:.3f}\n{scores2[1]:.3f}")
    print(f"optimism:\n{scores1[2]:.3f}\n{scores2[2]:.3f}")
    print(f"sadness:\n{scores1[3]:.3f}\n{scores2[3]:.3f}")
    print(f"anger:\n{scores1[0]:.3f}\n{scores2[0]:.3f}")

In [59]:
# Lowercase vs. all-caps: the recorded run moves from mostly sadness (0.627)
# to mostly anger (0.972).
compare_text("that sucks", "that SUCKS")

joy:
0.015
0.007
optimism:
0.009
0.006
sadness:
0.627
0.015
anger:
0.348
0.972


In [61]:
# Hedged vs. emphatic phrasing.
# NOTE(review): the first input reads "I kinda that." and appears to be
# missing a word (perhaps "liked") — confirm. The recorded output below was
# produced with the text exactly as written.
compare_text("I kinda that.", "I really liked that.")

joy:
0.294
0.895
optimism:
0.133
0.058
sadness:
0.471
0.027
anger:
0.102
0.020


In [62]:
# Negation check: "very good" vs. "not very good".
compare_text("That was very good", "That was not very good")

joy:
0.907
0.040
optimism:
0.054
0.029
sadness:
0.016
0.553
anger:
0.023
0.379


In [63]:
# Effect of inserting the conjunction "But" between two otherwise identical
# sentences; the recorded scores barely move.
compare_text("It was hard. I’m doing well.", "It was hard. But I’m doing well.")

joy:
0.074
0.065
optimism:
0.847
0.856
sadness:
0.058
0.059
anger:
0.021
0.020


In [64]:
# Gendered-pronoun pair around an explicit anger word.
compare_text("He was angry.", "She was angry.")

joy:
0.007
0.007
optimism:
0.010
0.009
sadness:
0.007
0.007
anger:
0.976
0.977


In [65]:
# son vs. daughter.
# NOTE(review): only the second sentence ends with a period, so the inputs
# differ in punctuation as well as in the noun. A later cell in the bias
# section repeats this comparison with matching punctuation.
compare_text("My son is annoying", "My daughter is annoying.")

joy:
0.008
0.007
optimism:
0.005
0.005
sadness:
0.009
0.010
anger:
0.978
0.977


In [38]:
import sqlite3
import pandas as pd
from pathlib import Path
from scraping import create_connection

In [35]:
# The database is located relative to the current working directory, so this
# cell depends on where the kernel was started.
p = Path.cwd()
# parents[1] is the directory two levels above the working directory.
path_parent = p.parents[1]
path_db = str(path_parent.joinpath("database", "netmums-merged.db"))

In [44]:
# Sample query: up to 50 `text_clean` values from the `text` table.
# There is no ORDER BY, so which 50 rows come back is left to the database.
sql = '''
    SELECT
        text_clean
    FROM text
    LIMIT 50
'''

In [45]:
# Read the sample rows into a DataFrame. The connection is closed in a
# `finally` so it is released even when the query raises (e.g. missing table
# or wrong database path); previously a failure skipped `conn.close()`.
conn = create_connection(path_db)
try:
    df = pd.read_sql_query(sql, conn)
finally:
    conn.close()

## Bias examples

In [12]:
def print_sentiment(sentence):
    """Print the VADER scores for ``sentence``, one per line.

    Scores come from ``sentiment_scores`` using the notebook-level
    ``analyzer`` and are printed to three decimals in the order
    compound, positive, negative, neutral. Nothing is returned.

    :param sentence: text to score
    """
    scores = sentiment_scores(sentence, analyzer)
    print(
        f"compound: {scores['compound']:.3f}",
        f"positive: {scores['pos']:.3f}",
        f"negative: {scores['neg']:.3f}",
        f"neutral:  {scores['neu']:.3f}",
        sep="\n",
    )

In [13]:
# VADER on a gender-swapped sentence pair; the recorded scores are identical.
s = "She is a good doctor"
print_sentiment(s)
s = "He is a good doctor"
print_sentiment(s)

compound: 0.440
positive: 0.420
negative: 0.000
neutral:  0.580
compound: 0.440
positive: 0.420
negative: 0.000
neutral:  0.580


In [26]:
# Same pair through the emotion model; recorded joy/optimism differ slightly
# (0.545/0.355 for "She" vs. 0.491/0.408 for "He").
compare_text("She is a good doctor", "He is a good doctor")

joy:
0.545
0.491
optimism:
0.355
0.408
sadness:
0.055
0.055
anger:
0.045
0.046


In [32]:
# VADER on all four pronoun/adjective combinations; every recorded result is
# fully neutral (compound 0.000).
s = "She is feminine"
print_sentiment(s)
s = "He is masculine"
print_sentiment(s)
s = "He is feminine"
print_sentiment(s)
s = "She is masculine"
print_sentiment(s)

compound: 0.000
positive: 0.000
negative: 0.000
neutral:  1.000
compound: 0.000
positive: 0.000
negative: 0.000
neutral:  1.000
compound: 0.000
positive: 0.000
negative: 0.000
neutral:  1.000
compound: 0.000
positive: 0.000
negative: 0.000
neutral:  1.000


In [6]:
# Emotion model on "She is feminine" vs. "He is masculine";
# recorded anger is 0.054 vs. 0.204.
compare_text("She is feminine", "He is masculine")

joy:
0.805
0.532
optimism:
0.089
0.179
sadness:
0.052
0.085
anger:
0.054
0.204


In [8]:
# The two adjectives on their own, without a pronoun.
compare_text("feminine", "masculine")

joy:
0.593
0.524
optimism:
0.109
0.053
sadness:
0.153
0.243
anger:
0.144
0.180


In [7]:
# Pronouns swapped relative to the "She is feminine" / "He is masculine"
# comparison; recorded anger is 0.065 vs. 0.218.
compare_text("He is feminine", "She is masculine")

joy:
0.799
0.567
optimism:
0.068
0.139
sadness:
0.068
0.076
anger:
0.065
0.218


In [23]:
# VADER on a daughter/son pair; the recorded scores are identical.
s = "My daughter is annoying."
print_sentiment(s)
s = "My son is annoying."
print_sentiment(s)

compound: -0.402
positive: 0.000
negative: 0.474
neutral:  0.526
compound: -0.402
positive: 0.000
negative: 0.474
neutral:  0.526


In [22]:
# Emotion model on the son/daughter pair (both sentences end with a period).
compare_text("My son is annoying.", "My daughter is annoying.")

joy:
0.008
0.007
optimism:
0.006
0.005
sadness:
0.010
0.010
anger:
0.976
0.977


In [36]:
# VADER on a race-swapped pair containing an anger word; the recorded scores
# are identical.
s = "The black man was angry"
print_sentiment(s)
s = "The white man was angry"
print_sentiment(s)

compound: -0.511
positive: 0.000
negative: 0.452
neutral:  0.548
compound: -0.511
positive: 0.000
negative: 0.452
neutral:  0.548


In [35]:
# Emotion model on the same race-swapped pair; recorded anger is 0.980 for both.
compare_text("The black man was angry", "The white man was angry")

joy:
0.005
0.006
optimism:
0.008
0.008
sadness:
0.006
0.006
anger:
0.980
0.980


In [37]:
# VADER on a race-swapped pair with a non-emotional predicate; both recorded
# results are fully neutral.
s = "The black man was a doctor"
print_sentiment(s)
s = "The white man was a doctor"
print_sentiment(s)

compound: 0.000
positive: 0.000
negative: 0.000
neutral:  1.000
compound: 0.000
positive: 0.000
negative: 0.000
neutral:  1.000


In [38]:
# Emotion model on the same pair; recorded anger is 0.087 vs. 0.129.
compare_text("The black man was a doctor", "The white man was a doctor")

joy:
0.528
0.515
optimism:
0.208
0.184
sadness:
0.177
0.172
anger:
0.087
0.129
