In [1]:
import numpy as np
import json
import pandas as pd
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer, util
import re

# Load the pretrained 'all-mpnet-base-v2' sentence-embedding model once; later
# cells call model.encode(...) on it to turn text into embedding vectors.
model = SentenceTransformer('all-mpnet-base-v2')

  from .autonotebook import tqdm as notebook_tqdm
  return torch._C._cuda_getDeviceCount() > 0


## Expressive Language Test

Theory: The more descriptive a text is, the more meaning it conveys about the topics it covers. When targeting areas of meaning, the text used to generate these embeddings should be as descriptive as possible (to cover the main aspects of the topic's semantics) while also using flowery language. Flowery language will … (**TODO:** this sentence was left unfinished.)

In [30]:
# All sentences were generated using GPT-3.5
# Maps a topic/emotion label to one descriptive "anchor" sentence. The anchors
# are embedded later in this cell and probe sentences are scored against them.
expressive_sents = {
    'Happiness': "Embrace the joy within and let it radiate through every aspect of your being, illuminating your world with a genuine and boundless happiness.",
    'Anger': "Fiery rage engulfs one's soul, fueling a storm of furious emotions that demand release.",
    'Sadness': "Heavy clouds of sorrow hang low, casting a veil of melancholy upon the heart, as tears trace the path of aching emotions.",
    'Employment': "Embarking on a professional journey, one finds purpose, growth, and the fulfillment of contributing their skills to a meaningful endeavor.",
    'Death': "Death is the inevitable culmination of life's transient dance, a solemn passage that invites reflection on the ephemeral nature of our existence.",
    'Irritable': "Like a string stretched too tightly on a violin, one wrong note away from snapping - a simmering kettle on the brink of boiling over.",
    'Hateful': "Feeling hateful is like being consumed by a toxic storm of anger, bitterness, and a desire for harm towards someone or something.",
    "Hateful2": "People may feel hateful due to deep-seated resentment, fear, or a distorted perception of others, leading to a destructive emotional response."
}

# Emotion label -> emotive anchor sentence. The 'Happiness', 'Anger' and
# 'Sadness' entries repeat the expressive_sents text verbatim; the remaining
# entries are longer, multi-sentence descriptions.
# NOTE(review): nothing in the visible cells encodes or uses this dict yet.
emotive_sents = {
    'Happiness': "Embrace the joy within and let it radiate through every aspect of your being, illuminating your world with a genuine and boundless happiness.",
    'Joy': "A wave of joy swept over me, effervescent and light as a feather, filling every corner of my being. It was like the warm embrace of sunshine, radiant and brilliant, turning life into a sweet symphony of laughter, hope, and resplendent delight.",
    'Anger': "Fiery rage engulfs one's soul, fueling a storm of furious emotions that demand release.",
    'Sadness': "Heavy clouds of sorrow hang low, casting a veil of melancholy upon the heart, as tears trace the path of aching emotions.",
    'Sadness2': "A profound melancholy enveloped me, sinking into the depths of my soul like a stone thrown into a still pond. It was a heavy, unbearable sorrow, a silent weeping of the heart that turned the world colorless and joy into a distant memory.",
    'Surprise': "A jolt of astonishment surged through me like a sudden flash of lightning in a cloudless sky. My heart did a somersault, and my mind paused, suspended in a state of disbelieving wonder. The unexpected had danced into view, leaving me breathless and wide-eyed in its wake.",
    'Fear': "A sudden cold crept into my veins, spreading like frost across a winter windowpane. My heart pounded a staccato rhythm, a trembling drum echoing the ghostly wails of fear whispering in my ears. The world spun, light dimmed, and my breath became the hunted, racing desperately against an unseen predator."
}

# Embed every expressive sentence, storing each vector as a list of Python floats
expressive_encs = {
    topic: [float(component) for component in model.encode(sentence)]
    for topic, sentence in expressive_sents.items()
}

In [31]:
def print_line_break():
    """Print a fixed '+====...====+' separator line to stdout."""
    separator = "+========================================+"
    print(separator)

def compare_to_text(text, encs):
    """Print the cosine similarity between `text` and every stored encoding.

    Args:
        text: String to embed with the notebook-level `model`.
        encs: Mapping of label -> embedding vector to compare against.

    Prints a separator line, the sentence itself, then one
    "Similarity <label>" line per entry of `encs`. Returns None.
    """
    print_line_break()
    print(f"Sentence: {text}:\n")
    # Same plain-float representation as the stored encodings
    query_vec = [float(value) for value in model.encode(text)]
    for label, reference_vec in encs.items():
        # cos_sim returns a matrix; [0][0] picks the single pairwise score
        score = util.cos_sim(query_vec, reference_vec)[0][0]
        print(f"Similarity {label}:\t {score}")

def add_sentence_pair(sents: list, text_pos, text_neg):
    """Append a pair of sentences to `sents`, in place.

    Args:
        sents: List to extend; it is mutated in place.
        text_pos: First sentence of the pair (appended first).
        text_neg: Second sentence of the pair (appended right after).

    Returns:
        None.
    """
    # The annotation is the `list` type itself. The previous `list()` built a
    # throwaway empty list at definition time and stored that instance as the
    # annotation, which is meaningless to readers and type checkers.
    sents.extend((text_pos, text_neg))

# Probe sentences, kept as (first, second) pairs so related sentences stay adjacent
sentence_pairs = [
    # Inverse sentences
    ("I'm working in the government", "I'm not working in the government"),
    ("I'm working in the military", "I'm not working in the military"),
    ("I work in child care", "I don't work in child care"),

    ("I used to have a job in journalism", "I have a job in journalism"),
    ("My cat got put down yesterday", "I chilled with my cat on the couch"),
    ("Just visited my Grandmother yesterday, I brought her flowers", "I haven't seen her since the funeral"),

    ("Fuck you and all of your friends", "What you said really hurt me"),
    ("I started to cry", "Why did he do this, it's unfair"),
    ("I am angry", "I am furious"),
    ("I can't take much more of this", "The faucet won't stop dripping"),
    ("This is bugging the shit out of me", "We were young, dumb and just looking for a good time"),
    ("Engineer, 28, kills self over pressure to become a sati", "It's mandatory in my shitty country"),
    ("Elon Musk calls himself a free speech absolutist, to justify turning a blind eye to hatred and bigotry on Twitter", "But when journalists report unfavourable news, they are banned without warning"),
]

# Flatten the pairs into one ordered list of sentences
sents = []
for first_text, second_text in sentence_pairs:
    add_sentence_pair(sents, first_text, second_text)

# Score each probe sentence against the expressive anchor embeddings
for sent in sents:
    compare_to_text(sent, expressive_encs)

Sentence: I'm working in the government:

Similarity Happiness:	 0.10270611941814423
Similarity Anger:	 0.06121491640806198
Similarity Sadness:	 0.07879886776208878
Similarity Employment:	 0.287301242351532
Similarity Death:	 0.06082461401820183
Similarity Irritable:	 0.11355352401733398
Similarity Hateful:	 0.07098932564258575
Similarity Hateful2:	 0.07893483340740204
Sentence: I'm not working in the government:

Similarity Happiness:	 0.03811220824718475
Similarity Anger:	 -0.004474332556128502
Similarity Sadness:	 0.02423037961125374
Similarity Employment:	 0.15425868332386017
Similarity Death:	 0.037387169897556305
Similarity Irritable:	 0.02653823047876358
Similarity Hateful:	 0.03356676548719406
Similarity Hateful2:	 0.030016232281923294
Sentence: I'm working in the military:

Similarity Happiness:	 0.09454156458377838
Similarity Anger:	 0.04252726212143898
Similarity Sadness:	 0.016075991094112396
Similarity Employment:	 0.2966892719268799
Similarity Death:	 0.0756569355726242
S

### Observations
- Unemployment is not useful: it appears to relate just as much to employment, sometimes more.
- Inverse sentence meaning tends to

In [7]:
# Load the Twitter sentiment dataset used for testing
df = pd.read_csv(
    "./Data_Storage/twitter_sentiment/training.1600000.processed.noemoticon.csv",
    index_col=False,
)

In [10]:
# Testing: split the frame on the `target` column (4 vs. 0)
df_positive = df.loc[df["target"].eq(4)]
# NOTE: the "negetive" spelling is kept so any other cell using this name still works
df_negetive = df.loc[df["target"].eq(0)]

target
0    800000
4    800000
Name: count, dtype: int64


In [None]:
# Cleaning text
def clean_text(text):
    