In [21]:
from transformers import pipeline
import torch 
import spacy
from nltk import pos_tag
import textstat
from pathlib import Path
import json

In [22]:
# goal: sentiment- detect emotional warmth, empathy, tone
sentiment_pipe = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")


Device set to use mps:0


In [31]:
label_score ={
    'LABEL_0': 0.0,  # negative
    'LABEL_1': 0.5,  # neutral
    'LABEL_2': 1.0,  # positive
}
nlp = spacy.load("en_core_web_sm")

def get_sentiment_score(text):
    try:
        result = sentiment_pipe(text[:512][0])
        print("senty: ", result)
        dict = result[0]
        print(round(dict['score'], 2))
        # get the assigned label or default to .5
        return round(dict['score'], 2)
    except Exception as e:
        print("error with sentiment score: ", e)
        return 0.5

# goal: readability- check if text is clearly written
def get_readability_score(text):
    result = textstat.flesch_reading_ease(text) # from 0-100, good is 70-80
    result = max(0, min(result/100,1)) # normalize
    return result

# goal: specificity- how detailed or vague is the text?
def get_specificity_score(text):
    doc = nlp(text)
    detail_words = [token for token in doc if {"NOUN", "PROPN", "VERB"}] # only flags nouns and words bc those contribute to specificity the most
    if len(doc) > 0:
        return len(detail_words) / len(doc)
    else: 
        return 0

# goal: personalization- how personal is the advice
def get_personalization_score(text):
    personal_words = ["you", "your", "yours", "i", "me", "my", "mine", "we", "us", "our"]
    doc = nlp(text.lower())
    for token in doc:
        count = sum(1 for word in doc if word in personal_words)
    return count


def total_trust_score(text):
    total = (.25 * get_personalization_score(text)) + (.25 * get_specificity_score(text)) +(.25 * get_readability_score(text)) + (.25 * get_sentiment_score(text))
    return total


In [32]:
output_path = Path("data/dear_abby_scored.jsonl")
data = []
with output_path.open("r") as file:
    for line in file:
        if line.strip():
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError as e:
                print(f"Skipping invalid JSON line: {line.strip()}\nError: {e}")

for line in data:
    total = 0.0
    text = line["ai_advice"]
    print(get_personalization_score(text))
    print(total_trust_score(text))
    print(total)
#     line["trust_score"] = total

# with output_path.open("w") as f:
#     for line in data:
#         f.write(json.dumps(line) + "\n")

0
senty:  [{'label': 'LABEL_1', 'score': 0.5068966746330261}]
0.51
0.48526865756541526
0.0
0
senty:  [{'label': 'LABEL_1', 'score': 0.5068966746330261}]
0.51
0.5198447963800905
0.0
0
senty:  [{'label': 'LABEL_1', 'score': 0.5068966746330261}]
0.51
0.5402384815436242
0.0
0
senty:  [{'label': 'LABEL_1', 'score': 0.5068966746330261}]
0.51
0.4850145970394737
0.0
0
senty:  [{'label': 'LABEL_1', 'score': 0.5068966746330261}]
0.51
0.5466581683168317
0.0
0
senty:  [{'label': 'LABEL_1', 'score': 0.5068966746330261}]
0.51
0.48839407894736847
0.0
0
senty:  [{'label': 'LABEL_1', 'score': 0.5068966746330261}]
0.51
0.5356695308110133
0.0
0
senty:  [{'label': 'LABEL_1', 'score': 0.5039997696876526}]
0.5
0.5247079941860465
0.0
