In [1]:
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax

MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"

tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

labels = ["negative", "neutral", "positive"]

print("Model Loaded Successfully!")


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model Loaded Successfully!


In [2]:
# Load dataset for sentiment analysis
df = pd.read_csv("../data/large_dataset.csv.txt")

# Combine subject + body to make complete email text
df["email"] = df["subject"].fillna("") + " " + df["body"].fillna("")

# Use FIRST 10 EMAILS (assignment says 10 examples)
df = df.head(10)

df[["email"]]


Unnamed: 0,email
0,Unable to access shared mailbox I am getting a...
1,Rule not triggering Our auto-assignment rule i...
2,Email threads not merging Two replies from the...
3,Tag suggestions incorrect Tag suggestions are ...
4,Drafts disappearing Draft replies disappear wh...
5,Automation delay Our automation to mark emails...
6,Login issues Several users are unable to log i...
7,Export not downloading Exporting conversations...
8,Notification spam Agents are receiving duplica...
9,Feature request: bulk tagging We want an optio...


In [3]:
def local_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)

    scores = outputs.logits[0].numpy()
    scores = softmax(scores)

    sentiment = labels[scores.argmax()]
    confidence = float(scores.max())

    return {
        "sentiment": sentiment,
        "confidence": confidence,
        "scores": {
            "negative": float(scores[0]),
            "neutral": float(scores[1]),
            "positive": float(scores[2]),
        }
    }


In [4]:
results_local = []

for text in df["email"]:
    results_local.append(local_sentiment(text))

pd.DataFrame(results_local)


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Unnamed: 0,sentiment,confidence,scores
0,negative,0.766414,"{'negative': 0.7664141654968262, 'neutral': 0...."
1,neutral,0.610799,"{'negative': 0.3460550904273987, 'neutral': 0...."
2,negative,0.626272,"{'negative': 0.6262716054916382, 'neutral': 0...."
3,negative,0.838448,"{'negative': 0.8384476900100708, 'neutral': 0...."
4,neutral,0.559096,"{'negative': 0.42579013109207153, 'neutral': 0..."
5,negative,0.802427,"{'negative': 0.8024265170097351, 'neutral': 0...."
6,negative,0.831666,"{'negative': 0.8316658735275269, 'neutral': 0...."
7,negative,0.872619,"{'negative': 0.8726190328598022, 'neutral': 0...."
8,negative,0.616538,"{'negative': 0.6165382862091064, 'neutral': 0...."
9,neutral,0.878786,"{'negative': 0.026254886761307716, 'neutral': ..."


In [5]:
def prompt_v1_simulation(text):
    result = local_sentiment(text)
    return {
        "sentiment": result["sentiment"],
        "confidence": result["confidence"],
        "reasoning": "Basic model output without evidence or refinement."
    }

results_v1 = [prompt_v1_simulation(t) for t in df["email"]]
pd.DataFrame(results_v1)


Unnamed: 0,sentiment,confidence,reasoning
0,negative,0.766414,Basic model output without evidence or refinem...
1,neutral,0.610799,Basic model output without evidence or refinem...
2,negative,0.626272,Basic model output without evidence or refinem...
3,negative,0.838448,Basic model output without evidence or refinem...
4,neutral,0.559096,Basic model output without evidence or refinem...
5,negative,0.802427,Basic model output without evidence or refinem...
6,negative,0.831666,Basic model output without evidence or refinem...
7,negative,0.872619,Basic model output without evidence or refinem...
8,negative,0.616538,Basic model output without evidence or refinem...
9,neutral,0.878786,Basic model output without evidence or refinem...


In [7]:
def prompt_v2_simulation(text):
    sentiment_data = local_sentiment(text)
    sentiment = sentiment_data["sentiment"]
    confidence = sentiment_data["confidence"]

    # Extract evidence keywords
    words = text.lower().split()
    negative_keywords = ["error", "issue", "not", "unable", "failed", "disappointed", "terrible", "slow", "broke"]
    positive_keywords = ["thanks", "great", "good", "resolved", "improvement"]

    evidence = []
    for w in words:
        if w in negative_keywords or w in positive_keywords:
            evidence.append(w)

    # Build detailed reasoning
    reasoning = f"Detected {sentiment} sentiment based on keywords: {evidence}"

    # Confidence calibration
    if confidence < 0.4:
        calibrated_conf = confidence * 0.70
    elif confidence < 0.7:
        calibrated_conf = confidence * 0.85
    else:
        calibrated_conf = confidence

    return {
        "sentiment": sentiment,
        "confidence": float(calibrated_conf),
        "evidence": evidence,
        "reasoning": reasoning
    }

results_v2 = [prompt_v2_simulation(t) for t in df["email"]]
pd.DataFrame(results_v2)


Unnamed: 0,sentiment,confidence,evidence,reasoning
0,negative,0.766414,[unable],Detected negative sentiment based on keywords:...
1,neutral,0.519179,[not],Detected neutral sentiment based on keywords: ...
2,negative,0.532331,[not],Detected negative sentiment based on keywords:...
3,negative,0.838448,[],Detected negative sentiment based on keywords: []
4,neutral,0.475232,[],Detected neutral sentiment based on keywords: []
5,negative,0.802427,[],Detected negative sentiment based on keywords: []
6,negative,0.831666,[unable],Detected negative sentiment based on keywords:...
7,negative,0.872619,[not],Detected negative sentiment based on keywords:...
8,negative,0.524058,[],Detected negative sentiment based on keywords: []
9,neutral,0.878786,[],Detected neutral sentiment based on keywords: []


In [8]:
final_df = pd.DataFrame({
    "email": df["email"],
    "v1_sentiment": [r["sentiment"] for r in results_v1],
    "v1_confidence": [r["confidence"] for r in results_v1],
    "v2_sentiment": [r["sentiment"] for r in results_v2],
    "v2_confidence": [r["confidence"] for r in results_v2],
    "v2_evidence": [r["evidence"] for r in results_v2],
    "v2_reasoning": [r["reasoning"] for r in results_v2]
})

final_df


Unnamed: 0,email,v1_sentiment,v1_confidence,v2_sentiment,v2_confidence,v2_evidence,v2_reasoning
0,Unable to access shared mailbox I am getting a...,negative,0.766414,negative,0.766414,[unable],Detected negative sentiment based on keywords:...
1,Rule not triggering Our auto-assignment rule i...,neutral,0.610799,neutral,0.519179,[not],Detected neutral sentiment based on keywords: ...
2,Email threads not merging Two replies from the...,negative,0.626272,negative,0.532331,[not],Detected negative sentiment based on keywords:...
3,Tag suggestions incorrect Tag suggestions are ...,negative,0.838448,negative,0.838448,[],Detected negative sentiment based on keywords: []
4,Drafts disappearing Draft replies disappear wh...,neutral,0.559096,neutral,0.475232,[],Detected neutral sentiment based on keywords: []
5,Automation delay Our automation to mark emails...,negative,0.802427,negative,0.802427,[],Detected negative sentiment based on keywords: []
6,Login issues Several users are unable to log i...,negative,0.831666,negative,0.831666,[unable],Detected negative sentiment based on keywords:...
7,Export not downloading Exporting conversations...,negative,0.872619,negative,0.872619,[not],Detected negative sentiment based on keywords:...
8,Notification spam Agents are receiving duplica...,negative,0.616538,negative,0.524058,[],Detected negative sentiment based on keywords: []
9,Feature request: bulk tagging We want an optio...,neutral,0.878786,neutral,0.878786,[],Detected neutral sentiment based on keywords: []
