In [1]:
!pip install transformers datasets scikit-learn pandas

Collecting transformers
  Downloading transformers-4.57.3-py3-none-any.whl.metadata (43 kB)
Collecting datasets
  Downloading datasets-4.4.1-py3-none-any.whl.metadata (19 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2025.11.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (40 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Downloading safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting hf-xet<2.0.0,>=1.1.3 (from huggingface-hub<1.0,>=0.34.0->transformers)
  Downloading hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting

In [2]:
import pandas as pd
from sklearn.metrics import accuracy_score
from transformers import pipeline

In [3]:
data = [
    # positive
    ("I love this movie, it was amazing!", "positive"),
    ("This song makes me happy.", "positive"),
    ("The food was really good, I enjoyed it.", "positive"),
    ("I had a great day today.", "positive"),
    ("The lecture was clear and helpful.", "positive"),
    ("I feel satisfied with the result.", "positive"),
    ("The new update is actually pretty nice.", "positive"),
    ("My friend was so kind to me today.", "positive"),
    ("The weather is beautiful and I like it.", "positive"),
    ("This app is very useful and easy to use.", "positive"),
    ("The cafe was cozy and the staff were friendly.", "positive"),
    ("I didn’t expect much, but it turned out great.", "positive"),
    ("The presentation went better than I thought.", "positive"),
    ("I’m proud of what I did today.", "positive"),
    ("The service was slow, but overall I’m satisfied.", "positive"),
    ("It wasn’t perfect, but I still enjoyed it.", "positive"),
    ("The game was fun and exciting.", "positive"),
    ("I feel calm right now.", "positive"),
    ("The new feature is surprisingly helpful.", "positive"),
    ("Even though I was tired, the day felt rewarding.", "positive"),

    # negative
    ("I hate this movie, it was terrible.", "negative"),
    ("This is the worst experience ever.", "negative"),
    ("The food tasted bad and disgusting.", "negative"),
    ("I am really disappointed with the service.", "negative"),
    ("Today was such a horrible day.", "negative"),
    ("The lecture was boring and confusing.", "negative"),
    ("I feel so upset about the result.", "negative"),
    ("The new update is really annoying.", "negative"),
    ("My friend ignored me and I feel bad.", "negative"),
    ("The weather is awful and I hate it.", "negative"),
    ("I thought it would be good, but it was actually bad.", "negative"),
    ("The app keeps crashing, it’s frustrating.", "negative"),
    ("It wasn’t the worst, but I can’t say I liked it.", "negative"),
    ("The more I use this, the more disappointed I feel.", "negative"),
    ("I’m tired and nothing went well today.", "negative"),
    ("The staff were rude and unhelpful.", "negative"),
    ("I tried to enjoy it, but I just couldn’t.", "negative"),
    ("The result is far from what I expected.", "negative"),
    ("I feel anxious and unhappy about this.", "negative"),
    ("Even though some parts were okay, overall it was bad.", "negative"),
]

df = pd.DataFrame(data, columns=["text", "label"])
len(df)


40

In [4]:
def rule_based_sentiment(text: str) -> str:
    t = text.lower()

    positive_keywords = ["love", "happy", "amazing", "great", "good", "nice", "satisfied", "beautiful", "useful", "kind", "enjoyed", "helpful"]
    negative_keywords = ["hate", "worst", "terrible", "bad", "disappointed", "horrible", "boring", "upset", "annoying", "awful", "disgusting", "ignored"]

    # count how many positive/negative words appear
    pos_score = sum(kw in t for kw in positive_keywords)
    neg_score = sum(kw in t for kw in negative_keywords)

    if pos_score > neg_score:
        return "positive"
    elif neg_score > pos_score:
        return "negative"
    else:
        # default guess when it's unclear
        return "negative"

# test
print(rule_based_sentiment("I love this movie but the ending was bad"))


negative


In [5]:
texts = df["text"].tolist()
true_labels = df["label"].tolist()

baseline_preds = [rule_based_sentiment(t) for t in texts]
baseline_acc = accuracy_score(true_labels, baseline_preds)

baseline_acc

0.8

In [6]:
sentiment_model = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cuda:0


In [7]:
# Run the model on all texts
hf_outputs = sentiment_model(df["text"].tolist())

# Show the first few results
hf_outputs[:5]

[{'label': 'POSITIVE', 'score': 0.9998791217803955},
 {'label': 'POSITIVE', 'score': 0.9998822212219238},
 {'label': 'POSITIVE', 'score': 0.9998779296875},
 {'label': 'POSITIVE', 'score': 0.9998636245727539},
 {'label': 'POSITIVE', 'score': 0.9997597336769104}]

In [8]:
def hf_label_to_simple(label: str) -> str:
    return "positive" if label.upper() == "POSITIVE" else "negative"

hf_preds = [hf_label_to_simple(o["label"]) for o in hf_outputs]

hf_acc = accuracy_score(df["label"], hf_preds)
hf_acc

1.0

In [9]:
print("Baseline accuracy:", baseline_acc)
print("AI pipeline accuracy:", hf_acc)
print()

print("Examples where baseline and AI differ:\n")
for text, true, base, hf in zip(df["text"], df["label"], baseline_preds, hf_preds):
    if base != hf:
        print("TEXT:", text)
        print("TRUE LABEL:", true)
        print("BASELINE:", base)
        print("AI MODEL:", hf)
        print("-" * 60)

Baseline accuracy: 0.8
AI pipeline accuracy: 1.0

Examples where baseline and AI differ:

TEXT: The cafe was cozy and the staff were friendly.
TRUE LABEL: positive
BASELINE: negative
AI MODEL: positive
------------------------------------------------------------
TEXT: The presentation went better than I thought.
TRUE LABEL: positive
BASELINE: negative
AI MODEL: positive
------------------------------------------------------------
TEXT: I’m proud of what I did today.
TRUE LABEL: positive
BASELINE: negative
AI MODEL: positive
------------------------------------------------------------
TEXT: The game was fun and exciting.
TRUE LABEL: positive
BASELINE: negative
AI MODEL: positive
------------------------------------------------------------
TEXT: I feel calm right now.
TRUE LABEL: positive
BASELINE: negative
AI MODEL: positive
------------------------------------------------------------
TEXT: Even though I was tired, the day felt rewarding.
TRUE LABEL: positive
BASELINE: negative
AI MODEL