This repository contains a sentiment analysis project built on a DistilBERT model. Sentiment analysis classifies text into sentiment categories; here the classes are negative (label 0, reported as `LABEL_0`), positive (label 1, reported as `LABEL_1`), and neutral (label 2, reported as `LABEL_2`).

In [None]:
# High-level route: let `pipeline` bundle tokenisation, the forward pass and label decoding
from transformers import pipeline

pipe = pipeline(
    task="text-classification",
    model="Dmyadav2001/Sentimental-Analysis",
)


config.json:   0%|          | 0.00/769 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.25k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Device set to use cpu


In [28]:
# Hand-written probes; the trailing comment on each line is the sentiment a human would expect.
test_sentences = [
    "I love ChatGPT; it makes my work day so much easier!",           # expected: positive
    "This is the worst customer service I’ve ever experienced.",      # expected: negative
    "The movie was okay—nothing spectacular, nothing awful.",         # expected: neutral
    "I’m furious that my order still hasn’t arrived!",                # expected: negative
    "Thank you for your quick response and help!",                    # expected: positive
    "The package arrived late, but the support team was helpful.",    # expected: mixed (often neutral/positive)
    "Weather today is cloudy with a slight chance of rain.",          # expected: neutral
    "Ugh, I’m so disappointed in this product.",                      # expected: negative
    "Wow, that performance totally exceeded my expectations!",        # expected: positive
    "It’s fine, I guess—could be better, could be worse.",            # expected: neutral
]

# Score one sentence at a time and show the first (top) prediction the pipeline returns.
for sentence in test_sentences:
    top_prediction = pipe(sentence)[0]
    print(f"{sentence}\n→ {top_prediction}\n")

I love ChatGPT; it makes my work day so much easier!
→ {'label': 'LABEL_1', 'score': 0.9457300901412964}

This is the worst customer service I’ve ever experienced.
→ {'label': 'LABEL_0', 'score': 0.9769734740257263}

The movie was okay—nothing spectacular, nothing awful.
→ {'label': 'LABEL_0', 'score': 0.8014140725135803}

I’m furious that my order still hasn’t arrived!
→ {'label': 'LABEL_0', 'score': 0.9363728761672974}

Thank you for your quick response and help!
→ {'label': 'LABEL_1', 'score': 0.9738197326660156}

The package arrived late, but the support team was helpful.
→ {'label': 'LABEL_1', 'score': 0.7528666257858276}

Weather today is cloudy with a slight chance of rain.
→ {'label': 'LABEL_0', 'score': 0.9389627575874329}

Ugh, I’m so disappointed in this product.
→ {'label': 'LABEL_0', 'score': 0.9596520066261292}

Wow, that performance totally exceeded my expectations!
→ {'label': 'LABEL_1', 'score': 0.8489273190498352}

It’s fine, I guess—could be better, could be worse.
→

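Manual inference: loading the tokenizer and model directly (instead of using `pipeline`) allows manual batching, moving the model and tensors to a GPU, and custom post-processing logic.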

In [31]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Fetch the tokenizer and the model weights (the weights file is ~268 MB on first download)
model_name = "Dmyadav2001/Sentimental-Analysis"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# .eval() switches off dropout and returns the model itself, so it can be chained here
model = AutoModelForSequenceClassification.from_pretrained(model_name).eval()
# Optional, if you have a GPU: model.to("cuda")

# Class index -> human-readable sentiment tag.
# Per the model card: label_0 = negative, label_1 = positive, label_2 = neutral
id2label = dict(enumerate(("negative", "positive", "neutral")))

# Helper function
def predict_sentiment(texts):
    """
    Classify the sentiment of one or more texts.

    Parameters
    ----------
    texts : str or iterable of str
        A single string, or a list/tuple of strings that are tokenised and
        scored together as one batch.

    Returns
    -------
    dict or list of dict
        * single string in  -> one ``{"label": str, "score": float}`` dict
        * list/tuple in     -> a list of such dicts, one per input, in input
          order. This holds for a one-element list too, so the result can
          always be zipped with the inputs. An empty input gives ``[]``.

        ``label`` is looked up in the module-level ``id2label`` mapping and
        ``score`` is the softmax probability of the predicted class, rounded
        to 4 decimals.
    """
    # Remember the input kind: the return shape follows the *input type*,
    # not the number of results.
    single_input = isinstance(texts, str)
    texts = [texts] if single_input else list(texts)
    if not texts:
        return []  # nothing to score; skip the tokenizer and model entirely

    # Tokenise *as a batch* so the inputs are padded to a common length
    enc = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors="pt"
    )
    # Inputs must live on the same device as the model, otherwise the forward
    # pass fails once the model has been moved with model.to("cuda").
    enc = enc.to(model.device)

    with torch.no_grad():                         # inference only, no gradients
        logits = model(**enc).logits              # one row of class scores per text

    probs = F.softmax(logits, dim=-1)             # turn scores into probabilities
    preds = probs.argmax(dim=-1).tolist()         # highest-probability class id
    outputs = [
        {
            "label": id2label[p],                 # negative / positive / neutral
            "score": round(probs[i, p].item(), 4) # confidence 0-1
        }
        for i, p in enumerate(preds)
    ]
    return outputs[0] if single_input else outputs

# Sanity check: one clearly positive, one clearly negative and one lukewarm sentence
samples = [
    "I absolutely love this place!",
    "This is the worst customer service ever.",
    "Eh, the movie was okay I guess.",
]
# Score the whole list in one call, then print each sentence with its prediction
for text, prediction in zip(samples, predict_sentiment(samples)):
    print(f"{text}\n→ {prediction}\n")


I absolutely love this place!
→ {'label': 'positive', 'score': 0.9804}

This is the worst customer service ever.
→ {'label': 'negative', 'score': 0.981}

Eh, the movie was okay I guess.
→ {'label': 'positive', 'score': 0.8186}

