In [168]:
import torch
import requests
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [2]:
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

In [3]:
tokenizer = AutoTokenizer.from_pretrained(checkpoint)



In [14]:
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)



In [109]:
#sequence = "The meaning of life is something I've seldom seek"
sequence = "The stock market hit hard"

In [177]:
tokens = tokenizer.tokenize(sequence)
tokens

['the', 'stock', 'market', 'hit', 'hard']

In [111]:
ids = tokenizer.convert_tokens_to_ids(tokens)

In [112]:
input_ids = torch.tensor(ids)

In [113]:
output = model(input_ids.reshape(1, len(ids)))

In [114]:
output.logits

tensor([[ 2.5104, -2.1415]], grad_fn=<AddmmBackward0>)

In [115]:
# getting the labels for the above logits
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

In [116]:
# we need to apply a softmax on the logits to get probability
predictions = torch.nn.functional.softmax(output.logits, dim=1)

In [127]:
# helper function to determine sentiment
from collections import namedtuple

Sentiment = namedtuple("Sentiment", "sentiment prob")
def get_sentiment(prediction: torch.Tensor, threshold: float =  0.7) -> Sentiment:
    index = torch.argmax(prediction)
    prob_lst = prediction.tolist()[0]
    index = index.item()    
    return Sentiment(index == 1, prob_lst[index])


In [128]:
pred = get_sentiment(predictions)
pred

Sentiment(sentiment=False, prob=0.9905471205711365)

In [160]:
### Writing a function that takes in arbitrary lenght of sentances and returns their sentiments as Sentiment Object

def get_sentiments(sentences: list[str], debug=False) -> list[Sentiment]:
    tokens = tokenizer(sentences, return_tensors="pt", padding="max_length")    
    input_ids = tokens["input_ids"]
    output = model(input_ids, attention_mask=tokens["attention_mask"].clone().detach()) # the attention_mask will ignore the padding tokens
    if debug:
        print("Logits", output.logits)
    predictions = torch.nn.functional.softmax(output.logits, dim=1)
    return [get_sentiment(torch.Tensor(pred).reshape(1, len(pred))) for pred in predictions.tolist()]
    

In [161]:
sentences = [
   "The outcome was positive",
   "The outcome was negative"
]

In [162]:
get_sentiments(sentences)

[Sentiment(sentiment=True, prob=0.9998314380645752),
 Sentiment(sentiment=False, prob=0.9997503161430359)]

In [165]:
get_sentiments(sentences[0])

[Sentiment(sentiment=True, prob=0.9998314380645752)]

In [164]:
get_sentiments(sentences[1])

[Sentiment(sentiment=False, prob=0.9997503161430359)]

In [166]:
get_sentiments(sentences)

[Sentiment(sentiment=True, prob=0.9998314380645752),
 Sentiment(sentiment=False, prob=0.9997503161430359)]

### Lets run a larger test by checking the sentiments of first page of hacker news

In [169]:
def get_title(post_id):
    url = f"https://hacker-news.firebaseio.com/v0/item/{post_id}.json"
    resp = requests.get(url)
    if resp.status_code == 200:
        json_data = resp.json()
        return json_data["title"]
    return ""

In [170]:
top_posts_resp = requests.get("https://hacker-news.firebaseio.com/v0/topstories.json")

if top_posts_resp.status_code == 200:
    top_ids = top_posts_resp.json()[:40]
    top_titles = [get_title(_id) for _id in top_ids]


In [172]:
sentiments = get_sentiments(top_titles)


In [174]:
for yc_title, sentiment in zip(top_titles, sentiments):
    print(yc_title, f" | Sentiment  : {sentiment.sentiment}", f" | Probility {sentiment.prob} ")

The Time I Lied to the CTO and Saved the Day  | Sentiment  : False  | Probility 0.9590507745742798 
AlphaFold 3 predicts the structure and interactions of life's molecules  | Sentiment  : False  | Probility 0.7783793807029724 
Consistency LLM: converting LLMs to parallel decoders accelerates inference 3.5x  | Sentiment  : True  | Probility 0.9795705080032349 
A look inside illegal drone deliveries at B.C.'s prisons  | Sentiment  : True  | Probility 0.9969395399093628 
Opening Windows in Linux with sockets, bare hands and 200 lines of C  | Sentiment  : False  | Probility 0.9976108074188232 
Industrial Design Student Work: "How Long Should Objects Last?"  | Sentiment  : False  | Probility 0.9987938404083252 
How to Use the Foreign Function API in Java 22 to Call C Libraries  | Sentiment  : False  | Probility 0.9972713589668274 
Development Notes from xkcd's "Machine"  | Sentiment  : False  | Probility 0.9734192490577698 
Show HN: AI climbing coach – visualize how to climb any route based

In [None]:
### Printing all the posts with positive sentiments

In [175]:
for idx,sentiment in enumerate(sentiments):
    if sentiment.sentiment:
        print(sentiment, " ", top_titles[idx])

Sentiment(sentiment=True, prob=0.9795705080032349)   Consistency LLM: converting LLMs to parallel decoders accelerates inference 3.5x
Sentiment(sentiment=True, prob=0.9969395399093628)   A look inside illegal drone deliveries at B.C.'s prisons
Sentiment(sentiment=True, prob=0.6669753789901733)   Show HN: AI climbing coach – visualize how to climb any route based on your body
Sentiment(sentiment=True, prob=0.9310887455940247)   TimesFM: Time Series Foundation Model for time-series forecasting
Sentiment(sentiment=True, prob=0.9997443556785583)   Breathwork supports emergence of altered states of consciousness
Sentiment(sentiment=True, prob=0.9972541928291321)   Radius Full Page Display
Sentiment(sentiment=True, prob=0.9931683540344238)   Pulley system composition – a systematic approach (2020)
Sentiment(sentiment=True, prob=0.6877611875534058)   Securing Git Repositories with Gittuf
Sentiment(sentiment=True, prob=0.8540497422218323)   Empirical Health (YC S23) is hiring engineering leads

In [176]:
### getting negative sentiments
for idx,sentiment in enumerate(sentiments):
    if not sentiment.sentiment:
        print(sentiment, " ", top_titles[idx])

Sentiment(sentiment=False, prob=0.9590507745742798)   The Time I Lied to the CTO and Saved the Day
Sentiment(sentiment=False, prob=0.7783793807029724)   AlphaFold 3 predicts the structure and interactions of life's molecules
Sentiment(sentiment=False, prob=0.9976108074188232)   Opening Windows in Linux with sockets, bare hands and 200 lines of C
Sentiment(sentiment=False, prob=0.9987938404083252)   Industrial Design Student Work: "How Long Should Objects Last?"
Sentiment(sentiment=False, prob=0.9972713589668274)   How to Use the Foreign Function API in Java 22 to Call C Libraries
Sentiment(sentiment=False, prob=0.9734192490577698)   Development Notes from xkcd's "Machine"
Sentiment(sentiment=False, prob=0.9975948929786682)   Show HN: I built a non-linear UI for ChatGPT
Sentiment(sentiment=False, prob=0.9952829480171204)   Show HN: SimBricks – Modular Full-System Simulation for HW-SW Systems
Sentiment(sentiment=False, prob=0.9983161687850952)   Stack Overflow users deleting answers afte