In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
import pandas as pd


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

In [None]:
# Download the `project-aps/finbert-finetune` tokenizer and classifier from Hugging Face.
model_name = "project-aps/finbert-finetune"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model = model.to(device)  # place the weights on the selected device
model.eval()  # set model to evaluation mode

In [None]:
from torchinfo import summary

# Tokenize one example sentence, padded/truncated to exactly 128 tokens,
# and request PyTorch tensors so it can be fed straight to the model.
dummy_input = tokenizer(
    "This is a sample sentence about the stock market.",
    max_length=128,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
)
# Rebuild the encoding as a plain dict whose tensors live on `device`.
dummy_input = dict((name, tensor.to(device)) for name, tensor in dummy_input.items())

# show model summary (three levels deep, with shapes, parameter counts and trainability)
summary(
    model,
    input_data=dummy_input,
    depth=3,
    col_names=["input_size", "output_size", "num_params", "trainable"],
)

Layer (type:depth-idx)                                  Input Shape               Output Shape              Param #                   Trainable
BertForSequenceClassification                           --                        [1, 3]                    --                        True
├─BertModel: 1-1                                        [1, 128]                  [1, 768]                  --                        True
│    └─BertEmbeddings: 2-1                              --                        [1, 128, 768]             --                        True
│    │    └─Embedding: 3-1                              [1, 128]                  [1, 128, 768]             23,440,896                True
│    │    └─Embedding: 3-2                              [1, 128]                  [1, 128, 768]             1,536                     True
│    │    └─Embedding: 3-3                              [1, 128]                  [1, 128, 768]             393,216                   True
│    │    └─LayerNorm:

# Inference (Pipeline)

In [6]:
# Override the config's id2label and label2id so the pipeline reports these
# class names for the three output indices.
label_map = {0: "neutral", 1: "negative", 2: "positive"}
model.config.id2label = label_map
model.config.label2id = {name: idx for idx, name in label_map.items()}

# Pass `device` explicitly. The model was already moved to `device` when it was
# loaded; without this argument the pipeline falls back to its own default
# device, which on a CUDA machine can either mismatch the model's device or
# move the shared `model` object elsewhere and break later cells that send
# their inputs to `device`.
pipe = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    device=device,
)



In [7]:
# Single-example check of the pipeline on one earnings-related post.
text = (
    "Earnings smashed expectations AAPL posts $0.89 EPS vs $0.78 est. "
    "Bullish momentum incoming! #EarningsSeason"
)
print(pipe(text))  # Output: [{'label': 'positive', 'score': 0.9997484087944031}]


[{'label': 'positive', 'score': 0.9997484087944031}]


In [13]:
texts = ["I am not angry."]
outputs = pipe(texts)

# Tabulate each input next to the label and score the pipeline returned for it.
output_pipe_df = pd.DataFrame(
    {
        "text": texts,
        "label": [result["label"] for result in outputs],
        "score": [result["score"] for result in outputs],
    }
)

print(output_pipe_df)

              text    label     score
0  I am not angry.  neutral  0.999642


# Inference (Simple Approach)

In [None]:
label_map_ours = {0: "neutral", 1: "negative", 2: "positive"}


def get_predictions_and_probs(model, inputs, label_map):
    """
    Run one forward pass and report the top class per example with its confidence.

    Args:
        model (torch.nn.Module): Trained model; it is called as ``model(**inputs)``
            and its result must expose a ``.logits`` tensor.
        inputs (dict): Tokenized inputs (from tokenizer(..., return_tensors='pt')),
            unpacked as keyword arguments to the model.
        label_map (dict): Mapping from class indices to label names.

    Returns:
        predicted_labels (List[str]): Label name of the highest-probability class
            for each example.
        probs (List[float]): The softmax probability of that class for each example.
    """
    # No gradients are needed for inference.
    with torch.no_grad():
        class_probs = torch.nn.functional.softmax(model(**inputs).logits, dim=-1)
        # torch.max along a dim yields both the maxima and their positions.
        top = torch.max(class_probs, dim=-1)

    predicted_labels = [label_map[class_idx.item()] for class_idx in top.indices]
    return predicted_labels, top.values.tolist()

In [10]:
# Mixed batch of financial and non-financial headlines/posts to classify.
texts = [
    "Earnings smashed expectations AAPL posts $0.89 EPS vs $0.78 est. Bullish momentum incoming! #EarningsSeason",
    "Apple Beats Q2 Expectations With Record iPhone Sales",
    "Tesla Stock Soars as EV Deliveries Surpass Forecasts",
    "Local Cat Elected Mayor of Small Town in Viral Social Media Sensation",
    "Researchers Develop Edible Water Bottles to Reduce Plastic Waste",
    "$META down 8% after missing on ad revenue. Growth slowing in key regions. Ouch. 😬📉 #Meta #FAANG",
    "Disappointing numbers from $NFLX — subscriber growth stalls again. Bear case gaining steam. 🐻 #Netflix",
]

# Tokenize the whole batch at once (padded, truncated) and move it to `device`.
inputs_ours = tokenizer(
    texts,
    return_tensors="pt",
    padding=True,
    truncation=True,
).to(device)

ours_predicted_labels, ours_probs = get_predictions_and_probs(
    model=model,
    inputs=inputs_ours,
    label_map=label_map_ours,
)

# Show each text next to its predicted label and probability.
output_df = pd.DataFrame(
    {
        "text": texts,
        "predicted_label": ours_predicted_labels,
        "probability": ours_probs,
    }
)
output_df

Unnamed: 0,text,predicted_label,probability
0,Earnings smashed expectations AAPL posts $0.89...,positive,0.999748
1,Apple Beats Q2 Expectations With Record iPhone...,positive,0.999796
2,Tesla Stock Soars as EV Deliveries Surpass For...,positive,0.999829
3,Local Cat Elected Mayor of Small Town in Viral...,neutral,0.999672
4,Researchers Develop Edible Water Bottles to Re...,neutral,0.999308
5,$META down 8% after missing on ad revenue. Gro...,negative,0.9997
6,Disappointing numbers from $NFLX — subscriber ...,negative,0.999632
