In [2]:

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch

# Hub id of the base checkpoint and local directory of the PEFT adapter that is
# applied on top of it (the directory name suggests an int8 LoRA sentiment adapter).
base_model = "meta-llama/Meta-Llama-3-8B"
peft_model = "../outputs/lora_int8_sentiment"

# trust_remote_code is deliberately NOT passed: this is a natively supported
# architecture, so no repository code is needed, and enabling the flag would allow
# arbitrary Python from the Hub repo to run in this kernel.
tokenizer = AutoTokenizer.from_pretrained(base_model)
# Reuse EOS as the pad token so that tokenizer(..., padding=True) has a pad id to use.
tokenizer.pad_token = tokenizer.eos_token
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,     # <-- key line (half precision alongside the int8 quantization config)
    device_map="cuda:0",
    quantization_config=bnb_config,
)

# Wrap the quantized base model with the adapter weights and switch to inference mode.
model = PeftModel.from_pretrained(model, peft_model)
model = model.eval()


Loading weights: 100%|██████████| 291/291 [00:11<00:00, 25.85it/s, Materializing param=model.norm.weight]                              


In [3]:
import torch.nn.functional as F




# Device that sentiment_probs moves the prompt and label tensors to.
device = 'cuda:0'

# Candidate answers scored by sentiment_probs; their order defines the column order of its outputs.
labels = ["negative", "neutral", "positive"]

@torch.no_grad()
def sentiment_probs(input_sentence, labels, max_length=2048):
    """Score each candidate label as a continuation of the sentiment prompt.

    For every label, the label's token ids are appended to the tokenized prompt,
    the model is run once on the combined sequence, and the mean per-token
    log-probability of the label tokens becomes that label's score.

    Args:
        input_sentence: Text inserted after ``Input:`` in the prompt.
        labels: Sequence of candidate label strings.
        max_length: Maximum number of prompt tokens; the tokenizer truncates
            longer prompts.

    Returns:
        Tuple ``(scores, probs)`` of float32 tensors of shape ``[1, len(labels)]``
        on ``device``. ``scores`` holds the length-normalized log-probabilities;
        ``probs`` is the softmax of ``scores`` across the labels.

    Raises:
        ValueError: If ``labels`` is empty or a label tokenizes to zero tokens
            (either case would otherwise yield an empty or NaN result silently).
    """
    import warnings  # local import so the function does not depend on another cell's imports

    if len(labels) == 0:
        raise ValueError("labels must contain at least one candidate label")

    # NOTE(review): the continuation lines of this triple-quoted string keep their
    # source indentation, so the prompt contains 8 spaces before "Input:" and
    # "Answer:". Left byte-identical on purpose -- confirm it matches the format the
    # adapter was trained on before changing it.
    prompt_template = [
        f"""Instruction: What is the sentiment of this news? Please choose an answer from {{negative/neutral/positive}}
        Input: {input_sentence}
        Answer:"""
    ]
    # Tokenize the prompt. There is exactly one prompt per call, so padding=True
    # adds no pad tokens and the batch size below is always 1.
    prompt_tok = tokenizer(
        prompt_template, return_tensors="pt", padding=True, truncation=True, max_length=max_length
    )
    input_ids_prompt = prompt_tok["input_ids"].to(device)
    attn_prompt = prompt_tok["attention_mask"].to(device)

    B, P = input_ids_prompt.shape
    C = len(labels)

    # Truncation removes tokens from the end of the prompt by default, i.e. the
    # "Answer:" cue goes first. Surface that instead of scoring labels silently.
    if P >= max_length:
        warnings.warn(
            "Prompt reached max_length and may have been truncated; the trailing "
            "'Answer:' cue may be missing, which makes the label scores unreliable."
        )

    # Pre-tokenize the labels without special tokens so they can be appended as-is.
    # NOTE(review): labels are tokenized without a leading space; verify that this
    # matches how the answer follows "Answer:" in the training data.
    label_ids_list = [
        tokenizer(l, add_special_tokens=False).input_ids for l in labels
    ]
    for label_text, lab_ids in zip(labels, label_ids_list):
        if len(lab_ids) == 0:
            raise ValueError(f"label {label_text!r} tokenizes to zero tokens")

    # One score per (prompt, label) pair: shape [B, C].
    scores = torch.empty((B, C), device=device, dtype=torch.float32)

    for ci, lab_ids in enumerate(label_ids_list):
        lab = torch.tensor(lab_ids, device=device).unsqueeze(0).repeat(B, 1)  # [B, L]
        L = lab.size(1)

        # Combined input: [prompt, label]; the appended label tokens are always attended to.
        input_ids = torch.cat([input_ids_prompt, lab], dim=1)  # [B, P+L]
        attn = torch.cat([attn_prompt, torch.ones_like(lab, dtype=attn_prompt.dtype)], dim=1)

        logits = model(input_ids=input_ids, attention_mask=attn).logits  # [B, P+L, V]

        # The token at position t is predicted by the logits at position t-1, so the
        # label tokens at [P, P+L-1] are predicted by the logits at [P-1, P+L-2].
        # Upcast before the softmax: the logits may be half precision, and a
        # log-softmax over the full vocabulary loses accuracy in fp16.
        label_logits = logits[:, P - 1 : P + L - 1, :].float()  # [B, L, V]
        logprobs = F.log_softmax(label_logits, dim=-1)

        # The targets are exactly the appended label ids.
        token_logp = logprobs.gather(-1, lab.unsqueeze(-1)).squeeze(-1)  # [B, L]

        # Length-normalize to reduce the bias towards labels with fewer tokens.
        scores[:, ci] = token_logp.mean(dim=1)

    probs = F.softmax(scores, dim=1)  # [B, C]
    return scores, probs




In [4]:
# Goal: check how well the current model performs on our labelled posts.

import pandas as pd

df = pd.read_csv('../labelled_posts.csv')

# Fail fast with a clear message if the columns read by the evaluation cells are
# missing, instead of hitting a KeyError in the middle of a long model run.
_required_columns = {'combined_text', 'sentiment'}
_missing_columns = _required_columns - set(df.columns)
assert not _missing_columns, f"labelled_posts.csv is missing columns: {sorted(_missing_columns)}"





In [5]:
# Sanity check on a single row: show the text, its ground-truth label and the
# model's probability for each candidate label.
idx = 0
example_row = df.iloc[idx]
combined_text = example_row['combined_text']
banner = '=' * 40

print(banner + 'Combined text:' + banner)
print(combined_text)

print(banner + 'GT Sentiment:' + banner)
print(example_row['sentiment'])

print(banner + 'Model Answer::' + banner)
scores, probs = sentiment_probs(combined_text, labels)

# probs has one row per prompt; print every label next to its probability.
for row_probs in probs.tolist():
    for label_name, label_prob in zip(labels, row_probs):
        print(f"  {label_name:8s}: {label_prob:.4f}")


MSTR Puts

Opened a couple of weeks ago.. was hoping for bitcoin to crash in Q3.. things happened much faster. I think the floor will be around 35-40K for Bitcoin.

not this one.. don't see where the 25% will come from with the price of bitcoin
negative
  negative: 0.8609
  neutral : 0.1373
  positive: 0.0018


In [6]:
import torch
from tqdm import tqdm

# Evaluate the model on every row of df and collect the misclassified rows.
correct = 0
total = len(df)

all_preds = []
all_gt = []
wrong_cases = []   # misclassified rows, kept for later inspection

# Walk the two columns together instead of calling df.iloc[idx][...] twice per
# row (each such lookup builds a full row Series). idx stays the 0-based row position.
rows = zip(df['combined_text'], df['sentiment'])
for idx, (combined_text, raw_gt) in enumerate(tqdm(rows, total=total)):
    # Normalize the ground truth so it compares equal to the lowercase entries of labels.
    # NOTE(review): assumes every 'sentiment' value is a string; a missing value would fail here.
    gt = raw_gt.strip().lower()

    scores, probs = sentiment_probs(combined_text, labels)

    pred_idx = torch.argmax(probs, dim=1).item()
    pred_label = labels[pred_idx]

    all_preds.append(pred_label)
    all_gt.append(gt)

    if pred_label == gt:
        correct += 1
    else:
        wrong_cases.append({
            "idx": idx,
            "text": combined_text,
            "ground_truth": gt,
            "prediction": pred_label,
            "probs": dict(zip(labels, probs[0].tolist())),
        })

# An empty dataset has no defined accuracy; report NaN instead of raising ZeroDivisionError.
accuracy = correct / total if total else float("nan")

print("="*50)
print(f"Total samples: {total}")
print(f"Correct: {correct}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Num wrong: {len(wrong_cases)}")
print("="*50)


100%|██████████| 1275/1275 [08:22<00:00,  2.54it/s]

Total samples: 1275
Correct: 1073
Accuracy: 0.8416
Num wrong: 202





In [None]:
# Alternatively, split the dataset manually.

