# Učitavanje spremljenog modela


In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
import torch
import pandas as pd
from datasets import Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directory that holds the saved model and tokenizer files.
model_path = "./spremljeni_model"

# Restore the tokenizer and the sequence-classification model from that directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
)

In [3]:
# Preprocessing for inference only: tokenizes the text and attaches no labels.
def preprocess_function_for_inference(df, label2id=None):
    """Tokenize the ``'text'`` entry of ``df`` with the module-level ``tokenizer``.

    Args:
        df: Object indexable by ``'text'`` (for example a batch passed in by
            ``Dataset.map(..., batched=True)``).
        label2id: Unused by this function. It is kept, now with a default of
            ``None``, so callers that still pass it keep working while new
            callers may omit it.

    Returns:
        The tokenizer output for ``df['text']``, requested as PyTorch tensors.
    """
    return tokenizer(
        df['text'],
        padding='longest',
        truncation=True,
        max_length=512,
        return_tensors='pt',  # return the encoded inputs as PyTorch tensors
    )

In [4]:
# Placeholder training arguments: nothing is trained here, but Trainer needs
# a TrainingArguments instance.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_eval_batch_size=1,
    do_train=False,
    do_eval=False,
    logging_dir="./logs",
)

# The Trainer is used only as a prediction wrapper around the loaded model.
# `processing_class` is the current keyword for passing the tokenizer; the
# older `tokenizer=` keyword is deprecated and emits a FutureWarning.
trainer = Trainer(
    model=model,
    processing_class=tokenizer,
    args=training_args,
)


  trainer = Trainer(


In [5]:
# Emotion name -> class index.
label2id = {
    'sadness': 0,
    'anger': 1,
    'love': 2,
    'surprise': 3,
    'fear': 4,
    'joy': 5,
}
# Reverse lookup: class index -> emotion name.
id2label = {index: emotion for emotion, index in label2id.items()}

# Helper for classifying a single sentence.
def predikcija_iz_recenice(my_sentence):
    """Predict the emotion of one sentence, print it and return it.

    Uses the module-level ``trainer``, ``label2id``, ``id2label`` and
    ``preprocess_function_for_inference``.

    Args:
        my_sentence: The text to classify.

    Returns:
        The predicted emotion name, looked up in ``id2label``. Earlier
        versions only printed the label and returned ``None``; the printed
        message is unchanged.
    """
    # A one-row dataset with only the text. No placeholder label column is
    # built because the inference preprocessing reads nothing but 'text'.
    sentence_dataset = Dataset.from_dict({"text": [my_sentence]})

    # Tokenize the row; label2id is still forwarded to match the
    # preprocessing function's signature.
    sentence_dataset = sentence_dataset.map(
        lambda batch: preprocess_function_for_inference(batch, label2id),
        batched=True,
    )

    prediction = trainer.predict(sentence_dataset)

    # Index of the highest score for the single row, converted to a plain int
    # so the dictionary lookup does not depend on a NumPy scalar type.
    predicted_class = int(prediction.predictions.argmax(axis=1)[0])

    predicted_label = id2label[predicted_class]
    print(f"Predikcija emocije: {predicted_label}")
    return predicted_label

In [6]:
# Quick check: classify one example sentence.
predikcija_iz_recenice("I am so happy today")

Map: 100%|██████████| 1/1 [00:00<00:00, 17.72 examples/s]


Predikcija emocije: joy


# Razumijevanje predikcija modela uz SHAP
- SHAP objašnjava zašto je model donio određenu odluku.
- Pokazuje koliko je svaka ulazna značajka (ovdje: riječ/token teksta) utjecala na pojedinu predikciju.


In [16]:
import shap
from transformers import pipeline

In [17]:
# Build a text-classification pipeline around the loaded model and tokenizer.
# `top_k=None` asks for the score of every label for each input; it replaces
# the deprecated `return_all_scores=True` flag.
preds = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    top_k=None,
)

# Wrap the pipeline in a SHAP explainer.
explainer = shap.Explainer(preds)

Device set to use mps:0


In [19]:
# Example sentences to run through the explainer.
example_texts = [
    "I am so happy today!",
    "I can't believe this is happening, I'm really angry.",
    "I feel so sad and disappointed.",
    "What a wonderful surprise!"
]

# Compute SHAP values for all example sentences in one explainer call.
shap_values = explainer(example_texts)

# Render the SHAP text plot for the computed values.
shap.plots.text(shap_values)