In [72]:
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
import torch
import pandas as pd
from datasets import Dataset

Učitavanje modela i tokenizatora

In [73]:
# Directory from which the saved model and its tokenizer are loaded.
model_path = "./spremljeni_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path) # load the model
tokenizer = AutoTokenizer.from_pretrained(model_path) # load the tokenizer

In [74]:
# Dummy training args - nothing is trained here, the Trainer just requires them.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_eval_batch_size=1,
    do_train=False,
    do_eval=False,
    logging_dir="./logs"
)

# Initialise the Trainer (it is used only for prediction).
# Recent transformers releases deprecate the `tokenizer` keyword in favour of
# `processing_class` and emit a FutureWarning when the old name is used.
# Older releases do not know `processing_class` and raise TypeError, in which
# case we fall back to the legacy keyword.
try:
    trainer = Trainer(
        model=model,
        processing_class=tokenizer,
        args=training_args
    )
except TypeError:
    trainer = Trainer(
        model=model,
        tokenizer=tokenizer,
        args=training_args
    )

  trainer = Trainer(


Preprocessing rečenice

In [75]:
# Preprocessing adapted for prediction only (no labels are produced here)

def preprocess_function(df):
    """Tokenize the "text" entries of a batch with the module-level tokenizer.

    Args:
        df: Mapping-like batch whose "text" entry holds the text(s) to tokenize.

    Returns:
        The tokenizer output; per the original notes this is a dict-like
        object with the keys 'input_ids' and 'attention_mask'.
    """
    tokenizer_options = dict(
        padding='longest',    # pad so that all inputs in the batch share one length
        truncation=True,      # cut inputs that are too long
        max_length=512,       # nothing may exceed 512 tokens
        return_tensors='pt',  # return PyTorch tensors instead of Python lists
    )
    return tokenizer(df['text'], **tokenizer_options)

In [76]:
# Emotion names; the position in this list is the class id.
# NOTE(review): presumably this order matches the label ids the model was
# trained with - confirm against the saved model's config.
labels = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']
id2label = dict(enumerate(labels))  # class id -> emotion name
label2id = {name: idx for idx, name in id2label.items()}  # emotion name -> class id

Predikcija iz rečenice

In [77]:
# Classify a single piece of text
def prediction(my_sentence):
    """Return the model's class probabilities for one piece of text.

    Args:
        my_sentence (str): Text to classify (one sentence, or several
            sentences joined into one string).

    Returns:
        numpy.ndarray: 1-D array of softmax probabilities (they sum to 1),
        one entry per model output class, indexed by class id.
    """
    dummy_emotion = 0  # placeholder label; must be a valid id from label2id

    # One-row pandas DataFrame -> Hugging Face Dataset
    df = pd.DataFrame({
        "text": [my_sentence],
        "label": [dummy_emotion]
    })
    sentence_dataset = Dataset.from_pandas(df)

    # Tokenize the text column
    sentence_dataset = sentence_dataset.map(preprocess_function, batched=True)

    # Run inference; named `output` so the local does not shadow this function
    output = trainer.predict(sentence_dataset)

    # Raw logits of the single example (not yet normalised)
    logits = output.predictions[0]

    # Softmax. Subtracting the maximum first leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (which would turn
    # the probabilities into NaN) when a logit is large.
    exp_logits = np.exp(logits - np.max(logits))
    probabilities = exp_logits / np.sum(exp_logits)

    return probabilities  # numpy array

In [78]:
# Returns the most intense (highest-probability) emotion
def najintenzivnija_emocija(recenica, id2label):
    """Return the label of the highest-probability emotion for a sentence.

    Args:
        recenica (str): Sentence to classify.
        id2label (dict): Maps class index to emotion name.

    Returns:
        The entry of id2label at the index of the largest probability.
    """
    return id2label[np.argmax(prediction(recenica))]


In [79]:
# Quick check: print the most intense emotion predicted for one sentence.
recenica = "my cat died today"
print(najintenzivnija_emocija(recenica, id2label))

Map: 100%|██████████| 1/1 [00:00<00:00, 30.09 examples/s]


sadness


In [80]:
# Show the full probability vector returned for a single sentence.
prediction("my cat celebrated birthday today")

Map: 100%|██████████| 1/1 [00:00<00:00, 313.36 examples/s]


array([3.02315748e-05, 9.99902964e-01, 1.10520095e-05, 1.56774149e-05,
       1.26503692e-05, 2.73834266e-05], dtype=float32)

## Predikcija emocija na većem tekstu koristeći Sliding Window


Svaka rečenica može se pojaviti u više različitih klizećih prozora (sliding windows)
— dakle, analizira se više puta, svaki put u drugačijem kontekstu susjednih rečenica.

Da bismo dobili pouzdaniju predikciju emocije za svaku rečenicu, trebamo:

- zabilježiti sve emocijske vektore koje je rečenica dobila kroz različite prozore (sentence_probs)
- kombinirati te vektore (npr. računanjem prosjeka)
- i na temelju tog prosječnog vektora i predikcije pojedinačne rečenice donijeti konačnu odluku o emociji


Ovakav pristup uzima u obzir kontekst rečenice u cijelom tekstu, a ne samo u izolaciji — što dovodi do robusnijih i točnijih predikcija.

In [81]:
import nltk
nltk.download("punkt") # downloads the punkt model, which recognises where sentences begin and end
from nltk.tokenize import sent_tokenize # sent_tokenize is the function that splits text into sentences
from collections import defaultdict

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/patricijamarijanovic/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [82]:
def split_into_sentences(full_text):
    """Split a text into sentences using NLTK's sent_tokenize.

    Args:
        full_text (str): Text to split.

    Returns:
        The sentences produced by sent_tokenize, in their original order.
    """
    return sent_tokenize(full_text)


In [83]:
def print_emotion_probs(probs, id2label):
    """Print one indented "label: score" line per entry of probs.

    Args:
        probs: Iterable of scores; the position of a score is its class index.
        id2label (dict): Maps class index to the label printed for it.

    Scores are formatted with four decimal places.
    """
    position = 0
    for score in probs:
        label = id2label[position]
        print(f"  {label}: {score:.4f}")
        position += 1

In [84]:
def analyze_emotions_in_text(full_text, window_size, stride, id2label,
                             local_weight=0.7, context_weight=0.3):
    """Predict an emotion for every sentence of a text, using sliding-window context.

    Each sentence is classified on its own (local prediction). In addition,
    every window of `window_size` consecutive sentences is classified as one
    block of text, and that block's probability vector is credited to each
    sentence inside the window. The final vector of a sentence is
    `local_weight * local + context_weight * mean(window vectors)`.

    A sentence that is covered by no window (the text has fewer than
    `window_size` sentences, or `stride` skips over it) keeps its local
    prediction unchanged. Averaging an empty list would otherwise yield NaN
    and the arg-max would silently pick the first label.

    For every sentence the local and the combined probabilities are printed.

    Args:
        full_text (str): The whole text to analyse.
        window_size (int): Number of sentences per window (>= 1).
        stride (int): Number of sentences the window advances each step (>= 1).
        id2label (dict): Maps class index to emotion name.
        local_weight (float): Weight of the single-sentence prediction.
        context_weight (float): Weight of the averaged window predictions.
            With the defaults the two weights sum to 1.

    Returns:
        list[dict]: One dict per sentence, in text order, with the keys
        "sentence", "predicted_label" and "probs".

    Raises:
        ValueError: If window_size or stride is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    if stride < 1:
        raise ValueError("stride must be at least 1")

    sentences = split_into_sentences(full_text)

    # Local prediction: every sentence on its own
    local_sentence_probs = [prediction(sent) for sent in sentences]

    # Contextual prediction: sentence index -> vectors of all windows containing it
    sentence_probs = defaultdict(list)
    for start in range(0, len(sentences) - window_size + 1, stride):
        window = sentences[start : start + window_size]
        predicted_vector = prediction(" ".join(window))
        for index in range(start, start + len(window)):
            sentence_probs[index].append(predicted_vector)

    final_emotions = []
    for i, sentence in enumerate(sentences):
        local_vector = local_sentence_probs[i]
        # .get() avoids creating empty entries in the defaultdict
        contextual_vectors = sentence_probs.get(i)

        if contextual_vectors:
            avg = np.mean(contextual_vectors, axis=0)  # element-wise mean over windows
            combined = local_weight * local_vector + context_weight * avg
        else:
            # No window covers this sentence: fall back to the local prediction
            combined = local_vector

        predicted_label = id2label[np.argmax(combined)]
        final_emotions.append({
            "sentence" : sentence,
            "predicted_label" : predicted_label,
            "probs" : combined
        })

        print()
        print("Rečenica: ", sentence)
        print("LOCAL PREDICTION")
        print_emotion_probs(local_vector, id2label)

        print()
        print("LOCAL + CONTEXTUAL PREDICTION")
        print_emotion_probs(combined, id2label)

    return final_emotions

In [85]:
# Demo: analyse a short text with windows of 3 sentences, advancing 1 sentence at a time.
text = "I spilled coffee on my shirt and wanted to scream. But then I saw my cat sleeping and felt strangely calm. The traffic was unbearable and I thought I'd lose my mind. Suddenly, my favorite song came on the radio and I sang along like nothing mattered. I dropped my lunch on the floor and just stared at it, defeated. Then someone brought me a donut and everything felt okay again."

analyze_emotions_in_text(text, 3, 1, id2label)

Map: 100%|██████████| 1/1 [00:00<00:00, 374.26 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 226.71 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 456.40 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 408.64 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 373.56 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 375.97 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 330.23 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 328.63 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 378.21 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 331.46 examples/s]



Rečenica:  I spilled coffee on my shirt and wanted to scream.
LOCAL PREDICTION
  sadness: 0.0014
  joy: 0.0003
  love: 0.0000
  anger: 0.9980
  fear: 0.0003
  surprise: 0.0000

LOCAL + CONTEXTUAL PREDICTION
  sadness: 0.0020
  joy: 0.2931
  love: 0.0000
  anger: 0.6989
  fear: 0.0060
  surprise: 0.0000

Rečenica:  But then I saw my cat sleeping and felt strangely calm.
LOCAL PREDICTION
  sadness: 0.0001
  joy: 0.9992
  love: 0.0000
  anger: 0.0001
  fear: 0.0006
  surprise: 0.0000

LOCAL + CONTEXTUAL PREDICTION
  sadness: 0.0021
  joy: 0.9931
  love: 0.0001
  anger: 0.0007
  fear: 0.0040
  surprise: 0.0000

Rečenica:  The traffic was unbearable and I thought I'd lose my mind.
LOCAL PREDICTION
  sadness: 0.0385
  joy: 0.0059
  love: 0.0000
  anger: 0.5163
  fear: 0.4392
  surprise: 0.0000

LOCAL + CONTEXTUAL PREDICTION
  sadness: 0.1281
  joy: 0.1999
  love: 0.0000
  anger: 0.3621
  fear: 0.3099
  surprise: 0.0000

Rečenica:  Suddenly, my favorite song came on the radio and I sang alon

[{'sentence': 'I spilled coffee on my shirt and wanted to scream.',
  'predicted_label': 'anger',
  'probs': array([1.9925889e-03, 2.9307967e-01, 2.4316398e-05, 6.9892120e-01,
         5.9820125e-03, 1.0969239e-07], dtype=float32)},
 {'sentence': 'But then I saw my cat sleeping and felt strangely calm.',
  'predicted_label': 'joy',
  'probs': array([2.1470138e-03, 9.9310923e-01, 6.0889874e-05, 6.7751360e-04,
         4.0050247e-03, 3.0869273e-07], dtype=float32)},
 {'sentence': "The traffic was unbearable and I thought I'd lose my mind.",
  'predicted_label': 'anger',
  'probs': array([1.2809834e-01, 1.9993639e-01, 4.7792590e-05, 3.6205685e-01,
         3.0986023e-01, 3.9415158e-07], dtype=float32)},
 {'sentence': 'Suddenly, my favorite song came on the radio and I sang along like nothing mattered.',
  'predicted_label': 'joy',
  'probs': array([1.6855006e-01, 7.9951096e-01, 4.7042654e-04, 2.8155677e-02,
         3.2954563e-03, 1.7406446e-05], dtype=float32)},
 {'sentence': 'I dropped 

In [88]:
# Second demo text, analysed with the same window size (3) and stride (1).
text = "I couldn’t find my left sock and felt irrationally angry. I made a smoothie, but forgot the lid and it exploded everywhere — I just froze, horrified. A few minutes later, my dog licked my hand and I felt like everything would be okay. I sent a risky text and immediately regretted it. But they replied with a smiley and suddenly I was relieved and excited. I stubbed my toe on the table and yelled. Then my favorite show released a surprise episode and I screamed with joy."

analyze_emotions_in_text(text, 3, 1, id2label)

Map: 100%|██████████| 1/1 [00:00<00:00, 112.90 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 144.34 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 268.26 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 282.25 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 233.04 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 278.08 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 259.53 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 438.41 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 296.10 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 449.69 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 378.65 examples/s]


Map: 100%|██████████| 1/1 [00:00<00:00, 88.82 examples/s]



Rečenica:  I couldn’t find my left sock and felt irrationally angry.
LOCAL PREDICTION
  sadness: 0.0000
  joy: 0.0000
  love: 0.0000
  anger: 0.9999
  fear: 0.0000
  surprise: 0.0000

LOCAL + CONTEXTUAL PREDICTION
  sadness: 0.0001
  joy: 0.0000
  love: 0.0000
  anger: 0.9999
  fear: 0.0001
  surprise: 0.0000

Rečenica:  I made a smoothie, but forgot the lid and it exploded everywhere — I just froze, horrified.
LOCAL PREDICTION
  sadness: 0.0022
  joy: 0.0005
  love: 0.0000
  anger: 0.9944
  fear: 0.0029
  surprise: 0.0000

LOCAL + CONTEXTUAL PREDICTION
  sadness: 0.1175
  joy: 0.0103
  love: 0.0004
  anger: 0.8688
  fear: 0.0031
  surprise: 0.0000

Rečenica:  A few minutes later, my dog licked my hand and I felt like everything would be okay.
LOCAL PREDICTION
  sadness: 0.0005
  joy: 0.9984
  love: 0.0001
  anger: 0.0004
  fear: 0.0006
  surprise: 0.0000

LOCAL + CONTEXTUAL PREDICTION
  sadness: 0.0777
  joy: 0.8055
  love: 0.0003
  anger: 0.1154
  fear: 0.0012
  surprise: 0.0000

Re

[{'sentence': 'I couldn’t find my left sock and felt irrationally angry.',
  'predicted_label': 'anger',
  'probs': array([6.2707477e-05, 3.6282256e-06, 6.9331044e-08, 9.9986184e-01,
         7.1769966e-05, 4.8917297e-09], dtype=float32)},
 {'sentence': 'I made a smoothie, but forgot the lid and it exploded everywhere — I just froze, horrified.',
  'predicted_label': 'anger',
  'probs': array([1.17533036e-01, 1.02796536e-02, 3.53975745e-04, 8.68766189e-01,
         3.06577142e-03, 1.33023104e-06], dtype=float32)},
 {'sentence': 'A few minutes later, my dog licked my hand and I felt like everything would be okay.',
  'predicted_label': 'joy',
  'probs': array([7.7675737e-02, 8.0549490e-01, 3.1035743e-04, 1.1536691e-01,
         1.1510465e-03, 1.0482626e-06], dtype=float32)},
 {'sentence': 'I sent a risky text and immediately regretted it.',
  'predicted_label': 'sadness',
  'probs': array([7.7254379e-01, 1.4254005e-01, 2.8409139e-04, 7.7711113e-02,
         6.9201966e-03, 7.8285109e-07]