In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification, Trainer, TrainingArguments, DataCollatorForTokenClassification, pipeline
from peft import LoraConfig, TaskType, get_peft_model
import plotly.graph_objects as go
from datasets import Dataset
import pandas as pd
import numpy as np
import evaluate
import json
import re

In [None]:
def process_jsonl(input_file, tokenizer, sentiment_mapping):
    """Convert a JSONL annotation export into word-level BIO-tagged examples.

    Every non-blank line of ``input_file`` is parsed as a JSON object with a
    ``text`` string and an optional ``label`` list of
    ``[start, end, sentiment]`` character spans.

    The text is split into word tokens (``\\w+``) and single punctuation
    characters. A token is tagged when it lies entirely inside a span: the
    first such token of the span gets ``B-<sentiment>``, the following ones
    ``I-<sentiment>``. When spans overlap, the span listed later wins.

    Args:
        input_file: Path to the UTF-8 encoded JSONL file.
        tokenizer: Unused; kept so existing callers do not break.
        sentiment_mapping: Maps raw span labels to canonical sentiment names.
            Spans whose label is missing from the mapping (or maps to ``"O"``)
            are ignored.

    Returns:
        A list of ``{"tokens": [...], "labels": [...]}`` dicts, one per record
        that contains at least one token. Records without tokens (empty or
        whitespace-only text) are skipped.
    """
    token_pattern = re.compile(r'\w+|[^\w\s]', re.UNICODE)

    with open(input_file, 'r', encoding='utf-8') as f:
        # Blank lines (e.g. a trailing newline at the end of the export) are not JSON.
        data = [json.loads(line) for line in f if line.strip()]

    processed_data = []

    for item in data:
        text = item['text']
        # `or []` also covers an explicit `"label": null`.
        labels = item.get('label') or []

        # finditer yields each token together with its exact character span,
        # so no second search through the text is needed.
        matches = list(token_pattern.finditer(text))
        if not matches:
            # Nothing to tag; the previous zip(*[]) unpacking crashed here.
            continue

        tokens = [match.group() for match in matches]
        token_offsets = [match.span() for match in matches]
        token_labels = ["O"] * len(tokens)

        for start, end, sentiment in labels:
            sentiment_standard = sentiment_mapping.get(sentiment, "O")
            if sentiment_standard == "O":
                continue

            # Track the first covered token explicitly: a span that starts on
            # whitespace or mid-token must still open with a B- tag.
            first_in_span = True
            for i, (token_start, token_end) in enumerate(token_offsets):
                if token_start >= start and token_end <= end:
                    prefix = "B" if first_in_span else "I"
                    token_labels[i] = f"{prefix}-{sentiment_standard}"
                    first_in_span = False

        processed_data.append({
            "tokens": tokens,
            "labels": token_labels
        })

    return processed_data


# Annotation labels map onto themselves; process_jsonl treats anything not listed here as "O".
sentiment_mapping = {name: name for name in ('Negative', 'Neutral', 'Positive')}
# Fast HerBERT tokenizer, reused by the later cells of this notebook.
tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-base-cased", use_fast=True)
processed_data = process_jsonl("patryk.jsonl", tokenizer, sentiment_mapping)

In [21]:
# Wrap the processed records in a Dataset (built through a pandas DataFrame) and show the first row.
dataset = Dataset.from_pandas(pd.DataFrame(processed_data))
print("Dataset example:", dataset[0], sep="\n")

Dataset example:
{'tokens': ['Lakier', 'roweru', 'bardzo', 'kiepskiej', 'jakości', ',', 'robią', 'się', 'odpryski', 'nie', 'wiadomo', 'od', 'czego', 'rower', 'ładny', 'wygodny', 'ale', 'po', '3', 'miesiącach', 'użytkowania', 'widoczne', 'odpryski', 'lakieru', 'czego', 'za', 'taką', 'cenę', 'nie', 'powinno', 'być', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', '!', 'Oczywiście', 'producent', 'twierdzi', 'że', 'są', 'to', 'wady', 'mechaniczne', ',', 'dziecko', 'ma', 'w', 'lepszym', 'stanie', 'lakier', 'na', 'rowerze', 'ale', 'nie', 'z', 'tej', 'firmy', 'ODRADZAM', 'ZAKUP', 'Z', 'TEGO', 'POWODU', 'SZKODA', 'TYLE', 'KASY', 'I', 'NERWÓW', 'chyba', 'ze', 'rower', 'będzie', 'stał', 'nieużywany', 'za', 'szybą', '.', 'Na', 'zakończenie', 'powiem', 'tak', 'porównując', 'lakier', 'zwykły', 'do', 'paznokci', 'a', 'hybrydę', 'wiadomo', 'w', 'tańszym', 'zwykłym', 'lakierze', 'robią', 'się', 'odpryski', 'a', 'lepszym', 'nie', '!', '!', '!', '!', '

In [22]:
# BIO tag inventory; the position of a tag in this list is its integer class id.
label_list = ["O", "B-Negative", "I-Negative", "B-Positive", "I-Positive", "B-Neutral", "I-Neutral"]

# Forward (tag -> id) and reverse (id -> tag) lookups derived from the list order.
label_to_id = dict(zip(label_list, range(len(label_list))))
id_to_label = dict(enumerate(label_list))

print("Label to ID Mapping:", label_to_id, sep="\n")
print("\nID to Label Mapping:", id_to_label, sep="\n")

Label to ID Mapping:
{'O': 0, 'B-Negative': 1, 'I-Negative': 2, 'B-Positive': 3, 'I-Positive': 4, 'B-Neutral': 5, 'I-Neutral': 6}

ID to Label Mapping:
{0: 'O', 1: 'B-Negative', 2: 'I-Negative', 3: 'B-Positive', 4: 'I-Positive', 5: 'B-Neutral', 6: 'I-Neutral'}


In [None]:


def tokenize_and_align_labels(examples):
    """Tokenize pre-split words and spread their BIO tags over the sub-word pieces.

    Args:
        examples: Batch with ``'tokens'`` (list of word lists) and ``'labels'``
            (list of tag lists, parallel to the words).

    Returns:
        The tokenizer output (padded/truncated to 128 positions, offsets
        included) with an added ``"labels"`` entry holding one id list per
        example:

        * positions without a word id get ``-100``;
        * the first piece of a word gets the word's tag id;
        * further pieces of the same word get the ``I-`` variant of a ``B-``
          tag, otherwise the word's own tag id;
        * tags missing from ``label_to_id`` fall back to id ``0``.
    """
    encoded = tokenizer(
        examples['tokens'],
        is_split_into_words=True,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_offsets_mapping=True
    )

    def _piece_label_id(word_tags, word_idx, continues_word):
        # A continuation piece must not open a new entity, so B- becomes I-.
        tag = word_tags[word_idx]
        if continues_word and tag.startswith("B-"):
            tag = tag.replace("B-", "I-")
        return label_to_id.get(tag, 0)

    aligned = []
    for batch_pos, word_tags in enumerate(examples['labels']):
        row = []
        last_word = None
        for word_idx in encoded.word_ids(batch_index=batch_pos):
            if word_idx is None:
                row.append(-100)
            else:
                row.append(_piece_label_id(word_tags, word_idx, word_idx == last_word))
            last_word = word_idx
        aligned.append(row)

    encoded["labels"] = aligned
    return encoded

# Apply the tokenization and alignment
tokenized_datasets = dataset.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=['tokens', 'labels']
)

print("Tokenized Dataset Example:")
print(tokenized_datasets[0])

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Tokenized Dataset Example:
{'labels': [-100, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, -100], 'input_ids': [0, 4071, 13281, 40876, 2450, 17696, 2257, 8842, 1947, 10362, 2022, 2021, 7046, 2113, 1997, 4610, 2173, 2784, 20290, 31965, 12858, 2067, 2199, 2184, 1034, 10947, 18496, 15568, 2021, 7046, 2113, 38534, 4236, 2784, 2163, 5117, 6598, 1997, 5606, 2458, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 1725, 5581, 16309, 4869, 2040, 2264, 2063, 19225, 46568, 1947, 6067, 2185, 1019, 20465, 3110, 2050, 13281, 1998, 23922, 2199, 1997, 1046, 2320, 3780, 9993, 3864, 5327, 14609, 7813, 6901, 1

In [24]:
# Split the dataset into training and evaluation sets (80% train, 20% test); the fixed seed
# keeps the split reproducible.
# NOTE(review): this rebinds `tokenized_datasets` from the mapped dataset to the split result,
# so re-running this cell without re-running the mapping cell above it will presumably fail — confirm.
tokenized_datasets = tokenized_datasets.train_test_split(test_size=0.2, seed=42)

# Access the 'train' and 'test' splits
train_dataset = tokenized_datasets['train']
eval_dataset = tokenized_datasets['test']

# Report the split sizes as a quick sanity check.
print(f"\nNumber of training samples: {len(train_dataset)}")
print(f"Number of evaluation samples: {len(eval_dataset)}")


Number of training samples: 240
Number of evaluation samples: 60


In [25]:
# Batch collator for token classification, built from the tokenizer; handed to the Trainer
# as `data_collator`.
data_collator = DataCollatorForTokenClassification(tokenizer)

In [26]:
# Initialize the model: pretrained HerBERT checkpoint with a token-classification head that has
# one output per entry in label_list; the tag<->id maps are passed along as id2label/label2id.
# The logged warning shows the classifier weights are newly initialized, i.e. not yet trained.
foundation_model = AutoModelForTokenClassification.from_pretrained(
    "allegro/herbert-base-cased",
    num_labels=len(label_list),
    id2label=id_to_label,
    label2id=label_to_id
)
# Bare expression: shows the module tree as the cell output.
foundation_model

Some weights of BertForTokenClassification were not initialized from the model checkpoint at allegro/herbert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(50000, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

In [27]:
# Print the dotted name of every submodule of the base model (e.g. to look up names that
# could be used as LoRA target modules).
for name, module in foundation_model.named_modules():
    print(name)


bert
bert.embeddings
bert.embeddings.word_embeddings
bert.embeddings.position_embeddings
bert.embeddings.token_type_embeddings
bert.embeddings.LayerNorm
bert.embeddings.dropout
bert.encoder
bert.encoder.layer
bert.encoder.layer.0
bert.encoder.layer.0.attention
bert.encoder.layer.0.attention.self
bert.encoder.layer.0.attention.self.query
bert.encoder.layer.0.attention.self.key
bert.encoder.layer.0.attention.self.value
bert.encoder.layer.0.attention.self.dropout
bert.encoder.layer.0.attention.output
bert.encoder.layer.0.attention.output.dense
bert.encoder.layer.0.attention.output.LayerNorm
bert.encoder.layer.0.attention.output.dropout
bert.encoder.layer.0.intermediate
bert.encoder.layer.0.intermediate.dense
bert.encoder.layer.0.intermediate.intermediate_act_fn
bert.encoder.layer.0.output
bert.encoder.layer.0.output.dense
bert.encoder.layer.0.output.LayerNorm
bert.encoder.layer.0.output.dropout
bert.encoder.layer.1
bert.encoder.layer.1.attention
bert.encoder.layer.1.attention.self
bert.e

In [28]:

# Show the task types PEFT offers.
print(list(TaskType))

# LoRA adapter settings for the token-classification model.
lora_config = LoraConfig(
    task_type=TaskType.TOKEN_CLS,          # token-level classification task
    r=64,                                  # rank of the LoRA matrices; adjust as needed
    lora_alpha=32,                         # LoRA scaling factor; adjust as needed
    lora_dropout=0.05,                     # dropout probability on the LoRA branch
    # target_modules is left unset; the printed PEFT model shows the attention `query`
    # projection wrapped in lora.Linear, so the library's own default selection is in effect.
)

[<TaskType.SEQ_CLS: 'SEQ_CLS'>, <TaskType.SEQ_2_SEQ_LM: 'SEQ_2_SEQ_LM'>, <TaskType.CAUSAL_LM: 'CAUSAL_LM'>, <TaskType.TOKEN_CLS: 'TOKEN_CLS'>, <TaskType.QUESTION_ANS: 'QUESTION_ANS'>, <TaskType.FEATURE_EXTRACTION: 'FEATURE_EXTRACTION'>]


In [29]:

# Wrap the base model with the LoRA adapters described by lora_config.
peft_model = get_peft_model(foundation_model, lora_config)

# Report how many parameters remain trainable after wrapping.
peft_model.print_trainable_parameters()

trainable params: 2,364,679 || all params: 126,222,350 || trainable%: 1.8734


In [30]:
# Bare expression: shows the wrapped module tree, including the injected lora.Linear layers.
peft_model

PeftModelForTokenClassification(
  (base_model): LoraModel(
    (model): BertForTokenClassification(
      (bert): BertModel(
        (embeddings): BertEmbeddings(
          (word_embeddings): Embedding(50000, 768, padding_idx=1)
          (position_embeddings): Embedding(514, 768)
          (token_type_embeddings): Embedding(2, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): BertEncoder(
          (layer): ModuleList(
            (0-11): 12 x BertLayer(
              (attention): BertAttention(
                (self): BertSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Li

In [31]:
# seqeval scorer shared by every evaluation pass.
metric = evaluate.load("seqeval")

def compute_metrics(p):
    """Score predictions with seqeval, once on all labelled tokens and once without "O".

    Args:
        p: ``(predictions, labels)`` pair; ``predictions`` holds per-token class
            scores (argmax is taken over axis 2) and ``labels`` the gold ids,
            where ``-100`` marks positions to ignore.

    Returns:
        Dict with precision / recall / f1 / accuracy under two views:
        ``*_all`` uses every position whose gold id is not ``-100``;
        ``*_without_O`` additionally drops positions whose gold tag is ``"O"``.
    """
    scores, gold_ids = p
    pred_ids = np.argmax(scores, axis=2)

    def _is_labelled(gold):
        return gold != -100

    def _is_entity(gold):
        return gold != -100 and id_to_label[gold] != "O"

    def _evaluate(keep):
        # Both sequences are filtered by the gold id so they stay aligned position by position.
        references = [
            [id_to_label[gold] for gold in gold_row if keep(gold)]
            for gold_row in gold_ids
        ]
        hypotheses = [
            [id_to_label[pred] for pred, gold in zip(pred_row, gold_row) if keep(gold)]
            for pred_row, gold_row in zip(pred_ids, gold_ids)
        ]
        return metric.compute(predictions=hypotheses, references=references)

    results_all = _evaluate(_is_labelled)
    results_without_O = _evaluate(_is_entity)

    report = {}
    report["precision_all"] = results_all.get("overall_precision", 0.0)
    report["recall_all"] = results_all.get("overall_recall", 0.0)
    report["f1_all"] = results_all.get("overall_f1", 0.0)
    report["accuracy_all"] = results_all.get("overall_accuracy", 0.0)
    report["precision_without_O"] = results_without_O.get("overall_precision", 0.0)
    report["recall_without_O"] = results_without_O.get("overall_recall", 0.0)
    report["f1_without_O"] = results_without_O.get("overall_f1", 0.0)
    report["accuracy_without_O"] = results_without_O.get("overall_accuracy", 0.0)
    return report


# Training configuration: evaluate and checkpoint once per epoch, keep at most two
# checkpoints, and reload the checkpoint with the highest `f1_all` when training ends.
training_args = TrainingArguments(
    output_dir="./results",
    # `evaluation_strategy` is deprecated (slated for removal in Transformers 4.46, per the
    # warning this cell used to emit); `eval_strategy` is the replacement name.
    eval_strategy="epoch",
    save_strategy="epoch",  # must match the eval cadence for load_best_model_at_end
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir='./logs',
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1_all",  # key returned by compute_metrics
    greater_is_better=True,
)


`evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead



In [32]:
# Assemble the Trainer from the LoRA-wrapped model, the train/eval splits, the collator and
# the seqeval-based compute_metrics.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Run fine-tuning; evaluation and checkpoint cadence come from training_args.
trainer.train()

  0%|          | 0/150 [00:00<?, ?it/s]

{'loss': 1.9509, 'grad_norm': 14.967206001281738, 'learning_rate': 1.866666666666667e-05, 'epoch': 0.67}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.867238163948059, 'eval_precision_all': 0.00951625693893735, 'eval_recall_all': 0.09448818897637795, 'eval_f1_all': 0.01729106628242075, 'eval_accuracy_all': 0.12170927127050744, 'eval_precision_without_O': 0.20168067226890757, 'eval_recall_without_O': 0.1889763779527559, 'eval_f1_without_O': 0.1951219512195122, 'eval_accuracy_without_O': 0.2946859903381642, 'eval_runtime': 0.6033, 'eval_samples_per_second': 99.45, 'eval_steps_per_second': 6.63, 'epoch': 1.0}
{'loss': 1.8752, 'grad_norm': 15.969340324401855, 'learning_rate': 1.7333333333333336e-05, 'epoch': 1.33}
{'loss': 1.8121, 'grad_norm': 15.5262451171875, 'learning_rate': 1.6000000000000003e-05, 'epoch': 2.0}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.7668689489364624, 'eval_precision_all': 0.011073253833049404, 'eval_recall_all': 0.10236220472440945, 'eval_f1_all': 0.019984627209838585, 'eval_accuracy_all': 0.24303700877527662, 'eval_precision_without_O': 0.21100917431192662, 'eval_recall_without_O': 0.18110236220472442, 'eval_f1_without_O': 0.19491525423728814, 'eval_accuracy_without_O': 0.2608695652173913, 'eval_runtime': 0.6195, 'eval_samples_per_second': 96.859, 'eval_steps_per_second': 6.457, 'epoch': 2.0}
{'loss': 1.7437, 'grad_norm': 15.345121383666992, 'learning_rate': 1.4666666666666666e-05, 'epoch': 2.67}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.673896312713623, 'eval_precision_all': 0.0126953125, 'eval_recall_all': 0.10236220472440945, 'eval_f1_all': 0.022589052997393572, 'eval_accuracy_all': 0.3864937046928653, 'eval_precision_without_O': 0.21839080459770116, 'eval_recall_without_O': 0.14960629921259844, 'eval_f1_without_O': 0.17757009345794394, 'eval_accuracy_without_O': 0.21739130434782608, 'eval_runtime': 0.5968, 'eval_samples_per_second': 100.539, 'eval_steps_per_second': 6.703, 'epoch': 3.0}
{'loss': 1.6905, 'grad_norm': 14.653937339782715, 'learning_rate': 1.3333333333333333e-05, 'epoch': 3.33}
{'loss': 1.6326, 'grad_norm': 13.761385917663574, 'learning_rate': 1.2e-05, 'epoch': 4.0}


  0%|          | 0/4 [00:00<?, ?it/s]


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



{'eval_loss': 1.5889073610305786, 'eval_precision_all': 0.01288659793814433, 'eval_recall_all': 0.07874015748031496, 'eval_f1_all': 0.0221483942414175, 'eval_accuracy_all': 0.5566577642121328, 'eval_precision_without_O': 0.12698412698412698, 'eval_recall_without_O': 0.06299212598425197, 'eval_f1_without_O': 0.08421052631578947, 'eval_accuracy_without_O': 0.14009661835748793, 'eval_runtime': 0.5931, 'eval_samples_per_second': 101.167, 'eval_steps_per_second': 6.744, 'epoch': 4.0}
{'loss': 1.5824, 'grad_norm': 14.73233413696289, 'learning_rate': 1.0666666666666667e-05, 'epoch': 4.67}


  0%|          | 0/4 [00:00<?, ?it/s]


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



{'eval_loss': 1.5129786729812622, 'eval_precision_all': 0.0019342359767891683, 'eval_recall_all': 0.007874015748031496, 'eval_f1_all': 0.0031055900621118015, 'eval_accuracy_all': 0.7046928653185807, 'eval_precision_without_O': 0.046511627906976744, 'eval_recall_without_O': 0.015748031496062992, 'eval_f1_without_O': 0.023529411764705882, 'eval_accuracy_without_O': 0.08695652173913043, 'eval_runtime': 0.5919, 'eval_samples_per_second': 101.36, 'eval_steps_per_second': 6.757, 'epoch': 5.0}
{'loss': 1.526, 'grad_norm': 13.945833206176758, 'learning_rate': 9.333333333333334e-06, 'epoch': 5.33}
{'loss': 1.4833, 'grad_norm': 14.279927253723145, 'learning_rate': 8.000000000000001e-06, 'epoch': 6.0}


  0%|          | 0/4 [00:00<?, ?it/s]


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



{'eval_loss': 1.447867751121521, 'eval_precision_all': 0.0, 'eval_recall_all': 0.0, 'eval_f1_all': 0.0, 'eval_accuracy_all': 0.7836703548264021, 'eval_precision_without_O': 0.0, 'eval_recall_without_O': 0.0, 'eval_f1_without_O': 0.0, 'eval_accuracy_without_O': 0.06763285024154589, 'eval_runtime': 0.5882, 'eval_samples_per_second': 102.011, 'eval_steps_per_second': 6.801, 'epoch': 6.0}
{'loss': 1.4325, 'grad_norm': 14.542745590209961, 'learning_rate': 6.666666666666667e-06, 'epoch': 6.67}


  0%|          | 0/4 [00:00<?, ?it/s]


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



{'eval_loss': 1.3950330018997192, 'eval_precision_all': 0.0, 'eval_recall_all': 0.0, 'eval_f1_all': 0.0, 'eval_accuracy_all': 0.827546737886303, 'eval_precision_without_O': 0.0, 'eval_recall_without_O': 0.0, 'eval_f1_without_O': 0.0, 'eval_accuracy_without_O': 0.06763285024154589, 'eval_runtime': 0.5922, 'eval_samples_per_second': 101.319, 'eval_steps_per_second': 6.755, 'epoch': 7.0}
{'loss': 1.4098, 'grad_norm': 13.529491424560547, 'learning_rate': 5.333333333333334e-06, 'epoch': 7.33}
{'loss': 1.3887, 'grad_norm': 13.29844856262207, 'learning_rate': 4.000000000000001e-06, 'epoch': 8.0}


  0%|          | 0/4 [00:00<?, ?it/s]


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



{'eval_loss': 1.356048345565796, 'eval_precision_all': 0.0, 'eval_recall_all': 0.0, 'eval_f1_all': 0.0, 'eval_accuracy_all': 0.8557802365509347, 'eval_precision_without_O': 0.0, 'eval_recall_without_O': 0.0, 'eval_f1_without_O': 0.0, 'eval_accuracy_without_O': 0.05314009661835749, 'eval_runtime': 0.5885, 'eval_samples_per_second': 101.953, 'eval_steps_per_second': 6.797, 'epoch': 8.0}
{'loss': 1.3581, 'grad_norm': 13.759032249450684, 'learning_rate': 2.666666666666667e-06, 'epoch': 8.67}


  0%|          | 0/4 [00:00<?, ?it/s]


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



{'eval_loss': 1.331933617591858, 'eval_precision_all': 0.0, 'eval_recall_all': 0.0, 'eval_f1_all': 0.0, 'eval_accuracy_all': 0.8683708508202976, 'eval_precision_without_O': 0.0, 'eval_recall_without_O': 0.0, 'eval_f1_without_O': 0.0, 'eval_accuracy_without_O': 0.05314009661835749, 'eval_runtime': 0.5917, 'eval_samples_per_second': 101.408, 'eval_steps_per_second': 6.761, 'epoch': 9.0}
{'loss': 1.3383, 'grad_norm': 13.312973976135254, 'learning_rate': 1.3333333333333334e-06, 'epoch': 9.33}
{'loss': 1.3336, 'grad_norm': 13.392769813537598, 'learning_rate': 0.0, 'epoch': 10.0}


  0%|          | 0/4 [00:00<?, ?it/s]


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



{'eval_loss': 1.3234866857528687, 'eval_precision_all': 0.0, 'eval_recall_all': 0.0, 'eval_f1_all': 0.0, 'eval_accuracy_all': 0.8740938573063716, 'eval_precision_without_O': 0.0, 'eval_recall_without_O': 0.0, 'eval_f1_without_O': 0.0, 'eval_accuracy_without_O': 0.043478260869565216, 'eval_runtime': 0.5879, 'eval_samples_per_second': 102.056, 'eval_steps_per_second': 6.804, 'epoch': 10.0}
{'train_runtime': 62.773, 'train_samples_per_second': 38.233, 'train_steps_per_second': 2.39, 'train_loss': 1.570509204864502, 'epoch': 10.0}


TrainOutput(global_step=150, training_loss=1.570509204864502, metrics={'train_runtime': 62.773, 'train_samples_per_second': 38.233, 'train_steps_per_second': 2.39, 'total_flos': 161143717478400.0, 'train_loss': 1.570509204864502, 'epoch': 10.0})

In [33]:
# Score the model currently held by the trainer on the evaluation split and show the metrics.
results = trainer.evaluate()
print("\nEvaluation Results:", results, sep="\n")

  0%|          | 0/4 [00:00<?, ?it/s]


Evaluation Results:
{'eval_loss': 1.673896312713623, 'eval_precision_all': 0.0126953125, 'eval_recall_all': 0.10236220472440945, 'eval_f1_all': 0.022589052997393572, 'eval_accuracy_all': 0.3864937046928653, 'eval_precision_without_O': 0.21839080459770116, 'eval_recall_without_O': 0.14960629921259844, 'eval_f1_without_O': 0.17757009345794394, 'eval_accuracy_without_O': 0.21739130434782608, 'eval_runtime': 0.657, 'eval_samples_per_second': 91.324, 'eval_steps_per_second': 6.088, 'epoch': 10.0}


In [34]:

# Hand-written sample sentences used to eyeball the model's predictions.
example_texts = [
    "Nie jestem zadowolony z zakupu. Słuchawki są niewygodne i głośność jest irytująca.",
    "Zaakceptowałem ofertę i kupiłem nowy telefon, który działa bez zarzutu.",
    "Pisanie opinii o produkcie było dla mnie bardzo łatwe i szybkie. ",
    "One są wszystkie, luzacki, nudne, wporzadku, groźny, mieszane, fajny, zły, nie dobry,  dobra, pozytywne, piękne, smutne. ",
    "Całe to jebane zycie to jeden wielki dramat. ",
    "Chuj kurwa chuj. ",
]

# Token-classification pipeline around the fine-tuned model; with aggregation disabled each
# predicted piece is reported on its own.
nlp = pipeline(
    "token-classification",
    model=peft_model,
    tokenizer=tokenizer,
    aggregation_strategy="none"
)

# Collected as {"text", "predictions"} records for the visualisation cell further down.
inference_results = []

for text in example_texts:
    predictions = nlp(text)
    inference_results.append({"text": text, "predictions": predictions})
    print(f"\nText: {text}", "Inference Results:", predictions, sep="\n")

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
The model 'PeftModelForTokenClassification' is not supported for token-classification. Supported models are ['AlbertForTokenClassification', 'BertForTokenClassification', 'BigBirdForTokenClassification', 'BioGptForTokenClassification', 'BloomForTokenClassification', 'BrosForTokenClassification', 'CamembertForTokenClassification', 'CanineForTokenClassification', 'ConvBertForTokenClassification', 'Data2VecTextForTokenClassification', 'DebertaForTokenClassification', 'DebertaV2ForTokenClassification', 'DistilBertForTokenClassification', 'ElectraForTokenClassification', 'ErnieForTokenClassification', 'ErnieMForTokenClassification', 'EsmForTokenClassification', 'FalconForTokenClassification', 'FlaubertForTokenClassification', 'FNetForTokenClassification', 'FunnelForTokenClassification', 'GemmaForTokenClassification', 'Gemma2ForTokenClassification'


Text: Nie jestem zadowolony z zakupu. Słuchawki są niewygodne i głośność jest irytująca.
Inference Results:
[{'entity': 'I-Positive', 'score': 0.19360863, 'index': 1, 'word': 'Nie</w>', 'start': 0, 'end': 3}, {'entity': 'B-Neutral', 'score': 0.18819119, 'index': 2, 'word': 'jestem</w>', 'start': 4, 'end': 10}, {'entity': 'I-Positive', 'score': 0.22508347, 'index': 3, 'word': 'zadowolony</w>', 'start': 11, 'end': 21}, {'entity': 'I-Neutral', 'score': 0.20946932, 'index': 4, 'word': 'z</w>', 'start': 22, 'end': 23}, {'entity': 'I-Neutral', 'score': 0.23290597, 'index': 6, 'word': '.</w>', 'start': 30, 'end': 31}, {'entity': 'I-Neutral', 'score': 0.20850222, 'index': 8, 'word': 'cha', 'start': 35, 'end': 38}, {'entity': 'I-Neutral', 'score': 0.25907308, 'index': 9, 'word': 'wki</w>', 'start': 38, 'end': 41}, {'entity': 'I-Positive', 'score': 0.18994895, 'index': 11, 'word': 'niewy', 'start': 45, 'end': 50}, {'entity': 'I-Neutral', 'score': 0.21083762, 'index': 13, 'word': 'i</w>', 'start

In [35]:
# Text colour used for each sentiment when rendering predictions.
sentiment_colors = dict([
    ('Negative', 'red'),
    ('Neutral', 'gray'),
    ('Positive', 'green'),
])

In [36]:
def get_sentiment(label):
    """Return the sentiment part of a BIO tag.

    For labels starting with ``B-`` or ``I-`` this is everything after the
    first ``-``; any other label is returned unchanged.
    """
    has_bio_prefix = label.startswith(('B-', 'I-'))
    return label.partition('-')[2] if has_bio_prefix else label

def _word_level_sentiments(text, predictions):
    """Project token-level pipeline predictions onto whitespace-separated words.

    Predictions are attached to words through their ``start``/``end``
    character offsets rather than by comparing strings. String comparison
    never matches sub-word pieces and always hits the first occurrence of a
    repeated word.

    Args:
        text: The sentence that was passed to the pipeline.
        predictions: Pipeline output; each entry needs ``entity``, ``score``,
            ``start`` and ``end``. Entries without offsets are skipped.

    Returns:
        ``(words, word_sentiments, word_scores)`` as three parallel lists.
        Words without any prediction keep sentiment ``'O'`` and score ``0.0``.
        When several pieces fall into one word, the highest-scoring piece wins.
    """
    spans = [(match.start(), match.end()) for match in re.finditer(r'\S+', text)]
    words = [text[begin:finish] for begin, finish in spans]
    word_sentiments = ['O'] * len(words)
    word_scores = [0.0] * len(words)

    for pred in predictions:
        start, end = pred.get('start'), pred.get('end')
        if start is None or end is None:
            continue
        # Pure punctuation pieces (e.g. the '.' glued to a word) must not recolour the word.
        if not re.search(r'\w', text[start:end]):
            continue

        for idx, (word_start, word_end) in enumerate(spans):
            if word_start <= start < word_end:
                score = float(pred['score'])
                if word_sentiments[idx] == 'O' or score > word_scores[idx]:
                    word_sentiments[idx] = get_sentiment(pred['entity'])
                    word_scores[idx] = score
                break

    return words, word_sentiments, word_scores


# Render every analysed sentence as a row of words coloured by predicted sentiment.
for result in inference_results:
    text = result['text']
    predictions = result['predictions']

    words, word_sentiments, word_scores = _word_level_sentiments(text, predictions)

    # Words without a prediction ('O') are not in sentiment_colors and render black.
    colors = [sentiment_colors.get(sentiment, 'black') for sentiment in word_sentiments]

    hover_texts = [f"Sentiment: {sentiment}<br>Score: {score:.2f}"
                   for sentiment, score in zip(word_sentiments, word_scores)]

    fig = go.Figure()

    x = 0
    y = 0
    spacing = 0.5  # Adjust spacing between words

    for i, word in enumerate(words):
        # One text-only trace per word so each word carries its own colour and hover text.
        fig.add_trace(go.Scatter(
            x=[x],
            y=[y],
            text=[word],
            mode='text',
            textfont=dict(color=colors[i], size=16),
            hoverinfo='text',
            hovertext=hover_texts[i],
            showlegend=False
        ))
        # Advance x roughly in proportion to the word length
        x += len(word) * 0.1 + spacing

    # Hide axes and grid so only the coloured sentence is visible
    fig.update_layout(
        title="Inference Results",
        xaxis=dict(showgrid=False, showticklabels=False, zeroline=False),
        yaxis=dict(showgrid=False, showticklabels=False, zeroline=False),
        margin=dict(l=20, r=20, t=50, b=20)
    )

    # Display the figure
    fig.show()