In [1]:
from transformers import AutoTokenizer, AutoModelForTokenClassification, Trainer, TrainingArguments
from peft import LoraConfig, TaskType, get_peft_model
import plotly.graph_objects as go
from datasets import Dataset
import pandas as pd
import numpy as np
import evaluate
import evaluate
import json
import re

In [None]:
def process_jsonl(input_file, tokenizer, sentiment_mapping):
    """
    Convert a JSONL file of span-annotated texts into word-level BIO examples.

    Every non-blank line must be a JSON object with a 'text' string and,
    optionally, a 'label' list of ``[start, end, sentiment]`` character spans.
    The text is split into word and punctuation tokens
    ("Hello, world!" -> ["Hello", ",", "world", "!"]); every token that lies
    completely inside a recognised span is tagged ``B-<sentiment>`` (first
    token of the span) or ``I-<sentiment>`` (following tokens). All other
    tokens are tagged "O".

    Args:
        input_file (str): Path to the input JSONL file (read as UTF-8).
        tokenizer: Not used by this function; kept in the signature so that
            existing callers keep working.
        sentiment_mapping (dict): Mapping from the sentiment strings found in
            the file to standardized label names. Spans whose sentiment is
            missing from the mapping (or maps to "O") are ignored.

    Returns:
        list: One ``{'tokens': [...], 'labels': [...]}`` dictionary per input
        record that produced at least one token. Blank lines and records whose
        text contains no tokens (e.g. an empty string) are skipped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no 'text' field.
    """
    # A token is either a run of word characters or one single character that
    # is neither a word character nor whitespace (i.e. punctuation/symbol).
    token_pattern = re.compile(r'\w+|[^\w\s]', re.UNICODE)

    processed_data = []

    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at the end of the file).
            if not line.strip():
                continue

            item = json.loads(line)
            text = item['text']
            # 'label' may be absent or null; both mean "no annotated spans".
            spans = item.get('label') or []

            # finditer yields each token together with its exact character
            # offsets, so no second search through the text is needed.
            matches = list(token_pattern.finditer(text))
            if not matches:
                # Nothing to label; an example without tokens is useless downstream.
                continue

            # The pattern never matches whitespace, so tokens need no stripping.
            tokens = [match.group() for match in matches]
            token_offsets = [match.span() for match in matches]
            token_labels = ["O"] * len(tokens)

            for start, end, sentiment in spans:
                sentiment_standard = sentiment_mapping.get(sentiment, "O")
                if sentiment_standard == "O":
                    continue  # Unrecognised sentiment: leave the tokens as "O"

                # The first token inside the span opens the entity even when the
                # annotated span starts on whitespace or in the middle of a token.
                first_in_span = True
                for i, (token_start, token_end) in enumerate(token_offsets):
                    if token_start >= start and token_end <= end:
                        prefix = "B" if first_in_span else "I"
                        token_labels[i] = f"{prefix}-{sentiment_standard}"
                        first_in_span = False

            processed_data.append({
                "tokens": tokens,
                "labels": token_labels
            })

    return processed_data


# Load the fast tokenizer of the Polish GPT-2 checkpoint.
# NOTE: the tokenizer is later called with is_split_into_words=True, which is
# presumably why add_prefix_space=True is requested here.
tokenizer = AutoTokenizer.from_pretrained(
    "sdadas/polish-gpt2-medium",
    add_prefix_space=True,
    use_fast=True,
)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Identity mapping: the sentiment names used in the annotation file are kept as-is.
sentiment_mapping = {name: name for name in ('Negative', 'Neutral', 'Positive')}

# Turn the annotated JSONL file into word-level tokens with BIO labels.
processed_data = process_jsonl("patryk.jsonl", tokenizer, sentiment_mapping)




In [16]:
# Wrap the processed examples in a Hugging Face Dataset (built through a pandas
# DataFrame) and show the first example as a sanity check.
dataset = Dataset.from_pandas(pd.DataFrame(processed_data))
print("Dataset example:", dataset[0], sep="\n")

Dataset example:
{'tokens': ['Lakier', 'roweru', 'bardzo', 'kiepskiej', 'jakości', 'robią', 'się', 'odpryski', 'nie', 'wiadomo', 'od', 'czego', 'rower', 'ładny', 'wygodny', 'ale', 'po', '3', 'miesiącach', 'użytkowania', 'widoczne', 'odpryski', 'lakieru', 'czego', 'za', 'taką', 'cenę', 'nie', 'powinno', 'być', 'Oczywiście', 'producent', 'twierdzi', 'że', 'są', 'to', 'wady', 'mechaniczne', 'dziecko', 'ma', 'w', 'lepszym', 'stanie', 'lakier', 'na', 'rowerze', 'ale', 'nie', 'z', 'tej', 'firmy', 'ODRADZAM', 'ZAKUP', 'Z', 'TEGO', 'POWODU', 'SZKODA', 'TYLE', 'KASY', 'I', 'NERWÓW', 'chyba', 'ze', 'rower', 'będzie', 'stał', 'nieużywany', 'za', 'szybą', 'Na', 'zakończenie', 'powiem', 'tak', 'porównując', 'lakier', 'zwykły', 'do', 'paznokci', 'a', 'hybrydę', 'wiadomo', 'w', 'tańszym', 'zwykłym', 'lakierze', 'robią', 'się', 'odpryski', 'a', 'lepszym', 'nie'], 'labels': ['O', 'O', 'O', 'B-Negative', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Positive', 'B-Positive', 'O', 'O', 'O', 'O', 'O', 'B

In [17]:
# BIO tag inventory. The position of a tag in this list is its integer class id;
# "O" marks tokens outside any annotated span.
label_list = ["O", "B-Negative", "I-Negative", "B-Positive", "I-Positive", "B-Neutral", "I-Neutral"]

# Build both lookup directions from the list order: id -> tag first, then the inverse.
id_to_label = dict(enumerate(label_list))
label_to_id = {label: idx for idx, label in id_to_label.items()}

print("Label to ID Mapping:", label_to_id, sep="\n")
print("\nID to Label Mapping:", id_to_label, sep="\n")

Label to ID Mapping:
{'O': 0, 'B-Negative': 1, 'I-Negative': 2, 'B-Positive': 3, 'I-Positive': 4, 'B-Neutral': 5, 'I-Neutral': 6}

ID to Label Mapping:
{0: 'O', 1: 'B-Negative', 2: 'I-Negative', 3: 'B-Positive', 4: 'I-Positive', 5: 'B-Neutral', 6: 'I-Neutral'}


In [18]:
from transformers import DataCollatorForTokenClassification

def tokenize_and_align_labels(examples):
    """
    Tokenize a batch of pre-split words and spread the word-level BIO labels
    over the resulting sub-word tokens.

    Uses the module-level ``tokenizer`` and ``label_to_id``.

    Args:
        examples (dict): Batch with 'tokens' (list of word lists) and
            'labels' (list of BIO tag lists, one tag per word).

    Returns:
        The tokenizer output (padded/truncated to 128 tokens, including the
        offset mapping) with an added 'labels' entry holding one label id per
        token: -100 for positions that belong to no word, the word's own tag
        for its first sub-token, and for further sub-tokens the same tag with
        a leading "B-" turned into "I-". Tags missing from ``label_to_id``
        fall back to id 0.
    """
    encoded = tokenizer(
        examples['tokens'],
        is_split_into_words=True,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_offsets_mapping=True
    )

    def label_id_for(word_tags, word_idx, is_continuation):
        """Label id for one sub-token of word ``word_idx``."""
        tag = word_tags[word_idx]
        if is_continuation and tag.startswith("B-"):
            # Only the first sub-token of a word may open an entity.
            tag = tag.replace("B-", "I-")
        return label_to_id.get(tag, 0)

    aligned_labels = []
    for batch_idx, word_tags in enumerate(examples['labels']):
        row = []
        last_word_idx = None
        # word_ids() gives, per token, the index of the word it came from (or None).
        for word_idx in encoded.word_ids(batch_index=batch_idx):
            if word_idx is None:
                row.append(-100)  # position without a source word
            else:
                row.append(label_id_for(word_tags, word_idx, word_idx == last_word_idx))
            last_word_idx = word_idx
        aligned_labels.append(row)

    encoded["labels"] = aligned_labels
    return encoded

# Tokenize all examples in batches. The word-level 'tokens'/'labels' columns are
# removed; the mapped function supplies the tokenizer outputs and the aligned
# per-token 'labels' in their place.
tokenized_datasets = dataset.map(
    tokenize_and_align_labels,
    remove_columns=['tokens', 'labels'],
    batched=True,
)

print("Tokenized Dataset Example:", tokenized_datasets[0], sep="\n")

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Tokenized Dataset Example:
{'labels': [0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, -100, -100, -100, -100, -100, -100], 'input_ids': [573, 301, 523, 29345, 702, 11265, 775, 5059, 5166, 309, 672, 368, 452, 304, 3151, 357, 1386, 5557, 24161, 31670, 478, 291, 719, 10542, 15268, 13048, 672, 368, 452, 46987, 383, 1386, 313, 2594, 6908, 304, 3438, 739, 7083, 15271, 6905, 337, 543, 339, 18822, 42797, 2846, 438, 264, 16163, 1619, 25509, 293, 22660, 478, 304, 268, 725, 3390, 18373, 4069, 3454, 6300, 556, 6197, 57, 52, 556, 14652, 8076, 44938, 9622, 57, 17763, 8337, 7248, 16778, 6266, 431, 15068, 61, 598, 656, 4745, 59, 11390, 1491, 498, 5557, 626, 2174, 34416, 3506, 313, 48395, 

In [19]:
# 80/20 train/evaluation split with a fixed seed so the split is reproducible.
# NOTE(review): this rebinds `tokenized_datasets` from the full dataset to the
# split result, so the cell presumably cannot be re-run on its own without
# re-running the tokenization cell first.
tokenized_datasets = tokenized_datasets.train_test_split(seed=42, test_size=0.2)

# Pull the two splits out under the names used by the Trainer.
train_dataset, eval_dataset = tokenized_datasets['train'], tokenized_datasets['test']

print(
    f"\nNumber of training samples: {len(train_dataset)}",
    f"Number of evaluation samples: {len(eval_dataset)}",
    sep="\n",
)


Number of training samples: 240
Number of evaluation samples: 60


In [20]:
# Batch collator for token classification, built around the shared tokenizer;
# it is handed to the Trainer further down.
data_collator = DataCollatorForTokenClassification(tokenizer)

In [21]:
# Load the pretrained Polish GPT-2 checkpoint with a token-classification head.
# The head gets one output per entry of label_list; id2label/label2id attach the
# BIO tag names to the class ids. (The recorded output notes that the classifier
# weights are newly initialized, i.e. not part of the checkpoint.)
foundation_model = AutoModelForTokenClassification.from_pretrained(
    "sdadas/polish-gpt2-medium",
    num_labels=len(label_list),
    id2label=id_to_label,
    label2id=label_to_id
)
# Bare expression: displays the module tree of the loaded model.
foundation_model

config.json:   0%|          | 0.00/837 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.53G [00:00<?, ?B/s]

Some weights of GPT2ForTokenClassification were not initialized from the model checkpoint at sdadas/polish-gpt2-medium and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPT2ForTokenClassification(
  (transformer): GPT2Model(
    (wte): Embedding(51200, 1024)
    (wpe): Embedding(2048, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=3072, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=1024)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=4096, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=4096)
          (act): FastGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (dropout): Dropout(p=0.1, inplace=False)
  (classifier): Linear(in_features=1024, out_features=7

In [22]:
# Print the dotted name of every sub-module (the root module has an empty name).
# Useful for finding module names, e.g. when choosing LoRA target modules.
# NOTE(review): this prints one line per module and produces a very long output.
for name, module in foundation_model.named_modules():
    print(name)


transformer
transformer.wte
transformer.wpe
transformer.drop
transformer.h
transformer.h.0
transformer.h.0.ln_1
transformer.h.0.attn
transformer.h.0.attn.c_attn
transformer.h.0.attn.c_proj
transformer.h.0.attn.attn_dropout
transformer.h.0.attn.resid_dropout
transformer.h.0.ln_2
transformer.h.0.mlp
transformer.h.0.mlp.c_fc
transformer.h.0.mlp.c_proj
transformer.h.0.mlp.act
transformer.h.0.mlp.dropout
transformer.h.1
transformer.h.1.ln_1
transformer.h.1.attn
transformer.h.1.attn.c_attn
transformer.h.1.attn.c_proj
transformer.h.1.attn.attn_dropout
transformer.h.1.attn.resid_dropout
transformer.h.1.ln_2
transformer.h.1.mlp
transformer.h.1.mlp.c_fc
transformer.h.1.mlp.c_proj
transformer.h.1.mlp.act
transformer.h.1.mlp.dropout
transformer.h.2
transformer.h.2.ln_1
transformer.h.2.attn
transformer.h.2.attn.c_attn
transformer.h.2.attn.c_proj
transformer.h.2.attn.attn_dropout
transformer.h.2.attn.resid_dropout
transformer.h.2.ln_2
transformer.h.2.mlp
transformer.h.2.mlp.c_fc
transformer.h.2.mlp

In [23]:

# Show the task types PEFT offers (reference output only).
print(list(TaskType))

# LoRA settings for the token-classification task. `target_modules` is left
# unset; the PEFT model printed further down shows the LoRA layers attached to
# the attention projection `attn.c_attn`.
lora_config = LoraConfig(
    task_type=TaskType.TOKEN_CLS,          # Token-level classification task
    r=64,                                  # Rank of the LoRA update matrices
    lora_alpha=32,                         # LoRA scaling factor
    lora_dropout=0.05,                     # Dropout probability on the LoRA branch
)

[<TaskType.SEQ_CLS: 'SEQ_CLS'>, <TaskType.SEQ_2_SEQ_LM: 'SEQ_2_SEQ_LM'>, <TaskType.CAUSAL_LM: 'CAUSAL_LM'>, <TaskType.TOKEN_CLS: 'TOKEN_CLS'>, <TaskType.QUESTION_ANS: 'QUESTION_ANS'>, <TaskType.FEATURE_EXTRACTION: 'FEATURE_EXTRACTION'>]


In [24]:

# Wrap the base model with the LoRA adapters described by lora_config.
peft_model = get_peft_model(foundation_model, lora_config)

# Report how many parameters are trainable versus the total parameter count.
peft_model.print_trainable_parameters()



trainable params: 6,298,631 || all params: 363,143,182 || trainable%: 1.7345


In [25]:
# Bare expression: displays the wrapped model so the injected LoRA layers can be inspected.
peft_model

PeftModelForTokenClassification(
  (base_model): LoraModel(
    (model): GPT2ForTokenClassification(
      (transformer): GPT2Model(
        (wte): Embedding(51200, 1024)
        (wpe): Embedding(2048, 1024)
        (drop): Dropout(p=0.1, inplace=False)
        (h): ModuleList(
          (0-23): 24 x GPT2Block(
            (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2SdpaAttention(
              (c_attn): lora.Linear(
                (base_layer): Conv1D(nf=3072, nx=1024)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1024, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embeddi

In [26]:


# Load the "seqeval" metric; compute_metrics uses it to score BIO tag sequences.
metric = evaluate.load("seqeval")

def compute_metrics(p):
    """
    Score a batch of token-classification predictions with the module-level ``metric``.

    Args:
        p: Pair ``(predictions, labels)``. ``predictions`` holds per-token class
            scores and is reduced with argmax over axis 2; ``labels`` holds the
            gold label ids, with -100 marking positions to ignore.

    Returns:
        dict: 'precision', 'recall', 'f1' and 'accuracy', taken from the
        metric's ``overall_*`` results.
    """
    scores, gold_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    # Gold tag sequences, without the ignored (-100) positions.
    true_labels = []
    for gold_row in gold_ids:
        true_labels.append([id_to_label[gold] for gold in gold_row if gold != -100])

    # Predicted tag sequences, restricted to the positions that have a gold label.
    true_predictions = []
    for pred_row, gold_row in zip(predicted_ids, gold_ids):
        kept = [id_to_label[pred] for pred, gold in zip(pred_row, gold_row) if gold != -100]
        true_predictions.append(kept)

    results = metric.compute(predictions=true_predictions, references=true_labels)

    summary = {}
    summary["precision"] = results["overall_precision"]
    summary["recall"] = results["overall_recall"]
    summary["f1"] = results["overall_f1"]
    summary["accuracy"] = results["overall_accuracy"]
    return summary

# Training configuration for the LoRA fine-tuning run.
# - Evaluation and checkpointing both happen once per epoch; the two strategies
#   have to match for load_best_model_at_end to work.
# - `eval_strategy` is the current name of the argument formerly spelled
#   `evaluation_strategy`, which is deprecated in recent transformers releases.
# - The best checkpoint is chosen by the highest evaluation F1 (the "f1" key
#   returned by compute_metrics) and reloaded once training finishes.
# NOTE(review): in the recorded run the F1 is highest after epoch 1 and drops to
# 0.0 later, so the reloaded "best" model is the epoch-1 checkpoint. The learning
# rate and/or the model-selection metric probably need revisiting.
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    save_total_limit=2,           # keep at most two checkpoints on disk
    logging_dir='./logs',
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
)



In [27]:
# Wire the LoRA-wrapped model, the train/eval splits, the collator and the
# metric function into a Trainer.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Fine-tune according to training_args (evaluation runs after every epoch).
trainer.train()

# Evaluate on eval_dataset once training is done. training_args requests
# load_best_model_at_end=True with metric_for_best_model="f1".
# NOTE(review): in the recorded output these final metrics equal the epoch-1
# evaluation rather than the last epoch, which is consistent with the best-F1
# checkpoint having been reloaded.
results = trainer.evaluate()
print("\nEvaluation Results:")
print(results)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/150 [00:00<?, ?it/s]

{'loss': 2.0891, 'grad_norm': 16.019893646240234, 'learning_rate': 1.866666666666667e-05, 'epoch': 0.67}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.728405475616455, 'eval_precision': 0.006798096532970768, 'eval_recall': 0.07518796992481203, 'eval_f1': 0.012468827930174562, 'eval_accuracy': 0.2973993288590604, 'eval_runtime': 2.6065, 'eval_samples_per_second': 23.02, 'eval_steps_per_second': 1.535, 'epoch': 1.0}
{'loss': 1.8032, 'grad_norm': 15.863136291503906, 'learning_rate': 1.7333333333333336e-05, 'epoch': 1.33}
{'loss': 1.4943, 'grad_norm': 14.172293663024902, 'learning_rate': 1.6000000000000003e-05, 'epoch': 2.0}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.3247723579406738, 'eval_precision': 0.00641025641025641, 'eval_recall': 0.045112781954887216, 'eval_f1': 0.011225444340505146, 'eval_accuracy': 0.5398489932885906, 'eval_runtime': 2.3228, 'eval_samples_per_second': 25.831, 'eval_steps_per_second': 1.722, 'epoch': 2.0}
{'loss': 1.2492, 'grad_norm': 12.193614959716797, 'learning_rate': 1.4666666666666666e-05, 'epoch': 2.67}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.0040998458862305, 'eval_precision': 0.007228915662650603, 'eval_recall': 0.022556390977443608, 'eval_f1': 0.010948905109489052, 'eval_accuracy': 0.7479026845637584, 'eval_runtime': 2.4387, 'eval_samples_per_second': 24.603, 'eval_steps_per_second': 1.64, 'epoch': 3.0}
{'loss': 1.0222, 'grad_norm': 9.366022109985352, 'learning_rate': 1.3333333333333333e-05, 'epoch': 3.33}
{'loss': 0.8577, 'grad_norm': 7.331202507019043, 'learning_rate': 1.2e-05, 'epoch': 4.0}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 0.7719802260398865, 'eval_precision': 0.008403361344537815, 'eval_recall': 0.007518796992481203, 'eval_f1': 0.007936507936507938, 'eval_accuracy': 0.8653523489932886, 'eval_runtime': 2.9089, 'eval_samples_per_second': 20.626, 'eval_steps_per_second': 1.375, 'epoch': 4.0}
{'loss': 0.7116, 'grad_norm': 5.934869766235352, 'learning_rate': 1.0666666666666667e-05, 'epoch': 4.67}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 0.6231202483177185, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8947147651006712, 'eval_runtime': 2.8262, 'eval_samples_per_second': 21.23, 'eval_steps_per_second': 1.415, 'epoch': 5.0}
{'loss': 0.6156, 'grad_norm': 4.2790727615356445, 'learning_rate': 9.333333333333334e-06, 'epoch': 5.33}
{'loss': 0.5502, 'grad_norm': 3.1852235794067383, 'learning_rate': 8.000000000000001e-06, 'epoch': 6.0}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 0.540981113910675, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9068791946308725, 'eval_runtime': 2.5041, 'eval_samples_per_second': 23.961, 'eval_steps_per_second': 1.597, 'epoch': 6.0}
{'loss': 0.4944, 'grad_norm': 2.98234486579895, 'learning_rate': 6.666666666666667e-06, 'epoch': 6.67}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 0.502855658531189, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9085570469798657, 'eval_runtime': 2.2561, 'eval_samples_per_second': 26.594, 'eval_steps_per_second': 1.773, 'epoch': 7.0}
{'loss': 0.5031, 'grad_norm': 2.14072847366333, 'learning_rate': 5.333333333333334e-06, 'epoch': 7.33}
{'loss': 0.4583, 'grad_norm': 1.2190487384796143, 'learning_rate': 4.000000000000001e-06, 'epoch': 8.0}


  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.4872707426548004, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9089765100671141, 'eval_runtime': 2.2275, 'eval_samples_per_second': 26.936, 'eval_steps_per_second': 1.796, 'epoch': 8.0}
{'loss': 0.438, 'grad_norm': 1.6391103267669678, 'learning_rate': 2.666666666666667e-06, 'epoch': 8.67}


  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.48108160495758057, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9093959731543624, 'eval_runtime': 2.2163, 'eval_samples_per_second': 27.072, 'eval_steps_per_second': 1.805, 'epoch': 9.0}
{'loss': 0.4401, 'grad_norm': 1.393377661705017, 'learning_rate': 1.3333333333333334e-06, 'epoch': 9.33}
{'loss': 0.4517, 'grad_norm': 1.0753583908081055, 'learning_rate': 0.0, 'epoch': 10.0}


  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.47935137152671814, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9098154362416108, 'eval_runtime': 2.2537, 'eval_samples_per_second': 26.623, 'eval_steps_per_second': 1.775, 'epoch': 10.0}
{'train_runtime': 404.6252, 'train_samples_per_second': 5.931, 'train_steps_per_second': 0.371, 'train_loss': 0.8785693836212158, 'epoch': 10.0}


  0%|          | 0/4 [00:00<?, ?it/s]


Evaluation Results:
{'eval_loss': 1.728405475616455, 'eval_precision': 0.006798096532970768, 'eval_recall': 0.07518796992481203, 'eval_f1': 0.012468827930174562, 'eval_accuracy': 0.2973993288590604, 'eval_runtime': 2.6141, 'eval_samples_per_second': 22.953, 'eval_steps_per_second': 1.53, 'epoch': 10.0}


In [28]:
# Run the evaluation on eval_dataset once more and print the metrics
# (same call as at the end of the training cell).
results = trainer.evaluate()
print("\nEvaluation Results:", results, sep="\n")

  0%|          | 0/4 [00:00<?, ?it/s]


Evaluation Results:
{'eval_loss': 1.728405475616455, 'eval_precision': 0.006798096532970768, 'eval_recall': 0.07518796992481203, 'eval_f1': 0.012468827930174562, 'eval_accuracy': 0.2973993288590604, 'eval_runtime': 2.4149, 'eval_samples_per_second': 24.846, 'eval_steps_per_second': 1.656, 'epoch': 10.0}


In [33]:
from transformers import pipeline

# Token-classification pipeline around the fine-tuned PEFT model. With the
# "simple" aggregation strategy each result carries an 'entity_group' key plus
# 'score', 'word', 'start' and 'end' (see the recorded output).
nlp = pipeline(
    "token-classification",
    tokenizer=tokenizer,
    model=peft_model,
    aggregation_strategy="simple",
)

# Hand-written Polish sentences used as a qualitative smoke test.
example_texts = [
    "Nie jestem zadowolony z zakupu. Słuchawki są niewygodne i głośność jest irytująca.",
    "Zaakceptowałem ofertę i kupiłem nowy telefon, który działa bez zarzutu.",
    "Pisanie opinii o produkcie było dla mnie bardzo łatwe i szybkie. ",
    "One są wszystkie, luzacki, nudne, wporzadku, groźny, mieszane, fajny, zły, nie dobry,  dobra, pozytywne, piękne, smutne. ",
    "Całe to jebane zycie to jeden wielki dramat. ",
    "Chuj kurwa chuj. ",
]

# Run every sentence through the pipeline, keep text + predictions for the
# visualisation cell, and echo them.
inference_results = []
for text in example_texts:
    predictions = nlp(text)
    inference_results.append(dict(text=text, predictions=predictions))
    print(f"\nText: {text}", "Inference Results:", predictions, sep="\n")

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
The model 'PeftModelForTokenClassification' is not supported for token-classification. Supported models are ['AlbertForTokenClassification', 'BertForTokenClassification', 'BigBirdForTokenClassification', 'BioGptForTokenClassification', 'BloomForTokenClassification', 'BrosForTokenClassification', 'CamembertForTokenClassification', 'CanineForTokenClassification', 'ConvBertForTokenClassification', 'Data2VecTextForTokenClassification', 'DebertaForTokenClassification', 'DebertaV2ForTokenClassification', 'DistilBertForTokenClassification', 'ElectraForTokenClassification', 'ErnieForTokenClassification', 'ErnieMForTokenClassification', 'EsmForTokenClassification', 'FalconForTokenClassification', 'FlaubertForTokenClassification', 'FNetForTokenClassification', 'FunnelForTokenClassification', 'GemmaForTokenClassification', 'Gemma2ForTokenClassification'


Text: Nie jestem zadowolony z zakupu. Słuchawki są niewygodne i głośność jest irytująca.
Inference Results:
[{'entity_group': 'Neutral', 'score': 0.22114557, 'word': ' Nie', 'start': 0, 'end': 3}, {'entity_group': 'Positive', 'score': 0.32489192, 'word': ' jestem', 'start': 3, 'end': 10}, {'entity_group': 'Negative', 'score': 0.34837538, 'word': ' zakupu', 'start': 23, 'end': 30}, {'entity_group': 'Negative', 'score': 0.34270105, 'word': '.', 'start': 30, 'end': 31}, {'entity_group': 'Negative', 'score': 0.4033354, 'word': ' Słu', 'start': 31, 'end': 35}, {'entity_group': 'Positive', 'score': 0.2079943, 'word': 'cha', 'start': 35, 'end': 38}, {'entity_group': 'Negative', 'score': 0.6835422, 'word': 'wki', 'start': 38, 'end': 41}, {'entity_group': 'Neutral', 'score': 0.26501462, 'word': ' są', 'start': 41, 'end': 44}, {'entity_group': 'Negative', 'score': 0.48030657, 'word': ' niewygodne', 'start': 44, 'end': 55}, {'entity_group': 'Negative', 'score': 0.41461366, 'word': ' i', 'start':

In [34]:
# Text colour used for each sentiment in the visualisation below.
sentiment_colors = dict(Negative='red', Neutral='gray', Positive='green')

In [None]:
def get_sentiment(label):
    """Return the sentiment part of a BIO tag ('B-Positive' -> 'Positive').

    Labels without a 'B-'/'I-' prefix are returned unchanged.
    """
    has_bio_prefix = label.startswith(('B-', 'I-'))
    # Everything after the first '-' is the sentiment name.
    return label.partition('-')[2] if has_bio_prefix else label

def _word_level_sentiments(text, predictions):
    """
    Project token-classification pipeline predictions onto the whitespace-separated
    words of ``text``.

    Args:
        text (str): The sentence that was passed to the pipeline.
        predictions (list): Pipeline results; each is a dict with 'score',
            'word', a label under 'entity_group' (aggregated output) or
            'entity' (raw token output), and usually 'start'/'end' character
            offsets.

    Returns:
        tuple: ``(words, word_sentiments, word_scores)`` as parallel lists.
        Words without any prediction keep sentiment 'O' and score 0.0; when
        several predictions touch the same word, the highest score wins.
    """
    word_matches = list(re.finditer(r'\S+', text))
    words = [match.group() for match in word_matches]
    word_sentiments = ['O'] * len(words)
    word_scores = [0.0] * len(words)

    for pred in predictions:
        # Aggregated pipeline output stores the label under 'entity_group',
        # un-aggregated output under 'entity'; accept both.
        label = pred.get('entity_group', pred.get('entity'))
        if label is None:
            continue
        sentiment = get_sentiment(label)
        score = float(pred['score'])
        start, end = pred.get('start'), pred.get('end')

        if start is not None and end is not None:
            # Character offsets are unambiguous: they also work for sub-word
            # pieces and for words that occur more than once in the sentence.
            hits = [
                idx for idx, match in enumerate(word_matches)
                if match.start() < end and start < match.end()
            ]
        else:
            # No offsets available: fall back to matching the first word with
            # the same text (case-insensitive, ignoring punctuation).
            piece = pred['word'].replace('</w>', '').strip().lower()
            hits = [
                idx for idx, word in enumerate(words)
                if re.sub(r'[^\w]', '', word).lower() == piece
            ][:1]

        for idx in hits:
            if score >= word_scores[idx]:
                word_sentiments[idx] = sentiment
                word_scores[idx] = score

    return words, word_sentiments, word_scores


# Draw every analysed sentence as a row of words coloured by predicted sentiment.
for result in inference_results:
    text = result['text']
    predictions = result['predictions']

    words, word_sentiments, word_scores = _word_level_sentiments(text, predictions)

    # Words whose sentiment has no configured colour (e.g. 'O') are drawn in black.
    colors = [sentiment_colors.get(sentiment, 'black') for sentiment in word_sentiments]

    hover_texts = [f"Sentiment: {sentiment}<br>Score: {score:.2f}"
                   for sentiment, score in zip(word_sentiments, word_scores)]

    fig = go.Figure()

    x = 0
    y = 0
    spacing = 0.5  # Gap between consecutive words, in axis units

    for i, word in enumerate(words):
        # One text-only trace per word so that each word gets its own colour and hover text.
        fig.add_trace(go.Scatter(
            x=[x],
            y=[y],
            text=[word],
            mode='text',
            textfont=dict(color=colors[i], size=16),
            hoverinfo='text',
            hovertext=hover_texts[i],
            showlegend=False
        ))
        # Advance by a rough estimate of the word width plus the gap.
        x += len(word) * 0.1 + spacing

    # Hide axes and grid: only the coloured words should be visible.
    fig.update_layout(
        title="Inference Results",
        xaxis=dict(showgrid=False, showticklabels=False, zeroline=False),
        yaxis=dict(showgrid=False, showticklabels=False, zeroline=False),
        margin=dict(l=20, r=20, t=50, b=20)
    )

    # Display the figure
    fig.show()