In [1]:
import torch
import os
print("torch.__version__:", torch.__version__)
print("torch.version.cuda:", torch.version.cuda)
print("cuda available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("gpu count:", torch.cuda.device_count())
    print("device name:", torch.cuda.get_device_name(0))

os.environ["ACCELERATE_MIXED_PRECISION"] = "no"
os.environ["TORCH_CUDNN_V8_API_DISABLED"] = "1"  # evita warning em algumas builds



torch.__version__: 2.9.0+cu126
torch.version.cuda: 12.6
cuda available: True
gpu count: 1
device name: NVIDIA GeForce RTX 3060


In [None]:
from datasets import load_dataset
dataset = load_dataset("dougtrajano/olid-br")
df_train = dataset["train"].to_pandas()
df_test = dataset["test"].to_pandas()
df_train["label"] = df_train["is_offensive"].apply(lambda x: 1 if x == "OFF" else 0)
df_test["label"] = df_test["is_offensive"].apply(lambda x: 1 if x == "OFF" else 0)


In [5]:
df_train = dataset["train"].to_pandas()
df_test = dataset["test"].to_pandas()

In [6]:
df_train.head()

Unnamed: 0,id,text,is_offensive,is_targeted,targeted_type,toxic_spans,health,ideology,insult,lgbtqphobia,other_lifestyle,physical_aspects,profanity_obscene,racism,religious_intolerance,sexism,xenophobia
0,c779826dc43f460cb18e8429ca443477,Pior do que adolescentezinhas de merda...são p...,OFF,UNT,,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",False,False,True,False,False,False,True,False,False,True,False
1,a3d7839456ae4258a70298fcf637952e,"Podia ter beijo também, pra ver se o homofóbic...",OFF,UNT,,"[24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 3...",False,False,True,False,False,False,False,False,False,False,False
2,b830374760cc44789f1493c6b228ebc1,"o monark não é racista que filha da putagem, j...",OFF,TIN,GRP,"[36, 37, 38, 39, 40, 41, 42]",False,False,True,False,False,False,False,False,False,False,False
3,73299805588d424fa2905eaf9e616cf1,fudeu minha mãe acabou de chegar em casa e eu ...,OFF,TIN,OTH,"[0, 1, 2, 3, 4, 5, 72, 73, 74]",False,False,True,False,False,False,True,False,False,False,False
4,862cf8f819814e47bb4e7f6b4b0afc73,"Vergonha,ainda segura a bandeira de um país de...",OFF,UNT,,"[58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 6...",False,True,True,True,False,False,False,False,False,False,True


In [8]:
df_train["label"] = df_train["is_offensive"].apply(lambda x: 1 if x == "OFF" else 0)
df_test["label"] = df_test["is_offensive"].apply(lambda x: 1 if x == "OFF" else 0)


In [2]:
import pandas as pd

# Carrega o arquivo TSV
df = pd.read_csv("data/all_cases.csv")

df.head()

Unnamed: 0.1,Unnamed: 0,functionality,case_id,test_case,label_gold,target_ident,direction,focus_words,focus_lemma,ref_case_id,ref_templ_id,templ_id,case_templ
0,0,derog_neg_emote_h,1,I hate women.,hateful,women,general,hate,hate,,,1,I hate [IDENTITY_P].
1,1,derog_neg_emote_h,2,I hate trans people.,hateful,trans people,general,hate,hate,,,1,I hate [IDENTITY_P].
2,2,derog_neg_emote_h,3,I hate gay people.,hateful,gay people,general,hate,hate,,,1,I hate [IDENTITY_P].
3,3,derog_neg_emote_h,4,I hate black people.,hateful,black people,general,hate,hate,,,1,I hate [IDENTITY_P].
4,4,derog_neg_emote_h,5,I hate disabled people.,hateful,disabled people,general,hate,hate,,,1,I hate [IDENTITY_P].


In [3]:
# script adaptado para rodar no notebook
#!/usr/bin/env python3
# subtaskA_local.py
# Adapted training script for Subtask A (hate speech detection)
# Supports: LoRA (sequence classification) and Prompt Tuning (causal LM)
# Designed to run locally with LLaMA 3.2 1B/3B, 4-bit quantization (bitsandbytes)

import argparse
import os
import re
from typing import List, Dict, Tuple
import numpy as np
import pandas as pd
from datasets import Dataset
from tqdm.auto import tqdm
from datasets import load_dataset
from sklearn.model_selection import train_test_split

import torch
from transformers import (
    AutoTokenizer,
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)
from peft import (
    LoraConfig,
    PromptTuningConfig,
    PromptTuningInit,
    get_peft_model,
)
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report

# ----------------------- Utilities -----------------------

def simple_preprocess(text: str) -> str:
    if not isinstance(text, str):
        return ''
    # basic normalizations used in the original pipeline
    text = text.replace('&amp;', ' and ')
    text = text.replace('&', ' and ')
    text = re.sub(r'[\r\n]+', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text


def load_csv(data_path: str, text_col: str = 'text', label_col: str = 'label') -> pd.DataFrame:
    df = pd.read_csv(data_path)
    assert text_col in df.columns, f"Column '{text_col}' not found in CSV"
    if label_col not in df.columns:
        raise ValueError(f"Label column '{label_col}' not found in CSV; Subtask A requires labels 0/1")
    # keep only needed columns and drop na
    df = df[[text_col, label_col]].dropna().reset_index(drop=True)
    df = df.rename(columns={text_col: 'text', label_col: 'label'})
    df['text'] = df['text'].astype(str).map(simple_preprocess)
    df['label'] = df['label'].astype(int)
    return df


def load_dataset_from_bib(dataset_name: str = "dougtrajano/olid-br", text_col: str = 'text', label_col: str = 'label') -> Tuple[pd.DataFrame, pd.DataFrame]:
    dataset = load_dataset(dataset_name) 
    df_train = dataset["train"].to_pandas()
    df_test = dataset["test"].to_pandas()

    df_train["label"] = df_train["is_offensive"].apply(lambda x: 1 if x == "OFF" else 0)
    df_test["label"] = df_test["is_offensive"].apply(lambda x: 1 if x == "OFF" else 0)

    # keep only needed columns and drop na
    df_train = df_train[[text_col, label_col]].dropna().reset_index(drop=True)
    df_train = df_train.rename(columns={text_col: 'text', label_col: 'label'})
    df_train['text'] = df_train['text'].astype(str).map(simple_preprocess)
    df_train['label'] = df_train['label'].astype(int)

    # keep only needed columns and drop na
    df_test = df_test[[text_col, label_col]].dropna().reset_index(drop=True)
    df_test = df_test.rename(columns={text_col: 'text', label_col: 'label'})
    df_test['text'] = df_test['text'].astype(str).map(simple_preprocess)
    df_test['label'] = df_test['label'].astype(int)
    return df_train, df_test

def load_synthetic(path: str, text_col: str = 'text', label_col: str = 'label') -> Tuple[pd.DataFrame, pd.DataFrame]:
    df = pd.read_csv("data/all_cases.csv") 
    
    df['text'] = df['test_case']
    df["label"] = df["label_gold"].apply(lambda x: 1 if x == "hateful" else 0)

    df = df[[text_col, label_col]].dropna().reset_index(drop=True)
    df = df.rename(columns={text_col: 'text', label_col: 'label'})
    df['text'] = df['text'].astype(str).map(simple_preprocess)
    df['label'] = df['label'].astype(int)

    df_train, df_test = train_test_split(df, test_size = 0.2, random_state=42, shuffle=True)
    df_train, df_val = train_test_split(df_train, test_size = 0.2, random_state=42, shuffle=True)
    return df_train, df_val, df_test

# ----------------------- LoRA (Sequence Classification) -----------------------

def train_lora(
    model_name: str,
    train_df: pd.DataFrame,
    val_df: pd.DataFrame,
    output_dir: str,
    batch_size: int = 4,
    num_epochs: int = 3,
    learning_rate: float = 1e-4,
    lora_r: int = 16,
    lora_alpha: int = 16,
    lora_dropout: float = 0.05,
    use_4bit: bool = True,
):
    print('*** Training with LoRA (Sequence Classification)')

    # bitsandbytes config for 4-bit
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=use_4bit,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype='float32',
        bnb_4bit_use_double_quant=True,
    )

    # Load tokenizer and model for seq classification
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,
        device_map='auto',
        quantization_config=bnb_config,
        trust_remote_code=True,
    )

    # Add LoRA PEFT
    peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        target_modules=['q_proj', 'v_proj'],
        lora_dropout=lora_dropout,
        bias='none',
        task_type='SEQ_CLS',
    )

    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

    # Tokenize datasets
    def tokenize_fn(batch):
        return tokenizer(batch['text'], truncation=True, padding='max_length', max_length=256)

    train_ds = Dataset.from_pandas(train_df)
    val_ds = Dataset.from_pandas(val_df)
    train_ds = train_ds.map(tokenize_fn, batched=True, remove_columns=['text'])
    val_ds = val_ds.map(tokenize_fn, batched=True, remove_columns=['text'])
    train_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
    val_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding='longest')

    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        eval_strategy='epoch',
        save_strategy='epoch',
        num_train_epochs=num_epochs,
        learning_rate=learning_rate,
        fp16=False,
        bf16=False,
        logging_strategy='steps',
        logging_steps=50,
        load_best_model_at_end=True,
        metric_for_best_model='eval_f1',
        greater_is_better=True,
        report_to=[],
    )

    def compute_metrics(pred):
        logits = pred.predictions
        preds = np.argmax(logits, axis=1)
        labels = pred.label_ids
        return {
            'accuracy': accuracy_score(labels, preds),
            'f1': f1_score(labels, preds, average='macro'),
            'precision': precision_score(labels, preds, zero_division=0),
            'recall': recall_score(labels, preds, zero_division=0),
        }

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        data_collator=data_collator,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    train_result = trainer.train()
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    print('--- Evaluation on validation set ---')
    preds = trainer.predict(val_ds)
    print(preds.metrics)
    return trainer, tokenizer


# ----------------------- Prompt Tuning (Causal LM) -----------------------

def train_prompt_tuning(
    model_name: str,
    train_df: pd.DataFrame,
    val_df: pd.DataFrame,
    output_dir: str,
    batch_size: int = 4,
    num_epochs: int = 10,
    learning_rate: float = 5e-4,
    num_virtual_tokens: int = 40,
    use_4bit: bool = False,   # IMPORTANT: default False for stability with prompt tuning
):
    """
    Prompt tuning training loop (improved):
     - Uses an explicit instruction template
     - Defaults to no 4-bit quantization for stability (can be enabled)
     - Performs batch generation evaluation, extracting only generated tokens
    """
    import os
    os.environ["ACCELERATE_MIXED_PRECISION"] = "no"  # ensure accelerate not forcing AMP

    print('*** Training with Prompt Tuning (Causal LM) - improved version ***')
    print(f"Model: {model_name} | num_virtual_tokens: {num_virtual_tokens} | use_4bit: {use_4bit}")

    # BitsAndBytes config (used only if use_4bit=True)
    bnb_config = None
    if use_4bit:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type='nf4',
            bnb_4bit_compute_dtype='float16',
            bnb_4bit_use_double_quant=False,
        )

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=True)
    # Ensure padding token set
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.pad_token_id = tokenizer.eos_token_id
    #tokenizer.padding_side = getattr(tokenizer, "padding_side", "right")
    tokenizer.padding_side = "left"

    # Load model (CausalLM)
    model_kwargs = dict(device_map='auto', trust_remote_code=True)
    if bnb_config is not None:
        model_kwargs['quantization_config'] = bnb_config

    model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)

    # Prompt tuning config (soft prompt initialized from text)
    prompt_cfg = PromptTuningConfig(
        task_type='CAUSAL_LM',
        prompt_tuning_init=PromptTuningInit.TEXT,
        num_virtual_tokens=num_virtual_tokens,
        prompt_tuning_init_text='[INST] Your task: classify if the text contains hate speech (0=no, 1=yes). [/INST]',
        tokenizer_name_or_path=model_name,
    )

    model = get_peft_model(model, prompt_cfg)
    model.print_trainable_parameters()

    # Instruction template (explicit; helps model understand the task)
    template = "[INST] Classify whether the following sentence contains hate speech. Answer with a single token '0' (no) or '1' (yes).\n\nText: {text} [/INST]"

    # Build training-style input sequences where label is target text
    def build_example(row):
        inp = template.format(text=row['text'])
        tgt = str(int(row['label']))
        return {'input_text': inp, 'target_text': tgt}

    train_examples = [build_example(r) for _, r in train_df.iterrows()]
    val_examples = [build_example(r) for _, r in val_df.iterrows()]

    # Tokenize with text_target to enable causal LM labels (Trainer calculates loss)
    def tokenize_fn(examples):
        model_inputs = tokenizer(
            examples['input_text'],
            padding='max_length',
            truncation=True,
            max_length=256,
        )

        # Tokenizar o alvo como texto
        labels = tokenizer(
            examples['target_text'],
            padding='max_length',
            truncation=True,
            max_length=8,
        )['input_ids']

        # Preenche os labels para o mesmo comprimento de input_ids com -100
        full_labels = []
        for i in range(len(model_inputs['input_ids'])):
            lbl = labels[i]
            # cria vetor do mesmo tamanho que input_ids preenchido com -100
            padded = [-100] * len(model_inputs['input_ids'][i])
            # substitui os últimos tokens pelos rótulos (curtos)
            padded[-len(lbl):] = lbl
            full_labels.append(padded)

        model_inputs['labels'] = full_labels
        return model_inputs
        # examples is a dict with lists: 'input_text' and 'target_text'
        #return tokenizer(examples['input_text'], text_target=examples['target_text'],
        #                 padding='max_length', truncation=True, max_length=256)

    train_ds = Dataset.from_list(train_examples)
    val_ds = Dataset.from_list(val_examples)
    train_ds = train_ds.map(tokenize_fn, batched=True, remove_columns=['input_text', 'target_text'])
    val_ds = val_ds.map(tokenize_fn, batched=True, remove_columns=['input_text', 'target_text'])
    train_ds.set_format(type='torch')
    val_ds.set_format(type='torch')

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding='longest')

    # Training args - NO AMP (fp16/bf16) for prompt tuning unless you know what you do
    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        eval_strategy='epoch',
        save_strategy='epoch',
        num_train_epochs=num_epochs,
        learning_rate=learning_rate,
        fp16=False,
        bf16=False,
        logging_strategy='steps',
        logging_steps=50,
        load_best_model_at_end=True,
        metric_for_best_model='eval_loss',
        greater_is_better=False,
        report_to=[],
    )

    # We will not use a compute_metrics in Trainer for generation-based metric here (too noisy).
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    # Train
    train_result = trainer.train()
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    # Manual batch evaluation using generation (faster and controlled)
    print('\n--- Manual evaluation of Prompt-tuned model on validation set (generation + parse) ---')
    model.eval()

    device = next(model.parameters()).device
    val_texts = [template.format(text=t) for t in val_df['text'].tolist()]
    val_labels = [int(x) for x in val_df['label'].tolist()]

    # Batch generation
    preds = []
    batch_size_gen = max(1, 8)  # generation batch size (you can increase if GPU allows)
    for i in range(0, len(val_texts), batch_size_gen):
        batch_texts = val_texts[i:i+batch_size_gen]
        # tokenize input only (no target) for generation
        inputs = tokenizer(batch_texts, return_tensors='pt', padding=True, truncation=True, max_length=256)
        input_ids = inputs['input_ids'].to(device)
        attention_mask = inputs.get('attention_mask', None)
        if attention_mask is not None:
            attention_mask = attention_mask.to(device)

        # generate deterministically (greedy)
        with torch.no_grad():
            out = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=8,   # allow some room to generate label or small text
                do_sample=False,
                temperature=0.0,
                top_k=1,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id,
                early_stopping=True,
                num_return_sequences=1,
            )

        # For each sample, extract only the generated portion (after input length)
        for b_idx in range(out.shape[0]):
            generated_ids = out[b_idx]
            # slice off the input prefix
            in_len = input_ids.shape[1]  # note: this is same for batch due to padding
            # However when padded, the model output includes tokens for padding; safer to find the first occurrence of eos or decode and remove input text
            # Strategy: decode whole output then remove the decoded input prefix if present
            decoded_full = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
            # Try to remove the input text from decoded_full if present (best-effort)
            prefix = tokenizer.decode(input_ids[b_idx], skip_special_tokens=True).strip()
            if decoded_full.startswith(prefix):
                generated_text = decoded_full[len(prefix):].strip()
            else:
                # fallback: take last tokens up to a short string
                generated_text = decoded_full

            # Parse first occurrence of 0 or 1 in generated_text
            m = re.search(r"\b[01]\b", generated_text)
            if m:
                pred = int(m.group())
            else:
                # fallback: sometimes model outputs 'No'/'Yes' or words; map heuristics
                gen_lower = generated_text.lower()
                if gen_lower.startswith('no') or 'no' in gen_lower.split():
                    pred = 0
                elif gen_lower.startswith('yes') or 'yes' in gen_lower.split():
                    pred = 1
                else:
                    # default to most frequent class in training (safe fallback)
                    pred = int(round(np.mean(train_df['label'].values)))  # majority class fallback
            preds.append(pred)

    # Print classification report
    print(classification_report(val_labels, preds, digits=4))

    return trainer, tokenizer



def main(method='lora', model_name='meta-llama/Llama-3.2-1B', output_dir='output', batch_size=4,
         epochs=3, lr=1e-4):
    print('Loading CSV..')

    #olidbr
    #train_df, test_df = load_dataset_from_bib()
    #train_df, val_df = train_test_split(train_df, test_size = 0.2, random_state=42, shuffle=True)
    
    # synthetic
    train_df, val_df, test_df = load_synthetic(path="data/all_cases.csv")

    print(f"Train size: {len(train_df)} - Val size: {len(val_df)} - Test size: {len(test_df)}")
    
    torch.manual_seed(42)

    if method == 'lora':
        trainer, tokenizer = train_lora(
            model_name=model_name,
            train_df=train_df,
            val_df=val_df,
            output_dir=output_dir,
            batch_size=batch_size,
            num_epochs=epochs,
            learning_rate=lr,
            lora_r=16,
            lora_alpha=16,
            lora_dropout=0.05,
            use_4bit=True,
        )
    else:
        trainer, tokenizer = train_prompt_tuning(
            model_name=model_name,
            train_df=train_df,
            val_df=val_df,
            output_dir=output_dir,
            batch_size=batch_size,
            num_epochs=epochs,
            learning_rate=lr,
            num_virtual_tokens=60,
            use_4bit=True,
        )

    print('Done.')


In [4]:
main(method='lora', model_name='meta-llama/Llama-3.2-3B')

Loading CSV..
Train size: 2496 - Val size: 624 - Test size: 781
*** Training with LoRA (Sequence Classification)


tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/844 [00:00<?, ?B/s]

Exception in thread Thread-7 (_readerthread):
Traceback (most recent call last):
  File "c:\Users\lucas\anaconda3\envs\env_dl_projeto\lib\threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "c:\Users\lucas\anaconda3\envs\env_dl_projeto\lib\threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "c:\Users\lucas\anaconda3\envs\env_dl_projeto\lib\subprocess.py", line 1515, in _readerthread
    buffer.append(fh.read())
  File "c:\Users\lucas\anaconda3\envs\env_dl_projeto\lib\codecs.py", line 322, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc6 in position 8: invalid continuation byte


model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-3B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 4,593,664 || all params: 3,217,349,632 || trainable%: 0.1428


Map:   0%|          | 0/2496 [00:00<?, ? examples/s]

Map:   0%|          | 0/624 [00:00<?, ? examples/s]

  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 128001}.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1505,0.017471,0.996795,0.996057,0.995536,1.0


KeyboardInterrupt: 

In [4]:
main(method='prompt', model_name='meta-llama/Llama-3.2-3B', epochs=3)

Loading CSV..
Train size: 2496 - Val size: 624 - Test size: 781
*** Training with Prompt Tuning (Causal LM) - improved version ***
Model: meta-llama/Llama-3.2-3B | num_virtual_tokens: 60 | use_4bit: True


Exception in thread Thread-4 (_readerthread):
Traceback (most recent call last):
  File "c:\Users\lucas\anaconda3\envs\env_dl_projeto\lib\threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "c:\Users\lucas\anaconda3\envs\env_dl_projeto\lib\threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "c:\Users\lucas\anaconda3\envs\env_dl_projeto\lib\subprocess.py", line 1515, in _readerthread
    buffer.append(fh.read())
  File "c:\Users\lucas\anaconda3\envs\env_dl_projeto\lib\codecs.py", line 322, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc6 in position 8: invalid continuation byte


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


trainable params: 184,320 || all params: 3,212,934,144 || trainable%: 0.0057


Map:   0%|          | 0/2496 [00:00<?, ? examples/s]

Map:   0%|          | 0/624 [00:00<?, ? examples/s]

  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 128001}.


Epoch,Training Loss,Validation Loss
1,0.1077,0.043608
2,0.0324,0.019144
3,0.013,0.012792


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k', 'early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



--- Manual evaluation of Prompt-tuned model on validation set (generation + parse) ---




              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       178
           1     0.7147    1.0000    0.8336       446

    accuracy                         0.7147       624
   macro avg     0.3574    0.5000    0.4168       624
weighted avg     0.5109    0.7147    0.5958       624

Done.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
