# Klasyfikacja tekstu za pomocą BERT i GPT2

In [1]:
from transformers import BertTokenizer, \
    BertForSequenceClassification, \
    Trainer, \
    TrainingArguments, \
    DataCollatorWithPadding, \
    pipeline
from peft import PeftModel, \
    PeftConfig, \
    LoraConfig, \
    TaskType, \
    get_peft_model
from datasets import load_dataset, concatenate_datasets, Dataset
from sklearn.metrics import accuracy_score, \
    f1_score, \
    precision_score, \
    recall_score
from sklearn.manifold import TSNE
import plotly.express as px
import numpy as np
import pandas as pd
import torch

## BertForSequenceClassification

In [2]:
# Read the JSONL corpus with the generic 'json' loader; the rows are exposed
# under the 'train' split, which is displayed as the cell result.
dataset = load_dataset(
    'json',
    data_files='../task_1/data/full_text_classification.jsonl',
)
dataset['train']

Dataset({
    features: ['text', 'label'],
    num_rows: 4441
})

In [3]:
class BertSequence():
    """Wrapper that fine-tunes a BERT sequence classifier and plots its embeddings.

    The wrapper owns the model, the tokenizer and every intermediate dataset
    (raw, split, tokenized). The intended call order is:
    ``load_data`` -> ``split_data`` -> ``tokenize_data`` -> ``create_trainer``
    -> ``train`` / ``evaluate`` -> ``plot_embeddings``.
    """

    def __init__(self, model_name, use_lora = False):
        """Load the model and tokenizer for ``model_name``.

        Args:
            model_name: Checkpoint identifier passed to ``from_pretrained``.
            use_lora: When true, the base model is wrapped with a LoRA adapter.
        """
        # Text label -> class id; the number of entries also fixes ``num_labels``.
        self.convert_dict = {
            'pozytywny wydźwięk': 0,
            'neutralny wydźwięk': 1,
            'negatywny wydźwięk': 2,
            'mowa nienawiści': 3
        }

        self.model_name = model_name
        self.use_lora = use_lora
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Both loaders print the error and return None on failure.
        self.model = self._load_model()
        self.tokenizer = self._load_tokenizer()

        # Pipeline state, filled in by the methods below.
        self.dataset = None
        self.train_dataset = None
        self.val_dataset = None
        self.train_tokenized = None
        self.val_tokenized = None
        self.trainer = None
        self.sentence_embeddings = None
        self.tsne_df = None

    def _load_model(self):
        """Return the classifier moved to ``self.device``, or None if loading fails."""
        try:
            base_model = BertForSequenceClassification.from_pretrained(
                self.model_name, num_labels = len(self.convert_dict)
            )
            if self.use_lora:
                lora_config = LoraConfig(task_type = TaskType.SEQ_CLS,
                                         r = 64,
                                         lora_alpha = 1,
                                         lora_dropout = 0.1)
                base_model = get_peft_model(model = base_model, peft_config = lora_config)
            return base_model.to(self.device)

        except Exception as e:
            # Best effort: report the problem and leave ``self.model`` as None.
            print("Error loading model:", e)
            return None

    def _load_tokenizer(self):
        """Return the tokenizer for ``self.model_name``, or None if loading fails."""
        try:
            return BertTokenizer.from_pretrained(self.model_name)
        except Exception as e:
            print("Error loading tokenizer:", e)
            return None

    def load_data(self, dataset):
        """Store ``dataset`` (indexed with ``'train'`` by ``split_data``) and return it."""
        self.dataset = dataset
        return self.dataset

    def _convert_labels(self, text):
        """Replace the textual ``label`` of one example with its class id.

        Labels missing from ``convert_dict`` become ``-1``. The example is
        modified in place and returned.
        """
        text['label'] = self.convert_dict.get(text['label'], -1)
        return text

    def split_data(self, test_size = 0.2, extra_df=None, train_reduction=1):
        """Split the loaded data into train and validation sets.

        Args:
            test_size: Fraction of ``dataset['train']`` held out for validation.
            extra_df: Optional DataFrame with ``text`` and ``label`` columns
                appended to the training set. Rows whose text starts with the
                first half of any validation text are dropped first.
            train_reduction: When below 1, only this fraction of the training
                split is kept.

        Returns:
            Tuple ``(train_dataset, val_dataset)``.

        Raises:
            ValueError: If ``load_data`` has not been called.
        """
        if self.dataset is None:
            raise ValueError("Dataset is not loaded.")

        dataset_to_split = self.dataset['train'].train_test_split(test_size = test_size)
        self.train_dataset = dataset_to_split['train'].map(self._convert_labels)
        if train_reduction < 1:
            # The "test" side of this second split has the requested size.
            self.train_dataset = self.train_dataset.train_test_split(test_size=train_reduction)["test"]
        self.val_dataset = dataset_to_split['test'].map(self._convert_labels)

        if extra_df is not None:
            # An empty prefix would match every row and wipe out the extra
            # data, so it is excluded. startswith() accepts a tuple, which
            # replaces a Python-level loop over all prefixes for every row.
            half_texts = tuple(
                {text[:len(text) // 2] for text in self.val_dataset['text']} - {''}
            )
            leaked = extra_df['text'].apply(lambda x: x.startswith(half_texts)).astype(bool)
            df_filtered = extra_df[~leaked]
            # Filtering leaves a non-default index; do not carry it over as an
            # extra column that the original training split does not have.
            extra_dataset = Dataset.from_pandas(df_filtered, preserve_index = False)
            self.train_dataset = concatenate_datasets([self.train_dataset, extra_dataset])

        return self.train_dataset, self.val_dataset

    def _tokenize(self, batch):
        """Tokenize ``batch['text']``, padded/truncated to 128 tokens."""
        return self.tokenizer(batch['text'], padding = 'max_length', truncation = True, max_length = 128)

    def tokenize_data(self):
        """Tokenize both splits and expose them as torch-formatted datasets.

        Returns:
            Tuple ``(train_tokenized, val_tokenized)``.

        Raises:
            ValueError: If ``split_data`` has not been called.
        """
        # Compare with None explicitly: an empty dataset is falsy but is
        # still an initialized dataset.
        if self.train_dataset is None or self.val_dataset is None:
            raise ValueError("Train/validation datasets are not initialized.")

        self.train_tokenized = self.train_dataset.map(self._tokenize, batched = True)
        self.val_tokenized = self.val_dataset.map(self._tokenize, batched = True)

        self.train_tokenized.set_format('torch', columns = ['input_ids', 'attention_mask', 'label'])
        self.val_tokenized.set_format('torch', columns = ['input_ids', 'attention_mask', 'label'])

        return self.train_tokenized, self.val_tokenized

    def _compute_metrics(self, pred):
        """Return accuracy and weighted F1/precision/recall for a prediction object.

        ``pred`` must provide ``label_ids`` and ``predictions``; the predicted
        class is the argmax over axis 1 of ``predictions``.
        """
        labels = pred.label_ids
        preds = np.argmax(pred.predictions, axis = 1)
        accuracy = accuracy_score(labels, preds)
        f1 = f1_score(labels, preds, average = 'weighted')
        precision = precision_score(labels, preds, average = 'weighted', zero_division = np.nan)
        recall = recall_score(labels, preds, average='weighted', zero_division = np.nan)
        return {'accuracy': accuracy, 'f1': f1, 'precision': precision, 'recall': recall}

    def create_trainer(self, output_dir = './results', **training_overrides):
        """Build the ``Trainer`` and store it in ``self.trainer``.

        Args:
            output_dir: Directory handed to ``TrainingArguments``.
            **training_overrides: Optional ``TrainingArguments`` keyword
                arguments that replace the defaults listed below.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``tokenize_data`` has not been called.
        """
        if self.train_tokenized is None or self.val_tokenized is None:
            raise ValueError("Tokenized datasets are not initialized.")

        arguments = {
            'output_dir': output_dir,
            'eval_strategy': 'epoch',
            'learning_rate': 2e-3,
            'per_device_train_batch_size': 4,
            'per_device_eval_batch_size': 4,
            'num_train_epochs': 10,
            'weight_decay': 0.01,
            'logging_dir': './logs',
        }
        arguments.update(training_overrides)
        training_args = TrainingArguments(**arguments)

        self.trainer = Trainer(
            model = self.model,
            args = training_args,
            train_dataset = self.train_tokenized,
            eval_dataset = self.val_tokenized,
            data_collator = DataCollatorWithPadding(tokenizer = self.tokenizer),
            compute_metrics = self._compute_metrics,
        )

        return self

    def train(self):
        """Run ``trainer.train()``; raises ValueError if no trainer exists."""
        if self.trainer:
            return self.trainer.train()
        else:
            raise ValueError("Trainer is not initialized.")

    def evaluate(self):
        """Run ``trainer.evaluate()``; raises ValueError if no trainer exists."""
        if self.trainer:
            return self.trainer.evaluate()
        else:
            raise ValueError("Trainer is not initialized.")

    def _create_embeddings(self, batch_size = 32):
        """Store the first-token vector of the last hidden layer for each validation text.

        The texts are processed in chunks of ``batch_size`` so the whole
        validation set is never held in a single forward pass. The model is
        put in eval mode for the pass and its previous mode is restored after.

        Raises:
            ValueError: If there is no validation split or it is empty.
        """
        if self.val_dataset is None:
            raise ValueError("Train/validation datasets are not initialized.")

        texts = list(self.val_dataset['text'])
        if not texts:
            raise ValueError("Validation dataset is empty.")

        was_training = self.model.training
        self.model.eval()  # disable dropout so the embeddings are deterministic
        chunks = []
        try:
            with torch.no_grad():
                for start in range(0, len(texts), batch_size):
                    inputs = self.tokenizer(texts[start:start + batch_size],
                                            padding = True,
                                            truncation = True,
                                            max_length = 128,
                                            return_tensors = 'pt').to(self.device)
                    outputs = self.model(**inputs, output_hidden_states = True)
                    last_hidden_states = outputs.hidden_states[-1]
                    chunks.append(last_hidden_states[:, 0, :].cpu())
        finally:
            self.model.train(was_training)

        self.sentence_embeddings = torch.cat(chunks, dim = 0)

    def plot_embeddings(self):
        """Project the validation embeddings to 2-D with t-SNE and show a scatter plot.

        The projected points, their textual labels and the source texts are
        kept in ``self.tsne_df``.
        """
        self._create_embeddings()

        tsne = TSNE(n_components = 2, random_state = 42, perplexity = 2)
        tsne_results = tsne.fit_transform(self.sentence_embeddings.numpy())

        self.tsne_df = pd.DataFrame(tsne_results, columns = ['x', 'y'])
        # Invert the label table to show the textual class names in the legend.
        label_mapping = {v: k for k, v in self.convert_dict.items()}
        self.tsne_df['label'] = [label_mapping[label] for label in self.val_dataset['label']]
        self.tsne_df['text'] = self.val_dataset['text']

        color_map = px.colors.qualitative.Vivid[:len(self.convert_dict)]

        fig = px.scatter(
            self.tsne_df, x = 'x', y = 'y', color = 'label',
            title = 'Wizualizacja osadzeń SentenceBERT przy użyciu t-SNE',
            labels = {'label': 'Wydźwięk'},
            hover_name = self.tsne_df['text'],
            color_discrete_sequence = color_map
        )

        fig.update_traces(marker = dict(size = 10), selector = dict(mode = 'markers'))
        fig.show()

In [4]:
def run_model(model_name, dataset, use_lora = False, extra_df=None, train_reduction=1):
    """Run the full pipeline for one checkpoint and return the wrapper.

    Builds a ``BertSequence``, loads and splits ``dataset``, tokenizes it,
    trains, and finally shows the embedding plot. ``extra_df`` and
    ``train_reduction`` are forwarded to ``split_data``. No separate
    evaluation call is made here.
    """
    classifier = BertSequence(model_name, use_lora)
    classifier.load_data(dataset)
    classifier.split_data(extra_df=extra_df, train_reduction=train_reduction)
    classifier.tokenize_data()
    # Training is started on whatever create_trainer() hands back.
    classifier.create_trainer().train()
    classifier.plot_embeddings()

    return classifier

In [None]:
# Baseline run: LoRA fine-tuning on the original dataset only.
model_base = run_model(
    model_name='dkleczek/bert-base-polish-uncased-v1',
    dataset=dataset,
    use_lora=True,
)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dkleczek/bert-base-polish-uncased-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 3552/3552 [00:00<00:00, 21662.59 examples/s]
Map: 100%|██████████| 889/889 [00:00<00:00, 20084.98 examples/s]
Map: 100%|██████████| 3552/3552 [00:00<00:00, 4932.62 examples/s]
Map: 100%|██████████| 889/889 [00:00<00:00, 4680.86 examples/s]
  attn_output = torch.nn.functional.scaled_dot_product_attention(
  6%|▌         | 500/8880 [00:51<14:04,  9.92it/s]

{'loss': 1.032, 'grad_norm': 8.555021286010742, 'learning_rate': 0.0018873873873873875, 'epoch': 0.56}


                                                  
 10%|█         | 888/8880 [01:39<12:26, 10.71it/s]

{'eval_loss': 1.2841933965682983, 'eval_accuracy': 0.6209223847019123, 'eval_f1': 0.5735525839615472, 'eval_precision': 0.6829153963692204, 'eval_recall': 0.6209223847019123, 'eval_runtime': 9.5065, 'eval_samples_per_second': 93.515, 'eval_steps_per_second': 23.458, 'epoch': 1.0}


 11%|█▏        | 1000/8880 [01:50<13:21,  9.83it/s] 

{'loss': 0.9116, 'grad_norm': 5.273428916931152, 'learning_rate': 0.001774774774774775, 'epoch': 1.13}


 17%|█▋        | 1500/8880 [02:41<12:22,  9.94it/s]

{'loss': 0.8936, 'grad_norm': 5.153275966644287, 'learning_rate': 0.0016621621621621622, 'epoch': 1.69}


                                                   
 20%|██        | 1777/8880 [03:19<4:12:12,  2.13s/it]

{'eval_loss': 1.0732808113098145, 'eval_accuracy': 0.6400449943757031, 'eval_f1': 0.6396516613476215, 'eval_precision': 0.6630612876595001, 'eval_recall': 0.6400449943757031, 'eval_runtime': 9.8066, 'eval_samples_per_second': 90.654, 'eval_steps_per_second': 22.74, 'epoch': 2.0}


 23%|██▎       | 2000/8880 [03:42<11:27, 10.01it/s]  

{'loss': 0.7828, 'grad_norm': 5.388566017150879, 'learning_rate': 0.0015495495495495494, 'epoch': 2.25}


 28%|██▊       | 2500/8880 [04:33<10:39,  9.98it/s]

{'loss': 0.7376, 'grad_norm': 5.473073482513428, 'learning_rate': 0.0014369369369369369, 'epoch': 2.82}


                                                   
 30%|███       | 2666/8880 [04:59<2:38:06,  1.53s/it]

{'eval_loss': 1.0354586839675903, 'eval_accuracy': 0.6636670416197975, 'eval_f1': 0.6534564959551702, 'eval_precision': 0.6577872104058562, 'eval_recall': 0.6636670416197975, 'eval_runtime': 9.5146, 'eval_samples_per_second': 93.435, 'eval_steps_per_second': 23.438, 'epoch': 3.0}


 34%|███▍      | 3000/8880 [05:32<09:25, 10.40it/s]  

{'loss': 0.6218, 'grad_norm': 4.6070451736450195, 'learning_rate': 0.0013243243243243243, 'epoch': 3.38}


 39%|███▉      | 3500/8880 [06:21<08:37, 10.40it/s]

{'loss': 0.6183, 'grad_norm': 9.094426155090332, 'learning_rate': 0.0012117117117117118, 'epoch': 3.94}


                                                   
 40%|████      | 3553/8880 [06:36<3:17:37,  2.23s/it]

{'eval_loss': 1.2798043489456177, 'eval_accuracy': 0.6749156355455568, 'eval_f1': 0.6547417488104524, 'eval_precision': 0.6675368113155506, 'eval_recall': 0.6749156355455568, 'eval_runtime': 9.6291, 'eval_samples_per_second': 92.324, 'eval_steps_per_second': 23.159, 'epoch': 4.0}


 45%|████▌     | 4000/8880 [07:21<07:48, 10.42it/s]  

{'loss': 0.4639, 'grad_norm': 0.12660260498523712, 'learning_rate': 0.0010990990990990992, 'epoch': 4.5}


                                                   
 50%|█████     | 4441/8880 [08:16<3:06:05,  2.52s/it]

{'eval_loss': 1.3911395072937012, 'eval_accuracy': 0.6467941507311586, 'eval_f1': 0.6493358782272306, 'eval_precision': 0.659336016626981, 'eval_recall': 0.6467941507311586, 'eval_runtime': 9.7372, 'eval_samples_per_second': 91.299, 'eval_steps_per_second': 22.902, 'epoch': 5.0}


 51%|█████     | 4500/8880 [08:22<07:24,  9.85it/s]  

{'loss': 0.4951, 'grad_norm': 2.313028573989868, 'learning_rate': 0.0009864864864864865, 'epoch': 5.07}


 56%|█████▋    | 5000/8880 [09:11<06:00, 10.77it/s]

{'loss': 0.3746, 'grad_norm': 1.1627041101455688, 'learning_rate': 0.0008738738738738738, 'epoch': 5.63}


                                                   
 60%|██████    | 5330/8880 [09:51<1:25:18,  1.44s/it]

{'eval_loss': 1.4699045419692993, 'eval_accuracy': 0.6749156355455568, 'eval_f1': 0.6779564988782738, 'eval_precision': 0.6850879398727259, 'eval_recall': 0.6749156355455568, 'eval_runtime': 8.9833, 'eval_samples_per_second': 98.961, 'eval_steps_per_second': 24.824, 'epoch': 6.0}


 62%|██████▏   | 5500/8880 [10:08<05:40,  9.92it/s]  

{'loss': 0.368, 'grad_norm': 1.6685246229171753, 'learning_rate': 0.0007612612612612613, 'epoch': 6.19}


 68%|██████▊   | 6000/8880 [10:57<04:49,  9.96it/s]

{'loss': 0.2808, 'grad_norm': 0.1445261687040329, 'learning_rate': 0.0006486486486486487, 'epoch': 6.76}


                                                   
 70%|███████   | 6217/8880 [11:29<1:17:54,  1.76s/it]

{'eval_loss': 1.7925153970718384, 'eval_accuracy': 0.6670416197975253, 'eval_f1': 0.668363528848233, 'eval_precision': 0.6833251530717616, 'eval_recall': 0.6670416197975253, 'eval_runtime': 9.5413, 'eval_samples_per_second': 93.174, 'eval_steps_per_second': 23.372, 'epoch': 7.0}


 73%|███████▎  | 6500/8880 [11:58<04:03,  9.77it/s]  

{'loss': 0.2572, 'grad_norm': 0.011362848803400993, 'learning_rate': 0.000536036036036036, 'epoch': 7.32}


 79%|███████▉  | 7000/8880 [12:47<03:00, 10.39it/s]

{'loss': 0.2281, 'grad_norm': 0.4460116922855377, 'learning_rate': 0.0004234234234234234, 'epoch': 7.88}


                                                   
 80%|████████  | 7105/8880 [13:07<44:43,  1.51s/it]

{'eval_loss': 2.0000033378601074, 'eval_accuracy': 0.655793025871766, 'eval_f1': 0.6606516590236199, 'eval_precision': 0.6776113383873587, 'eval_recall': 0.655793025871766, 'eval_runtime': 9.4304, 'eval_samples_per_second': 94.269, 'eval_steps_per_second': 23.647, 'epoch': 8.0}


 84%|████████▍ | 7500/8880 [13:45<02:12, 10.44it/s]

{'loss': 0.1589, 'grad_norm': 5.597871780395508, 'learning_rate': 0.0003108108108108108, 'epoch': 8.45}


                                                   
 90%|█████████ | 7993/8880 [14:42<22:14,  1.50s/it]

{'eval_loss': 2.093378782272339, 'eval_accuracy': 0.6737907761529809, 'eval_f1': 0.6750452329947789, 'eval_precision': 0.6820931225606343, 'eval_recall': 0.6737907761529809, 'eval_runtime': 9.3805, 'eval_samples_per_second': 94.771, 'eval_steps_per_second': 23.773, 'epoch': 9.0}


 90%|█████████ | 8000/8880 [14:42<08:29,  1.73it/s]

{'loss': 0.1889, 'grad_norm': 0.06768179684877396, 'learning_rate': 0.0001981981981981982, 'epoch': 9.01}


 96%|█████████▌| 8500/8880 [15:31<00:36, 10.43it/s]

{'loss': 0.1202, 'grad_norm': 0.5885409712791443, 'learning_rate': 8.558558558558558e-05, 'epoch': 9.57}


                                                   
100%|██████████| 8880/8880 [16:17<00:00,  9.08it/s]


{'eval_loss': 2.127368688583374, 'eval_accuracy': 0.671541057367829, 'eval_f1': 0.672943989658071, 'eval_precision': 0.6786641553476217, 'eval_recall': 0.671541057367829, 'eval_runtime': 9.3973, 'eval_samples_per_second': 94.602, 'eval_steps_per_second': 23.73, 'epoch': 10.0}
{'train_runtime': 977.5688, 'train_samples_per_second': 36.335, 'train_steps_per_second': 9.084, 'train_loss': 0.485592558147671, 'epoch': 10.0}


In [7]:
# Load the extra samples and turn their textual labels into class ids
# (labels absent from the table come out of ``map`` as NaN).
generated = pd.read_json('new_samples.jsonl', lines=True, encoding='utf-8').assign(
    label=lambda frame: frame['label'].map({
        'pozytywny wydźwięk': 0,
        'neutralny wydźwięk': 1,
        'negatywny wydźwięk': 2,
        'mowa nienawiści': 3,
    })
)
generated

Unnamed: 0,text,label
0,@USER @USER Jesienią mieliśmy trzy zwycięstwa :D.,0
1,@USER myślałem że coś na meczu zrobił bo dzisi...,0
2,"@USER @USER Obejrzałam dziś setk filmów, które...",0
3,@USER patrze na Wisłe i nie mam tam terminarza...,0
4,@USER @USER @USER Ale to były Twoje ostanie sł...,0
...,...,...
1495,@USER @USER Pogonili cię z MON i nadal cwaniak...,3
1496,Gdyby @USER była zwykłą celebrytką to ten gest...,3
1497,@USER Niech się pedofilami zajmijcie!,3
1498,Craxa dziwko derby blisko.....,3


In [None]:
# Augmented run: same checkpoint and LoRA setup, with the extra samples
# passed on as additional training data.
model_gen = run_model(
    model_name='dkleczek/bert-base-polish-uncased-v1',
    dataset=dataset,
    use_lora=True,
    extra_df=generated,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dkleczek/bert-base-polish-uncased-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 3552/3552 [00:00<00:00, 27215.59 examples/s]
Map: 100%|██████████| 889/889 [00:00<00:00, 21167.96 examples/s]
Map: 100%|██████████| 6952/6952 [00:02<00:00, 3352.30 examples/s]
Map: 100%|██████████| 889/889 [00:00<00:00, 4690.93 examples/s]
  3%|▎         | 500/17380 [00:50<28:52,  9.74it/s]

{'loss': 1.2456, 'grad_norm': 5.3903279304504395, 'learning_rate': 0.0019424626006904488, 'epoch': 0.29}


  6%|▌         | 1000/17380 [01:44<29:20,  9.30it/s]

{'loss': 1.1024, 'grad_norm': 4.744230270385742, 'learning_rate': 0.0018849252013808976, 'epoch': 0.58}


  9%|▊         | 1500/17380 [02:40<30:01,  8.81it/s]

{'loss': 1.0269, 'grad_norm': 5.186079025268555, 'learning_rate': 0.0018273878020713464, 'epoch': 0.86}


 10%|█         | 1738/17380 [03:07<29:14,  8.92it/s]
 10%|█         | 1739/17380 [03:18<14:54:29,  3.43s/it]

{'eval_loss': 0.8868707418441772, 'eval_accuracy': 0.6501687289088864, 'eval_f1': 0.643306434083761, 'eval_precision': 0.6426017937191506, 'eval_recall': 0.6501687289088864, 'eval_runtime': 11.0461, 'eval_samples_per_second': 80.481, 'eval_steps_per_second': 20.188, 'epoch': 1.0}


 12%|█▏        | 2000/17380 [03:48<28:40,  8.94it/s]   

{'loss': 0.9471, 'grad_norm': 4.040480613708496, 'learning_rate': 0.0017698504027617952, 'epoch': 1.15}


 14%|█▍        | 2500/17380 [04:44<27:28,  9.03it/s]

{'loss': 0.959, 'grad_norm': 5.683638572692871, 'learning_rate': 0.0017123130034522442, 'epoch': 1.44}


 17%|█▋        | 3000/17380 [05:40<26:58,  8.89it/s]

{'loss': 0.8716, 'grad_norm': 3.640582323074341, 'learning_rate': 0.0016547756041426927, 'epoch': 1.73}


 20%|██        | 3476/17380 [06:33<25:57,  8.92it/s]
 20%|██        | 3477/17380 [06:44<13:09:59,  3.41s/it]

{'eval_loss': 0.844140350818634, 'eval_accuracy': 0.6839145106861643, 'eval_f1': 0.6698286888868137, 'eval_precision': 0.6734351843756646, 'eval_recall': 0.6839145106861643, 'eval_runtime': 10.9786, 'eval_samples_per_second': 80.976, 'eval_steps_per_second': 20.312, 'epoch': 2.0}


 20%|██        | 3500/17380 [06:47<25:59,  8.90it/s]   

{'loss': 0.9158, 'grad_norm': 2.0132317543029785, 'learning_rate': 0.0015972382048331415, 'epoch': 2.01}


 23%|██▎       | 4000/17380 [07:43<24:50,  8.97it/s]

{'loss': 0.7766, 'grad_norm': 5.82294225692749, 'learning_rate': 0.0015397008055235903, 'epoch': 2.3}


 26%|██▌       | 4500/17380 [08:39<23:32,  9.12it/s]

{'loss': 0.8265, 'grad_norm': 4.5971832275390625, 'learning_rate': 0.0014821634062140393, 'epoch': 2.59}


 29%|██▉       | 5000/17380 [09:33<20:16, 10.17it/s]

{'loss': 0.7972, 'grad_norm': 8.572182655334473, 'learning_rate': 0.0014246260069044878, 'epoch': 2.88}


 30%|███       | 5214/17380 [09:54<20:06, 10.09it/s]
 30%|███       | 5216/17380 [10:04<5:21:50,  1.59s/it]

{'eval_loss': 1.0458341836929321, 'eval_accuracy': 0.641169853768279, 'eval_f1': 0.6377714485813739, 'eval_precision': 0.6707358236133457, 'eval_recall': 0.641169853768279, 'eval_runtime': 9.9124, 'eval_samples_per_second': 89.686, 'eval_steps_per_second': 22.497, 'epoch': 3.0}


 32%|███▏      | 5500/17380 [10:32<19:18, 10.25it/s]  

{'loss': 0.6498, 'grad_norm': 4.076499938964844, 'learning_rate': 0.0013670886075949366, 'epoch': 3.16}


 35%|███▍      | 6000/17380 [11:25<20:06,  9.43it/s]

{'loss': 0.6532, 'grad_norm': 2.303037643432617, 'learning_rate': 0.0013095512082853856, 'epoch': 3.45}


 37%|███▋      | 6500/17380 [12:18<19:44,  9.18it/s]

{'loss': 0.6599, 'grad_norm': 4.771157264709473, 'learning_rate': 0.0012520138089758344, 'epoch': 3.74}


 40%|████      | 6952/17380 [13:07<18:35,  9.35it/s]
 40%|████      | 6953/17380 [13:18<9:20:48,  3.23s/it]

{'eval_loss': 1.0428688526153564, 'eval_accuracy': 0.7109111361079865, 'eval_f1': 0.697923562800358, 'eval_precision': 0.7191905725091897, 'eval_recall': 0.7109111361079865, 'eval_runtime': 10.3831, 'eval_samples_per_second': 85.62, 'eval_steps_per_second': 21.477, 'epoch': 4.0}


 40%|████      | 7000/17380 [13:23<18:21,  9.43it/s]  

{'loss': 0.6554, 'grad_norm': 0.806947648525238, 'learning_rate': 0.001194476409666283, 'epoch': 4.03}


 43%|████▎     | 7500/17380 [14:18<16:36,  9.92it/s]

{'loss': 0.4916, 'grad_norm': 3.2879116535186768, 'learning_rate': 0.001136939010356732, 'epoch': 4.32}


 46%|████▌     | 8000/17380 [15:09<15:03, 10.39it/s]

{'loss': 0.5237, 'grad_norm': 0.12488222867250443, 'learning_rate': 0.0010794016110471808, 'epoch': 4.6}


 49%|████▉     | 8500/17380 [15:59<14:22, 10.30it/s]

{'loss': 0.5532, 'grad_norm': 2.2694454193115234, 'learning_rate': 0.0010218642117376295, 'epoch': 4.89}


 50%|████▉     | 8689/17380 [16:18<14:06, 10.26it/s]
 50%|█████     | 8691/17380 [16:28<3:48:39,  1.58s/it]

{'eval_loss': 1.0187673568725586, 'eval_accuracy': 0.7052868391451068, 'eval_f1': 0.6986278751618228, 'eval_precision': 0.700570788434284, 'eval_recall': 0.7052868391451068, 'eval_runtime': 9.8658, 'eval_samples_per_second': 90.109, 'eval_steps_per_second': 22.603, 'epoch': 5.0}


 52%|█████▏    | 9000/17380 [16:59<14:03,  9.94it/s]  

{'loss': 0.4252, 'grad_norm': 5.989590644836426, 'learning_rate': 0.0009643268124280783, 'epoch': 5.18}


 55%|█████▍    | 9500/17380 [17:48<12:37, 10.40it/s]

{'loss': 0.4082, 'grad_norm': 4.686117172241211, 'learning_rate': 0.000906789413118527, 'epoch': 5.47}


 58%|█████▊    | 10000/17380 [18:39<12:14, 10.05it/s]

{'loss': 0.4601, 'grad_norm': 2.969848155975342, 'learning_rate': 0.0008492520138089759, 'epoch': 5.75}


 60%|█████▉    | 10427/17380 [19:22<11:28, 10.10it/s]
 60%|██████    | 10429/17380 [19:32<3:02:26,  1.57s/it]

{'eval_loss': 1.1683367490768433, 'eval_accuracy': 0.7289088863892014, 'eval_f1': 0.7219568325197245, 'eval_precision': 0.7255896033098932, 'eval_recall': 0.7289088863892014, 'eval_runtime': 9.7399, 'eval_samples_per_second': 91.274, 'eval_steps_per_second': 22.896, 'epoch': 6.0}


 60%|██████    | 10500/17380 [19:39<11:10, 10.27it/s]  

{'loss': 0.3891, 'grad_norm': 7.214995861053467, 'learning_rate': 0.0007917146144994247, 'epoch': 6.04}


 63%|██████▎   | 11000/17380 [20:29<10:14, 10.38it/s]

{'loss': 0.3092, 'grad_norm': 0.8020390272140503, 'learning_rate': 0.0007341772151898734, 'epoch': 6.33}


 66%|██████▌   | 11500/17380 [21:19<09:38, 10.17it/s]

{'loss': 0.3104, 'grad_norm': 0.22108261287212372, 'learning_rate': 0.0006766398158803222, 'epoch': 6.62}


 69%|██████▉   | 12000/17380 [22:12<09:47,  9.15it/s]

{'loss': 0.2973, 'grad_norm': 3.6785097122192383, 'learning_rate': 0.000619102416570771, 'epoch': 6.9}


 70%|███████   | 12166/17380 [22:31<09:30,  9.15it/s]
 70%|███████   | 12167/17380 [22:42<4:46:57,  3.30s/it]

{'eval_loss': 1.2580983638763428, 'eval_accuracy': 0.7289088863892014, 'eval_f1': 0.7266090253097621, 'eval_precision': 0.7292531022934478, 'eval_recall': 0.7289088863892014, 'eval_runtime': 10.6335, 'eval_samples_per_second': 83.604, 'eval_steps_per_second': 20.972, 'epoch': 7.0}


 72%|███████▏  | 12500/17380 [23:18<08:49,  9.21it/s]  

{'loss': 0.308, 'grad_norm': 0.042869776487350464, 'learning_rate': 0.0005615650172612198, 'epoch': 7.19}


 75%|███████▍  | 13000/17380 [24:12<08:08,  8.96it/s]

{'loss': 0.246, 'grad_norm': 0.005788292735815048, 'learning_rate': 0.0005040276179516687, 'epoch': 7.48}


 78%|███████▊  | 13500/17380 [25:08<07:06,  9.09it/s]

{'loss': 0.2295, 'grad_norm': 4.377426624298096, 'learning_rate': 0.00044649021864211734, 'epoch': 7.77}


 80%|████████  | 13904/17380 [25:53<06:26,  8.99it/s]
 80%|████████  | 13905/17380 [26:04<3:17:41,  3.41s/it]

{'eval_loss': 1.4859217405319214, 'eval_accuracy': 0.7480314960629921, 'eval_f1': 0.7459034944706555, 'eval_precision': 0.7521120710885545, 'eval_recall': 0.7480314960629921, 'eval_runtime': 10.9932, 'eval_samples_per_second': 80.868, 'eval_steps_per_second': 20.285, 'epoch': 8.0}


 81%|████████  | 14000/17380 [26:15<06:22,  8.84it/s]  

{'loss': 0.1904, 'grad_norm': 0.13343346118927002, 'learning_rate': 0.0003889528193325662, 'epoch': 8.06}


 83%|████████▎ | 14500/17380 [27:10<05:13,  9.18it/s]

{'loss': 0.1771, 'grad_norm': 0.004382592160254717, 'learning_rate': 0.00033141542002301496, 'epoch': 8.34}


 86%|████████▋ | 15000/17380 [28:05<04:26,  8.93it/s]

{'loss': 0.1888, 'grad_norm': 0.0030152376275509596, 'learning_rate': 0.00027387802071346374, 'epoch': 8.63}


 89%|████████▉ | 15500/17380 [29:02<03:31,  8.90it/s]

{'loss': 0.1936, 'grad_norm': 5.814817428588867, 'learning_rate': 0.00021634062140391254, 'epoch': 8.92}


 90%|█████████ | 15642/17380 [29:18<03:08,  9.20it/s]
 90%|█████████ | 15643/17380 [29:28<1:36:28,  3.33s/it]

{'eval_loss': 1.5043483972549438, 'eval_accuracy': 0.7277840269966255, 'eval_f1': 0.725633343673201, 'eval_precision': 0.7326387938439904, 'eval_recall': 0.7277840269966255, 'eval_runtime': 10.7296, 'eval_samples_per_second': 82.855, 'eval_steps_per_second': 20.784, 'epoch': 9.0}


 92%|█████████▏| 16000/17380 [30:08<02:35,  8.88it/s]  

{'loss': 0.1622, 'grad_norm': 0.00031703326385468245, 'learning_rate': 0.00015880322209436135, 'epoch': 9.21}


 95%|█████████▍| 16500/17380 [31:04<01:39,  8.85it/s]

{'loss': 0.1311, 'grad_norm': 0.04865933954715729, 'learning_rate': 0.00010126582278481013, 'epoch': 9.49}


 98%|█████████▊| 17000/17380 [32:00<00:42,  8.97it/s]

{'loss': 0.1606, 'grad_norm': 7.040196895599365, 'learning_rate': 4.372842347525892e-05, 'epoch': 9.78}


100%|██████████| 17380/17380 [32:43<00:00,  8.82it/s]
100%|██████████| 17380/17380 [32:54<00:00,  8.80it/s]


{'eval_loss': 1.4967893362045288, 'eval_accuracy': 0.7401574803149606, 'eval_f1': 0.7394883097888073, 'eval_precision': 0.742562656779451, 'eval_recall': 0.7401574803149606, 'eval_runtime': 10.826, 'eval_samples_per_second': 82.117, 'eval_steps_per_second': 20.599, 'epoch': 10.0}
{'train_runtime': 1974.9686, 'train_samples_per_second': 35.201, 'train_steps_per_second': 8.8, 'train_loss': 0.5280103843697744, 'epoch': 10.0}


In [23]:
# Repeat of the augmented run (same arguments); the result replaces model_gen.
model_gen = run_model(
    model_name='dkleczek/bert-base-polish-uncased-v1',
    dataset=dataset,
    use_lora=True,
    extra_df=generated,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dkleczek/bert-base-polish-uncased-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 3552/3552 [00:00<00:00, 28644.48 examples/s]
Map: 100%|██████████| 889/889 [00:00<00:00, 16395.02 examples/s]
Map: 100%|██████████| 6116/6116 [00:02<00:00, 2947.35 examples/s]
Map: 100%|██████████| 889/889 [00:00<00:00, 4208.31 examples/s]
  3%|▎         | 500/15290 [00:50<24:38, 10.00it/s]

{'loss': 1.1983, 'grad_norm': 6.659948348999023, 'learning_rate': 0.001934597776324395, 'epoch': 0.33}


  7%|▋         | 1000/15290 [01:39<22:50, 10.43it/s]

{'loss': 1.0941, 'grad_norm': 5.1766157150268555, 'learning_rate': 0.00186919555264879, 'epoch': 0.65}


 10%|▉         | 1500/15290 [02:28<23:06,  9.95it/s]

{'loss': 1.026, 'grad_norm': 6.989492893218994, 'learning_rate': 0.0018037933289731852, 'epoch': 0.98}


 10%|█         | 1529/15290 [02:31<22:12, 10.33it/s]
 10%|█         | 1529/15290 [02:41<22:12, 10.33it/s]

{'eval_loss': 0.9485820531845093, 'eval_accuracy': 0.6490438695163104, 'eval_f1': 0.6411862159508835, 'eval_precision': 0.6614403110385118, 'eval_recall': 0.6490438695163104, 'eval_runtime': 9.5433, 'eval_samples_per_second': 93.155, 'eval_steps_per_second': 23.367, 'epoch': 1.0}


 13%|█▎        | 2000/15290 [03:26<21:20, 10.38it/s]  

{'loss': 0.962, 'grad_norm': 4.476503372192383, 'learning_rate': 0.00173839110529758, 'epoch': 1.31}


 16%|█▋        | 2500/15290 [04:15<20:36, 10.34it/s]

{'loss': 0.9011, 'grad_norm': 3.7157745361328125, 'learning_rate': 0.0016729888816219751, 'epoch': 1.64}


 20%|█▉        | 3000/15290 [05:04<19:52, 10.31it/s]

{'loss': 0.8609, 'grad_norm': 3.1122796535491943, 'learning_rate': 0.0016075866579463702, 'epoch': 1.96}


 20%|██        | 3058/15290 [05:10<19:42, 10.34it/s]
 20%|██        | 3058/15290 [05:20<19:42, 10.34it/s]

{'eval_loss': 1.0073072910308838, 'eval_accuracy': 0.6490438695163104, 'eval_f1': 0.6404836444608932, 'eval_precision': 0.644178798378607, 'eval_recall': 0.6490438695163104, 'eval_runtime': 9.6389, 'eval_samples_per_second': 92.231, 'eval_steps_per_second': 23.136, 'epoch': 2.0}


 23%|██▎       | 3500/15290 [06:04<20:21,  9.65it/s]  

{'loss': 0.7547, 'grad_norm': 4.623952388763428, 'learning_rate': 0.0015421844342707652, 'epoch': 2.29}


 26%|██▌       | 4000/15290 [06:54<17:40, 10.64it/s]

{'loss': 0.7736, 'grad_norm': 4.300515651702881, 'learning_rate': 0.0014767822105951603, 'epoch': 2.62}


 29%|██▉       | 4500/15290 [07:42<16:54, 10.63it/s]

{'loss': 0.7676, 'grad_norm': 3.5231921672821045, 'learning_rate': 0.0014113799869195553, 'epoch': 2.94}


 30%|██▉       | 4586/15290 [07:50<16:50, 10.59it/s]
 30%|███       | 4588/15290 [08:00<4:25:49,  1.49s/it]

{'eval_loss': 1.107706069946289, 'eval_accuracy': 0.6434195725534309, 'eval_f1': 0.6430060495858976, 'eval_precision': 0.6523289073054953, 'eval_recall': 0.6434195725534309, 'eval_runtime': 9.2935, 'eval_samples_per_second': 95.658, 'eval_steps_per_second': 23.995, 'epoch': 3.0}


 33%|███▎      | 5000/15290 [08:39<16:09, 10.61it/s]  

{'loss': 0.6487, 'grad_norm': 4.03530216217041, 'learning_rate': 0.0013459777632439502, 'epoch': 3.27}


 36%|███▌      | 5500/15290 [09:27<15:44, 10.36it/s]

{'loss': 0.5979, 'grad_norm': 1.6542956829071045, 'learning_rate': 0.0012805755395683455, 'epoch': 3.6}


 39%|███▉      | 6000/15290 [10:16<15:05, 10.26it/s]

{'loss': 0.5866, 'grad_norm': 3.975454330444336, 'learning_rate': 0.0012151733158927403, 'epoch': 3.92}


 40%|███▉      | 6115/15290 [10:27<14:40, 10.42it/s]
 40%|████      | 6117/15290 [10:37<3:51:05,  1.51s/it]

{'eval_loss': 1.2926857471466064, 'eval_accuracy': 0.6704161979752531, 'eval_f1': 0.6679458458487009, 'eval_precision': 0.6684336462852274, 'eval_recall': 0.6704161979752531, 'eval_runtime': 9.4295, 'eval_samples_per_second': 94.278, 'eval_steps_per_second': 23.649, 'epoch': 4.0}


 43%|████▎     | 6500/15290 [11:15<14:52,  9.84it/s]  

{'loss': 0.4874, 'grad_norm': 3.5461952686309814, 'learning_rate': 0.0011497710922171354, 'epoch': 4.25}


 46%|████▌     | 7000/15290 [12:06<14:05,  9.81it/s]

{'loss': 0.4673, 'grad_norm': 0.6619682908058167, 'learning_rate': 0.0010843688685415304, 'epoch': 4.58}


 49%|████▉     | 7500/15290 [12:57<13:26,  9.66it/s]

{'loss': 0.4894, 'grad_norm': 4.322360038757324, 'learning_rate': 0.0010189666448659255, 'epoch': 4.91}


 50%|█████     | 7645/15290 [13:12<12:59,  9.81it/s]
 50%|█████     | 7646/15290 [13:22<6:34:57,  3.10s/it]

{'eval_loss': 1.3864394426345825, 'eval_accuracy': 0.6681664791901012, 'eval_f1': 0.6630324318594929, 'eval_precision': 0.6614070163288301, 'eval_recall': 0.6681664791901012, 'eval_runtime': 9.9827, 'eval_samples_per_second': 89.054, 'eval_steps_per_second': 22.339, 'epoch': 5.0}


 52%|█████▏    | 8000/15290 [13:58<12:31,  9.70it/s]  

{'loss': 0.3676, 'grad_norm': 0.17063812911510468, 'learning_rate': 0.0009535644211903205, 'epoch': 5.23}


 56%|█████▌    | 8500/15290 [14:50<11:33,  9.78it/s]

{'loss': 0.3641, 'grad_norm': 7.393731117248535, 'learning_rate': 0.0008881621975147155, 'epoch': 5.56}


 59%|█████▉    | 9000/15290 [15:41<10:27, 10.02it/s]

{'loss': 0.3821, 'grad_norm': 4.122737407684326, 'learning_rate': 0.0008227599738391105, 'epoch': 5.89}


 60%|█████▉    | 9173/15290 [15:58<10:09, 10.03it/s]
 60%|██████    | 9175/15290 [16:08<2:43:17,  1.60s/it]

{'eval_loss': 1.710202932357788, 'eval_accuracy': 0.6580427446569179, 'eval_f1': 0.6484294145546344, 'eval_precision': 0.64729228710932, 'eval_recall': 0.6580427446569179, 'eval_runtime': 9.8548, 'eval_samples_per_second': 90.209, 'eval_steps_per_second': 22.628, 'epoch': 6.0}


 62%|██████▏   | 9500/15290 [16:41<09:48,  9.83it/s]  

{'loss': 0.2958, 'grad_norm': 0.49742668867111206, 'learning_rate': 0.0007573577501635056, 'epoch': 6.21}


 65%|██████▌   | 10000/15290 [17:32<08:48, 10.01it/s]

{'loss': 0.2559, 'grad_norm': 0.07480881363153458, 'learning_rate': 0.0006919555264879007, 'epoch': 6.54}


 69%|██████▊   | 10500/15290 [18:23<08:03,  9.90it/s]

{'loss': 0.2777, 'grad_norm': 0.17887644469738007, 'learning_rate': 0.0006265533028122956, 'epoch': 6.87}


 70%|███████   | 10703/15290 [18:44<07:47,  9.81it/s]
 70%|███████   | 10704/15290 [18:54<3:46:36,  2.96s/it]

{'eval_loss': 1.8830510377883911, 'eval_accuracy': 0.6546681664791901, 'eval_f1': 0.6563533732316167, 'eval_precision': 0.6591005463891344, 'eval_recall': 0.6546681664791901, 'eval_runtime': 9.8703, 'eval_samples_per_second': 90.068, 'eval_steps_per_second': 22.593, 'epoch': 7.0}


 72%|███████▏  | 11000/15290 [19:23<07:06, 10.06it/s]  

{'loss': 0.2125, 'grad_norm': 1.5508872270584106, 'learning_rate': 0.0005611510791366907, 'epoch': 7.19}


 75%|███████▌  | 11500/15290 [20:13<06:04, 10.39it/s]

{'loss': 0.2078, 'grad_norm': 0.018924830481410027, 'learning_rate': 0.0004957488554610857, 'epoch': 7.52}


 78%|███████▊  | 12000/15290 [21:02<05:17, 10.35it/s]

{'loss': 0.2048, 'grad_norm': 0.4325437843799591, 'learning_rate': 0.0004303466317854807, 'epoch': 7.85}


 80%|████████  | 12232/15290 [21:25<05:17,  9.64it/s]
 80%|████████  | 12233/15290 [21:35<2:35:10,  3.05s/it]

{'eval_loss': 2.1106364727020264, 'eval_accuracy': 0.6445444319460067, 'eval_f1': 0.6420580385934006, 'eval_precision': 0.6450586697308981, 'eval_recall': 0.6445444319460067, 'eval_runtime': 9.9815, 'eval_samples_per_second': 89.065, 'eval_steps_per_second': 22.341, 'epoch': 8.0}


 82%|████████▏ | 12500/15290 [22:02<04:39,  9.99it/s]  

{'loss': 0.1363, 'grad_norm': 0.036238424479961395, 'learning_rate': 0.0003649444081098757, 'epoch': 8.18}


 85%|████████▌ | 13000/15290 [22:52<03:38, 10.48it/s]

{'loss': 0.1467, 'grad_norm': 0.0024300916120409966, 'learning_rate': 0.0002995421844342708, 'epoch': 8.5}


 88%|████████▊ | 13500/15290 [23:41<02:50, 10.48it/s]

{'loss': 0.1532, 'grad_norm': 0.03939608111977577, 'learning_rate': 0.0002341399607586658, 'epoch': 8.83}


 90%|█████████ | 13761/15290 [24:06<02:33,  9.98it/s]
 90%|█████████ | 13762/15290 [24:16<45:27,  1.79s/it]

{'eval_loss': 2.214906692504883, 'eval_accuracy': 0.6580427446569179, 'eval_f1': 0.6550346129074145, 'eval_precision': 0.6537672246403903, 'eval_recall': 0.6580427446569179, 'eval_runtime': 9.5409, 'eval_samples_per_second': 93.177, 'eval_steps_per_second': 23.373, 'epoch': 9.0}


 92%|█████████▏| 14000/15290 [24:39<02:04, 10.37it/s]

{'loss': 0.1349, 'grad_norm': 0.06357219815254211, 'learning_rate': 0.00016873773708306084, 'epoch': 9.16}


 95%|█████████▍| 14500/15290 [25:29<01:19,  9.95it/s]

{'loss': 0.0913, 'grad_norm': 4.580182075500488, 'learning_rate': 0.00010333551340745586, 'epoch': 9.48}


 98%|█████████▊| 15000/15290 [26:20<00:28, 10.05it/s]

{'loss': 0.0904, 'grad_norm': 1.0164124965667725, 'learning_rate': 3.793328973185088e-05, 'epoch': 9.81}


100%|█████████▉| 15289/15290 [26:49<00:00,  9.68it/s]
100%|██████████| 15290/15290 [26:59<00:00,  9.44it/s]


{'eval_loss': 2.2727625370025635, 'eval_accuracy': 0.6580427446569179, 'eval_f1': 0.6566947730764686, 'eval_precision': 0.6563216315224196, 'eval_recall': 0.6580427446569179, 'eval_runtime': 9.8184, 'eval_samples_per_second': 90.544, 'eval_steps_per_second': 22.712, 'epoch': 10.0}
{'train_runtime': 1619.53, 'train_samples_per_second': 37.764, 'train_steps_per_second': 9.441, 'train_loss': 0.49033701006613034, 'epoch': 10.0}


# Test na fragmencie danych: bez danych wygenerowanych i z nimi

In [5]:
# Baseline for the small-data comparison: LoRA fine-tuning with no extra
# (generated) examples. train_reduction=0.05 appears to keep roughly 5% of the
# training split (the captured log maps 178 examples) -- confirm in run_model.
model_gen = run_model(
    'dkleczek/bert-base-polish-uncased-v1',
    dataset,
    use_lora=True,
    train_reduction=0.05,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dkleczek/bert-base-polish-uncased-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3552 [00:00<?, ? examples/s]

Map:   0%|          | 0/889 [00:00<?, ? examples/s]

Map:   0%|          | 0/178 [00:00<?, ? examples/s]

Map:   0%|          | 0/889 [00:00<?, ? examples/s]

  0%|          | 0/450 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.2316957712173462, 'eval_accuracy': 0.4735658042744657, 'eval_f1': 0.37817897577078763, 'eval_precision': 0.5500440180088342, 'eval_recall': 0.4735658042744657, 'eval_runtime': 11.04, 'eval_samples_per_second': 80.525, 'eval_steps_per_second': 20.199, 'epoch': 1.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.047713279724121, 'eval_accuracy': 0.5748031496062992, 'eval_f1': 0.5329771706873863, 'eval_precision': 0.5392800966866307, 'eval_recall': 0.5748031496062992, 'eval_runtime': 10.6003, 'eval_samples_per_second': 83.865, 'eval_steps_per_second': 21.037, 'epoch': 2.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.1018809080123901, 'eval_accuracy': 0.5523059617547806, 'eval_f1': 0.47954553801320227, 'eval_precision': 0.5428457806410563, 'eval_recall': 0.5523059617547806, 'eval_runtime': 10.1973, 'eval_samples_per_second': 87.18, 'eval_steps_per_second': 21.869, 'epoch': 3.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.06342613697052, 'eval_accuracy': 0.5984251968503937, 'eval_f1': 0.5647747490932047, 'eval_precision': 0.5984526371440653, 'eval_recall': 0.5984251968503937, 'eval_runtime': 9.6849, 'eval_samples_per_second': 91.792, 'eval_steps_per_second': 23.025, 'epoch': 4.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.0957103967666626, 'eval_accuracy': 0.5849268841394826, 'eval_f1': 0.5825459406553161, 'eval_precision': 0.5948382301640246, 'eval_recall': 0.5849268841394826, 'eval_runtime': 9.358, 'eval_samples_per_second': 94.999, 'eval_steps_per_second': 23.83, 'epoch': 5.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.4476534128189087, 'eval_accuracy': 0.592800899887514, 'eval_f1': 0.5632294204712777, 'eval_precision': 0.6271180556231564, 'eval_recall': 0.592800899887514, 'eval_runtime': 9.3381, 'eval_samples_per_second': 95.201, 'eval_steps_per_second': 23.881, 'epoch': 6.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.2643530368804932, 'eval_accuracy': 0.6107986501687289, 'eval_f1': 0.6152377536994156, 'eval_precision': 0.6271697048957953, 'eval_recall': 0.6107986501687289, 'eval_runtime': 9.5531, 'eval_samples_per_second': 93.058, 'eval_steps_per_second': 23.343, 'epoch': 7.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.375073790550232, 'eval_accuracy': 0.6377952755905512, 'eval_f1': 0.6222534636281133, 'eval_precision': 0.6294415962443196, 'eval_recall': 0.6377952755905512, 'eval_runtime': 9.442, 'eval_samples_per_second': 94.154, 'eval_steps_per_second': 23.618, 'epoch': 8.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.417640209197998, 'eval_accuracy': 0.625421822272216, 'eval_f1': 0.6160015325582564, 'eval_precision': 0.6229914029398372, 'eval_recall': 0.625421822272216, 'eval_runtime': 9.5094, 'eval_samples_per_second': 93.487, 'eval_steps_per_second': 23.451, 'epoch': 9.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.4519213438034058, 'eval_accuracy': 0.6287964004499438, 'eval_f1': 0.6173846348403295, 'eval_precision': 0.626586172001631, 'eval_recall': 0.6287964004499438, 'eval_runtime': 9.6147, 'eval_samples_per_second': 92.462, 'eval_steps_per_second': 23.194, 'epoch': 10.0}
{'train_runtime': 145.4076, 'train_samples_per_second': 12.241, 'train_steps_per_second': 3.095, 'train_loss': 0.496083984375, 'epoch': 10.0}


In [9]:
# Same reduced-data LoRA run, this time also passing `generated` through
# extra_df (presumably the synthetic examples added to training -- confirm in
# run_model). Note that this rebinds model_gen, so the earlier result bound to
# that name is no longer reachable afterwards.
model_gen = run_model(
    'dkleczek/bert-base-polish-uncased-v1',
    dataset,
    use_lora=True,
    extra_df=generated,
    train_reduction=0.05,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dkleczek/bert-base-polish-uncased-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/3420 [00:00<?, ?it/s]

  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 2.051175117492676, 'eval_accuracy': 0.33070866141732286, 'eval_f1': 0.24367805242898508, 'eval_precision': 0.5098575936485538, 'eval_recall': 0.33070866141732286, 'eval_runtime': 10.5582, 'eval_samples_per_second': 84.2, 'eval_steps_per_second': 21.121, 'epoch': 1.0}
{'loss': 1.1864, 'grad_norm': 10.053227424621582, 'learning_rate': 0.0017076023391812865, 'epoch': 1.46}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.414591670036316, 'eval_accuracy': 0.44769403824521936, 'eval_f1': 0.4160105751628156, 'eval_precision': 0.48254250855292474, 'eval_recall': 0.44769403824521936, 'eval_runtime': 10.5389, 'eval_samples_per_second': 84.354, 'eval_steps_per_second': 21.16, 'epoch': 2.0}
{'loss': 0.8953, 'grad_norm': 2.2248589992523193, 'learning_rate': 0.0014152046783625731, 'epoch': 2.92}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.5281288623809814, 'eval_accuracy': 0.4611923509561305, 'eval_f1': 0.46291343689262576, 'eval_precision': 0.55201504848921, 'eval_recall': 0.4611923509561305, 'eval_runtime': 10.505, 'eval_samples_per_second': 84.626, 'eval_steps_per_second': 21.228, 'epoch': 3.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 1.906913161277771, 'eval_accuracy': 0.45894263217097864, 'eval_f1': 0.44626613959764394, 'eval_precision': 0.56035014904827, 'eval_recall': 0.45894263217097864, 'eval_runtime': 10.7447, 'eval_samples_per_second': 82.738, 'eval_steps_per_second': 20.754, 'epoch': 4.0}
{'loss': 0.5465, 'grad_norm': 7.312228679656982, 'learning_rate': 0.0011228070175438596, 'epoch': 4.39}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 2.3124516010284424, 'eval_accuracy': 0.45219347581552305, 'eval_f1': 0.432031346397794, 'eval_precision': 0.5730279165816582, 'eval_recall': 0.45219347581552305, 'eval_runtime': 10.6954, 'eval_samples_per_second': 83.12, 'eval_steps_per_second': 20.85, 'epoch': 5.0}
{'loss': 0.3492, 'grad_norm': 1.220110297203064, 'learning_rate': 0.0008304093567251462, 'epoch': 5.85}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 2.3614089488983154, 'eval_accuracy': 0.4881889763779528, 'eval_f1': 0.4830781294820109, 'eval_precision': 0.5602641259087838, 'eval_recall': 0.4881889763779528, 'eval_runtime': 10.6408, 'eval_samples_per_second': 83.546, 'eval_steps_per_second': 20.957, 'epoch': 6.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 2.493682622909546, 'eval_accuracy': 0.5208098987626547, 'eval_f1': 0.5235324450405451, 'eval_precision': 0.580239512922263, 'eval_recall': 0.5208098987626547, 'eval_runtime': 10.9239, 'eval_samples_per_second': 81.381, 'eval_steps_per_second': 20.414, 'epoch': 7.0}
{'loss': 0.1625, 'grad_norm': 0.005689745768904686, 'learning_rate': 0.0005380116959064328, 'epoch': 7.31}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 2.899261951446533, 'eval_accuracy': 0.5016872890888638, 'eval_f1': 0.5051762553763032, 'eval_precision': 0.5463616603902283, 'eval_recall': 0.5016872890888638, 'eval_runtime': 11.1252, 'eval_samples_per_second': 79.909, 'eval_steps_per_second': 20.045, 'epoch': 8.0}
{'loss': 0.0787, 'grad_norm': 0.022471295669674873, 'learning_rate': 0.0002456140350877193, 'epoch': 8.77}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 3.1391661167144775, 'eval_accuracy': 0.48931383577052867, 'eval_f1': 0.48765796619016716, 'eval_precision': 0.5544006265796164, 'eval_recall': 0.48931383577052867, 'eval_runtime': 4.4566, 'eval_samples_per_second': 199.482, 'eval_steps_per_second': 50.039, 'epoch': 9.0}


  0%|          | 0/223 [00:00<?, ?it/s]

{'eval_loss': 3.1918423175811768, 'eval_accuracy': 0.48368953880764903, 'eval_f1': 0.48248057412672435, 'eval_precision': 0.5553631139021648, 'eval_recall': 0.48368953880764903, 'eval_runtime': 10.782, 'eval_samples_per_second': 82.453, 'eval_steps_per_second': 20.683, 'epoch': 10.0}
{'train_runtime': 473.0849, 'train_samples_per_second': 28.917, 'train_steps_per_second': 7.229, 'train_loss': 0.4758354733561912, 'epoch': 10.0}
