In [1]:
# %pip install -q datasets evaluate --upgrade

# %pip install -q transformers==4.26.0



# Register the Hugging Face token for this environment.
# The token is read from the HF_TOKEN environment variable so that no
# credential is stored in the notebook. Any token that was previously written
# here in plain text should be treated as leaked and revoked.
import os

from huggingface_hub.hf_api import HfFolder

_hf_token = os.environ.get("HF_TOKEN")
if _hf_token:
    HfFolder.save_token(_hf_token)
else:
    print("HF_TOKEN is not set; skipping Hugging Face token registration.")

import pandas as pd

from datasets import DatasetDict, Dataset, load_dataset, concatenate_datasets

In [3]:
import dataclasses

import transformers
import torch
from torch.utils.data.dataloader import DataLoader

from transformers.data.data_collator import InputDataClass

from torch.utils.data.distributed import DistributedSampler

from torch.utils.data.sampler import RandomSampler

from typing import List, Union, Dict



class NLPDataCollator:
    """Collate a list of per-example features into a single batch.

    When the features are dicts, every non-string, non-None value is expected
    to be a ``torch.Tensor`` and is stacked along a new leading dimension;
    string values are left out of the batch. Any other feature type is
    delegated to ``transformers.default_data_collator``.
    """

    def __call__(
        self, features: List[Union[InputDataClass, Dict]]
    ) -> Dict[str, torch.Tensor]:
        """Build the batch.

        Args:
            features: Non-empty list of examples; the first one decides how
                the whole list is interpreted.

        Returns:
            Dict mapping field name to batched tensor. ``"labels"`` (when the
            first example has a non-None one) is inserted first.
        """
        first = features[0]

        if not isinstance(first, dict):
            # The original called ``DefaultDataCollator().collate_batch``, a
            # name that is never imported in this notebook; the function-style
            # default collator from the already-imported ``transformers``
            # package is used instead.
            return transformers.default_data_collator(features)

        batch = {}
        first_label = first.get("labels")
        if first_label is not None:
            if first_label.dtype == torch.int64:
                labels = torch.tensor(
                    [f["labels"] for f in features], dtype=torch.long
                )
            elif first_label.dim() > 0 and len(first_label) > 1:
                # Multi-element label vectors: stack into (batch, n_labels).
                labels = torch.stack([f["labels"] for f in features])
            else:
                # Scalar / single-element labels. The ``dim()`` guard above
                # keeps 0-dim tensors away from ``len()``, which would raise.
                labels = torch.tensor(
                    [f["labels"] for f in features], dtype=torch.float
                )
            batch["labels"] = labels

        for k, v in first.items():
            if k != "labels" and v is not None and not isinstance(v, str):
                batch[k] = torch.stack([f[k] for f in features])
        return batch



class StrIgnoreDevice(str):
    """A ``str`` that accepts ``.to(device)`` and simply returns itself.

    ``DataLoaderWithTaskname`` stores the task name in each batch as an
    instance of this class, so code that calls ``.to(device)`` on every batch
    value does not fail on the string entry.
    """

    def to(self, device):
        """Ignore ``device`` and return this same string object."""
        return self



class DataLoaderWithTaskname:
    """Wrap a data loader so that every batch it yields carries a task name."""

    def __init__(self, task_name, data_loader):
        """Keep the wrapped loader and mirror its ``batch_size`` / ``dataset``."""
        self.task_name, self.data_loader = task_name, data_loader

        # Exposed so the wrapper can be inspected like the loader it wraps.
        self.batch_size = data_loader.batch_size
        self.dataset = data_loader.dataset

    def __len__(self):
        """Number of batches in the wrapped loader."""
        return len(self.data_loader)

    def __iter__(self):
        """Yield each batch of the wrapped loader with ``"task_name"`` set."""
        for tagged_batch in self.data_loader:
            # StrIgnoreDevice: a str whose ``.to(device)`` returns itself.
            tagged_batch["task_name"] = StrIgnoreDevice(self.task_name)
            yield tagged_batch



class MultitaskDataloader:
    """Interleave the batches of several per-task data loaders in random order."""

    def __init__(self, dataloader_dict):
        """Record the loaders and how many batches each one provides.

        Args:
            dataloader_dict: Mapping task name -> loader. Each loader must
                support ``len()``, iteration and expose a sized ``dataset``.
        """
        self.dataloader_dict = dataloader_dict

        self.num_batches_dict = {}
        for name, loader in dataloader_dict.items():
            self.num_batches_dict[name] = len(loader)

        self.task_name_list = list(dataloader_dict)

        # Placeholder list: only its length (total number of examples over
        # all tasks) carries information.
        total_examples = sum(
            len(loader.dataset) for loader in dataloader_dict.values()
        )
        self.dataset = [None] * total_examples

    def __len__(self):
        """Total number of batches over all tasks."""
        return sum(self.num_batches_dict.values())

    def __iter__(self):
        """Yield batches, picking the source task in a shuffled schedule.

        Each task index appears in the schedule once per batch of that task,
        so every loader is consumed exactly once per pass.
        """
        schedule = np.array(
            [
                task_index
                for task_index, name in enumerate(self.task_name_list)
                for _ in range(self.num_batches_dict[name])
            ]
        )
        np.random.shuffle(schedule)

        iterators = {
            name: iter(loader) for name, loader in self.dataloader_dict.items()
        }

        for task_index in schedule:
            yield next(iterators[self.task_name_list[task_index]])

In [4]:
import torch

from torch import nn

from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss



import transformers

from transformers import BertTokenizer

from transformers import models

from transformers.modeling_outputs import SequenceClassifierOutput



from transformers.models.bert.configuration_bert import BertConfig

from transformers.models.bert.modeling_bert import (

    BertPreTrainedModel,

    BERT_INPUTS_DOCSTRING,

    _CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,

    _CONFIG_FOR_DOC,

    BertModel,

)



from transformers.file_utils import (

    add_code_sample_docstrings,

    add_start_docstrings_to_model_forward,

)





class BertForSequenceClassification(BertPreTrainedModel):
    """BERT encoder with three task-specific linear classification heads.

    The keyword argument ``task_labels_map`` maps task name -> number of
    labels. Its insertion order decides which head serves which task: the
    first entry uses ``classifier1``, the second ``classifier2`` and the third
    ``classifier3``. ``forward`` picks the head from its ``task_name``
    argument.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and the three heads.

        Args:
            config: BERT configuration; ``classifier_dropout`` falls back to
                ``hidden_dropout_prob`` when it is None.
            **kwargs: Must contain ``task_labels_map`` with at least three
                entries (task name -> number of labels).

        Raises:
            ValueError: If ``task_labels_map`` has fewer than three entries.
        """
        # NOTE(review): the base class is initialised with a blank
        # PretrainedConfig and the real config is attached right after; kept
        # exactly as in the original - confirm this is intentional.
        super().__init__(transformers.PretrainedConfig())
        self.num_labels = kwargs.get("task_labels_map", {})
        self.config = config

        head_sizes = list(self.num_labels.values())
        if len(head_sizes) < 3:
            # The original failed here with a bare IndexError further down.
            raise ValueError(
                "task_labels_map must map three task names to their number of "
                f"labels, got {self.num_labels!r}"
            )

        self.bert = BertModel(config)
        classifier_dropout = (
            config.classifier_dropout
            if config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)

        # Attribute names are part of the checkpoint's state dict; keep them.
        self.classifier1 = nn.Linear(config.hidden_size, head_sizes[0])
        self.classifier2 = nn.Linear(config.hidden_size, head_sizes[1])
        self.classifier3 = nn.Linear(config.hidden_size, head_sizes[2])

        self.init_weights()

    @add_start_docstrings_to_model_forward(
        BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        # Contract (kept out of the docstring so the decorators above fully
        # control the generated documentation):
        #   * ``task_name`` must equal one of the first three keys of
        #     ``task_labels_map``; otherwise ValueError is raised.
        #   * ``labels`` is optional. When given, a BCE-with-logits loss is
        #     computed; when omitted (inference) ``loss`` is None.
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Index 1 of the encoder output is the pooled representation.
        pooled_output = self.dropout(outputs[1])

        task_names = list(self.num_labels.keys())
        if task_name == task_names[0]:
            logits = self.classifier1(pooled_output)
        elif task_name == task_names[1]:
            logits = self.classifier2(pooled_output)
        elif task_name == task_names[2]:
            logits = self.classifier3(pooled_output)
        else:
            # Previously ``logits`` stayed None and the loss call crashed
            # with an unrelated-looking error.
            raise ValueError(
                f"Unknown task_name {task_name!r}; expected one of {task_names[:3]}"
            )

        loss = None
        if labels is not None:
            # BCEWithLogitsLoss needs targets of the same floating dtype as
            # the logits; the cast is a no-op for labels that already match.
            loss = BCEWithLogitsLoss()(logits, labels.to(logits.dtype))
            # The second and third tasks contribute a down-weighted loss.
            if task_name in task_names[1:3]:
                loss = loss / 10

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

## Load Models

In [5]:
%pip install -q evaluate

from sklearn.metrics import f1_score, roc_auc_score, accuracy_score, precision_score, recall_score

from transformers import EvalPrediction

import torch

import evaluate

import numpy as np



def multi_label_metrics(predictions, labels, threshold=0.5):
    """Score multi-label predictions given as raw logits.

    The logits go through a sigmoid and are binarised at ``threshold``
    (values >= threshold become 1). All averaged scores use
    ``average='weighted'``.

    Args:
        predictions: Array-like of logits.
        labels: Ground-truth indicator array, passed to scikit-learn as is.
        threshold: Probability cut-off for a positive prediction.

    Returns:
        Dict with keys ``f1``, ``roc_auc``, ``accuracy``, ``pr`` (precision)
        and ``recall``.
    """
    probs = torch.sigmoid(torch.Tensor(predictions))
    # 1.0 where the probability reaches the threshold, 0.0 elsewhere.
    y_pred = (probs >= threshold).numpy().astype(np.float64)

    # Note: ROC AUC is computed on the thresholded predictions, not on probs.
    weighted_f1 = f1_score(y_true=labels, y_pred=y_pred, average='weighted')
    roc_auc = roc_auc_score(labels, y_pred, average = 'weighted')
    precision = precision_score(labels, y_pred, average='weighted')
    recall = recall_score(labels, y_pred, average='weighted')
    accuracy = accuracy_score(labels, y_pred)

    return {
        'f1': weighted_f1,
        'roc_auc': roc_auc,
        'accuracy': accuracy,
        "pr": precision,
        "recall": recall,
    }



def compute_metrics_multi_label(p):
    """Adapt an evaluation prediction object to ``multi_label_metrics``.

    Args:
        p: Object exposing ``predictions`` (an array, or a tuple whose first
            element holds the logits) and ``label_ids``.

    Returns:
        The metrics dict produced by ``multi_label_metrics``.
    """
    logits = p.predictions
    if isinstance(logits, tuple):
        # Only the first element of a tuple output is scored.
        logits = logits[0]
    return multi_label_metrics(predictions=logits, labels=p.label_ids)

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union

from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler

import time

import math

import transformers



from transformers.trainer_utils import (

    PREFIX_CHECKPOINT_DIR,

    BestRun,

    EvalLoopOutput,

    EvalPrediction,

    FSDPOption,

    HPSearchBackend,

    HubStrategy,

    IntervalStrategy,

    PredictionOutput,

    RemoveColumnsCollator,

    ShardedDDPOption,

    TrainerMemoryTracker,

    TrainOutput,

    default_compute_objective,

    default_hp_space,

    denumpify_detensorize,

    enable_full_determinism,

    find_executable_batch_size,

    get_last_checkpoint,

    has_length,

    number_of_arguments,

    seed_worker,

    set_seed,

    speed_metrics,

)

from transformers.debug_utils import DebugOption, DebugUnderflowOverflow

import copy

import torch.nn.functional as F



class MultitaskDistillationTrainer(transformers.Trainer):
    """Trainer that distils per-task teacher models into one multitask student.

    Training batches come from a ``MultitaskDataloader`` and carry a
    ``"task_name"`` entry (``"redv2"``, ``"laroseda"`` or ``"moroco"``).
    For each batch the student loss is blended with a temperature-scaled
    KL-divergence loss against the matching teacher's logits; the blend
    weight grows linearly with a step counter. ``self.args`` must provide a
    ``temperature`` attribute when distillation is active.
    """

    # Console labels per task, reproduced verbatim from the original logging:
    # (label printed before alpha, label printed before the task's counter).
    _TASK_LOG_LABELS = {
        "redv2": ("Redv2 alpha: ", "contor_Redv2"),
        "laroseda": ("Laroseda alpha: ", "contor_laroseda"),
        "moroco": ("moroco alpha: ", "contor_moroco"),
    }

    def __init__(
        self,
        *args,
        evaluation_time = False,
        steps_redv2 = None,
        steps_laroseda = None,
        steps_moroco = None,
        teacher_redv2 = None,
        teacher_laroseda = None,
        teacher_moroco = None,
        compute_metrics_multi_label = None,
        **kwargs,
    ):
        """Create the trainer.

        Args:
            evaluation_time: When truthy, ``compute_loss`` returns the plain
                student loss and never touches the teachers.
            steps_redv2, steps_laroseda, steps_moroco: Step budgets; only
                ``steps_redv2`` enters the alpha schedule, the others are
                logged.
            teacher_redv2, teacher_laroseda, teacher_moroco: Teacher models.
                Any of them may be None (e.g. for inference-only use); a
                missing teacher only becomes an error when a batch of its
                task reaches the distillation path.
            compute_metrics_multi_label: Metrics callable installed as
                ``compute_metrics`` by ``predict``.
            *args, **kwargs: Forwarded to ``transformers.Trainer``.
        """
        super().__init__(*args, **kwargs)

        self.compute_metrics_multi_label = compute_metrics_multi_label
        self.evaluation_time = evaluation_time

        self.teacher_redv2 = teacher_redv2
        self.teacher_laroseda = teacher_laroseda
        self.teacher_moroco = teacher_moroco

        # Per-task counters of batches seen by ``compute_loss``.
        self.contor_redv2 = 0
        self.contor_laroseda = 0
        self.contor_moroco = 0

        self.steps_redv2 = steps_redv2
        self.steps_laroseda = steps_laroseda
        self.steps_moroco = steps_moroco

        # The teacher parameters default to None; the original moved and
        # ``.eval()``-ed them unconditionally and crashed in that case.
        for teacher in (self.teacher_redv2, self.teacher_laroseda, self.teacher_moroco):
            if teacher is None:
                continue
            self._move_model_to_device(teacher, self.model.device)
            teacher.eval()

    def compute_loss(self, model, inputs, return_outputs=False):
        """Return the (optionally distilled) loss for one batch.

        Args:
            model: The student model.
            inputs: Batch mapping; must contain ``"task_name"``.
            return_outputs: When True, also return the student outputs.

        Returns:
            ``loss`` or ``(loss, outputs_student)``.

        Raises:
            ValueError: If the batch's task is unknown or has no teacher.
        """
        outputs_student = model(**inputs)
        student_loss = outputs_student.loss

        if self.evaluation_time:
            return (student_loss, outputs_student) if return_outputs else student_loss

        task_name = inputs["task_name"]
        if task_name not in self._TASK_LOG_LABELS:
            # Previously the loss silently stayed None for unknown tasks.
            raise ValueError(
                f"Unknown task_name {task_name!r}; expected one of "
                f"{list(self._TASK_LOG_LABELS)}"
            )
        teacher = getattr(self, f"teacher_{task_name}")
        if teacher is None:
            raise ValueError(f"No teacher model was provided for task {task_name!r}")

        # The teacher takes the same batch minus the routing key. A shallow
        # filtered dict is enough; the original deep-copied every tensor.
        teacher_inputs = {key: value for key, value in inputs.items() if key != "task_name"}
        with torch.no_grad():
            outputs_teacher = teacher(**teacher_inputs)
        assert outputs_student.logits.size() == outputs_teacher.logits.size()

        counter_attr = f"contor_{task_name}"
        setattr(self, counter_attr, getattr(self, counter_attr) + 1)

        # Linear warm-up of the student-loss weight from 0.1 towards 0.70.
        # NOTE(review): as in the original, every task derives alpha from the
        # redv2 counter and step budget, while the commented-out predecessor
        # used per-task counters - confirm the shared schedule is intended.
        alpha_i = 0.1 + min(self.contor_redv2 / self.steps_redv2 * (0.70 - 0.1), 0.70 - 0.1)

        temperature = self.args.temperature
        kl_divergence = nn.KLDivLoss(reduction="batchmean")
        # Soften both distributions with the temperature; the T**2 factor
        # rescales the softened loss.
        loss_logits = kl_divergence(
            F.log_softmax(outputs_student.logits / temperature, dim=-1),
            F.softmax(outputs_teacher.logits / temperature, dim=-1),
        ) * (temperature ** 2)

        alpha_label, counter_label = self._TASK_LOG_LABELS[task_name]
        print(
            alpha_label,
            alpha_i,
            counter_label,
            getattr(self, counter_attr),
            f"steps_{task_name}",
            getattr(self, f"steps_{task_name}"),
        )

        loss = alpha_i * student_loss + (1. - alpha_i) * loss_logits
        return (loss, outputs_student) if return_outputs else loss

    def get_single_train_dataloader(self, task_name, train_dataset, shuffle=True):
        """Build a task-tagged data loader over ``train_dataset``.

        Args:
            task_name: Name attached to every batch.
            train_dataset: Dataset to iterate.
            shuffle: Sample in random order (default, the original
                behaviour) or, when False, in dataset order.

        Raises:
            ValueError: If ``train_dataset`` is None.
        """
        # This helper also serves evaluation and prediction, so the guard
        # looks at the dataset actually passed in, not at self.train_dataset.
        if train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")

        if self.args.local_rank == -1:
            sampler = (
                RandomSampler(train_dataset)
                if shuffle
                else SequentialSampler(train_dataset)
            )
        else:
            sampler = DistributedSampler(train_dataset, shuffle=shuffle)

        return DataLoaderWithTaskname(
            task_name=task_name,
            data_loader=DataLoader(
                train_dataset,
                batch_size=self.args.train_batch_size,
                sampler=sampler,
                collate_fn=self.data_collator,
            ),
        )

    def get_train_dataloader(self):
        """Return a ``MultitaskDataloader`` over ``self.train_dataset`` (task -> dataset)."""
        if self.train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")
        return MultitaskDataloader(
            {
                task_name: self.get_single_train_dataloader(task_name, task_dataset)
                for task_name, task_dataset in self.train_dataset.items()
            }
        )

    def get_eval_dataloader(self, eval_dataset, task_name):
        """Return a single-task loader for evaluation, in dataset order."""
        return MultitaskDataloader(
            {
                task_name: self.get_single_train_dataloader(
                    task_name, eval_dataset, shuffle=False
                )
            }
        )

    def get_test_dataloader(self, test_dataset):
        """Return a loader over ``test_dataset`` (a mapping task -> dataset).

        Examples are read in dataset order so that predictions of a task line
        up with that task's dataset.
        """
        return MultitaskDataloader(
            {
                task_name: self.get_single_train_dataloader(
                    task_name, task_dataset, shuffle=False
                )
                for task_name, task_dataset in test_dataset.items()
            }
        )

    def _maybe_log_save_evaluate(self, tr_loss, model, trial, epoch, ignore_keys_for_eval):
        """Log, evaluate and fire save callbacks as requested by ``self.control``."""
        if self.control.should_log:
            logs: Dict[str, float] = {}

            tr_loss_scalar = self._nested_gather(tr_loss).mean().item()
            # Reset the running loss in place.
            tr_loss -= tr_loss

            logs["loss"] = round(tr_loss_scalar / (self.state.global_step - self._globalstep_last_logged), 4)
            logs["learning_rate"] = self._get_learning_rate()

            self._total_loss_scalar += tr_loss_scalar
            self._globalstep_last_logged = self.state.global_step
            self.store_flos()

            self.log(logs)

        metrics = None
        if self.control.should_evaluate:
            if isinstance(self.eval_dataset, dict):
                # One evaluation pass per task; only the metrics of the last
                # task are reported to the hyper-parameter search below.
                for eval_dataset_name, eval_dataset in self.eval_dataset.items():
                    metrics = self.evaluate(
                        eval_dataset=eval_dataset,
                        ignore_keys=ignore_keys_for_eval,
                        eval_dataset_name = eval_dataset_name,
                        metric_key_prefix=f"eval_{eval_dataset_name}",
                    )
            else:
                metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
            self._report_to_hp_search(trial, self.state.global_step, metrics)

        if self.control.should_save:
            # NOTE(review): only the on_save callbacks fire; this method
            # itself writes no checkpoint - confirm that is intended.
            self.control = self.callback_handler.on_save(self.args, self.state, self.control)

    def predict(
        self, test_dataset: Dataset, ignore_keys: Optional[List[str]] = None, metric_key_prefix: str = "test", test_dataset_name = None
    ) -> PredictionOutput:
        """Run prediction over ``test_dataset`` and return predictions and metrics.

        Args:
            test_dataset: Mapping task name -> dataset (it is iterated with
                ``.items()`` by ``get_test_dataloader``).
            ignore_keys: Forwarded to the evaluation loop.
            metric_key_prefix: Prefix of the metric names.
            test_dataset_name: Accepted but not used.

        Note:
            ``self.compute_metrics`` is replaced by
            ``self.compute_metrics_multi_label`` and stays replaced afterwards.
        """
        self._memory_tracker.start()

        test_dataloader = self.get_test_dataloader(test_dataset)
        start_time = time.time()

        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
        self.compute_metrics = self.compute_metrics_multi_label

        output = eval_loop(
            test_dataloader, description="Prediction", ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix
        )

        total_batch_size = self.args.eval_batch_size * self.args.world_size
        if f"{metric_key_prefix}_jit_compilation_time" in output.metrics:
            start_time += output.metrics[f"{metric_key_prefix}_jit_compilation_time"]
        output.metrics.update(
            speed_metrics(
                metric_key_prefix,
                start_time,
                num_samples=output.num_samples,
                num_steps=math.ceil(output.num_samples / total_batch_size),
            )
        )

        self.control = self.callback_handler.on_predict(self.args, self.state, self.control, output.metrics)
        self._memory_tracker.stop_and_update_metrics(output.metrics)
        return PredictionOutput(predictions=output.predictions, label_ids=output.label_ids, metrics=output.metrics)

    def evaluate(
        self,
        eval_dataset: Optional[Dataset] = None,
        ignore_keys: Optional[List[str]] = None,
        eval_dataset_name : str = "none",
        metric_key_prefix: str = "eval",
    ) -> Dict[str, float]:
        """Evaluate on one task's dataset, log and return the metrics.

        Args:
            eval_dataset: Dataset of the task to evaluate.
            ignore_keys: Forwarded to the evaluation loop.
            eval_dataset_name: Task name attached to every batch.
            metric_key_prefix: Prefix of the metric names.
        """
        self._memory_tracker.start()

        eval_dataloader = self.get_eval_dataloader(eval_dataset, eval_dataset_name)
        start_time = time.time()

        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
        output = eval_loop(
            eval_dataloader,
            description="Evaluation",
            # Without a metrics function only the loss is needed.
            prediction_loss_only=True if self.compute_metrics is None else None,
            ignore_keys=ignore_keys,
            metric_key_prefix=metric_key_prefix,
        )

        total_batch_size = self.args.eval_batch_size * self.args.world_size
        if f"{metric_key_prefix}_jit_compilation_time" in output.metrics:
            start_time += output.metrics[f"{metric_key_prefix}_jit_compilation_time"]
        output.metrics.update(
            speed_metrics(
                metric_key_prefix,
                start_time,
                num_samples=output.num_samples,
                num_steps=math.ceil(output.num_samples / total_batch_size),
            )
        )

        self.log(output.metrics)

        if DebugOption.TPU_METRICS_DEBUG in self.args.debug:
            # NOTE(review): ``xm`` and ``met`` are not imported anywhere in
            # this notebook, so this branch raises NameError if it is ever
            # taken - import them before enabling TPU metrics debugging.
            xm.master_print(met.metrics_report())

        print(output.metrics, " = ", eval_dataset_name)

        self._memory_tracker.stop_and_update_metrics(output.metrics)
        return output.metrics

ImportError: cannot import name 'ShardedDDPOption' from 'transformers.trainer_utils' (C:\Users\vmatei\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\trainer_utils.py)

In [None]:
from transformers import TrainingArguments





class DistillationTrainingArguments(TrainingArguments):
    """``TrainingArguments`` extended with two distillation settings.

    Attributes set in addition to the parent's:
        alpha: Stored as given (default 0.5).
        temperature: Stored as given (default 1.0); the distillation trainer
            in this notebook reads it as ``self.args.temperature``.
    """

    def __init__(self, *args, alpha = 0.5, temperature = 1.0, **kwargs):
        """Forward everything else to ``TrainingArguments`` and keep the extras."""
        super().__init__(*args, **kwargs)
        # Assigned after the parent constructor has finished.
        self.alpha, self.temperature = alpha, temperature


## Trainer args

In [15]:
# import logging

# import torch

# import nltk

# import numpy as np

# from datasets import load_dataset

# from torch.utils.data.dataloader import DataLoader

# from tqdm.auto import tqdm

# from tqdm import tqdm as tqdm1



# import transformers

# from filelock import FileLock

# from transformers import set_seed

# from transformers.file_utils import is_offline_mode

# from pathlib import Path

# from huggingface_hub import HfFolder



# dataset_dict = features_dict;

# multitask_model = BertForSequenceClassification.from_pretrained(

#     "mateiaassAI/mtkd-student-ta-1.5-common-steps-0.1-0.7",

#     task_labels_map={"redv2": 7, "laroseda": 2, "moroco": 6},

#     classifier_dropout = 0.2, problem_type="multi_label_classification"

# )



# train_dataset = {

#     task_name: dataset["train"] for task_name, dataset in features_dict.items()

# }



# test_dataset = {

#     task_name: dataset["test"] for task_name, dataset in features_dict.items()

# }



# eval_dataset = {

#     task_name: dataset["test"] for task_name, dataset in features_dict.items()

# }



# test_dataset['moroco'] = test_dataset['moroco'].select(range(500))



# training_args = DistillationTrainingArguments(

#     output_dir="mtkd-student-ta-1-common-steps-0.1-0.7",

#     overwrite_output_dir=True,

#     learning_rate=2e-05,

#     do_train=True,

#     weight_decay=0.01,

#     num_train_epochs=3,

#     per_device_train_batch_size = 16,

#     per_device_eval_batch_size = 16,

#     hub_token = HfFolder.get_token(),

#     evaluation_strategy="steps",

#     eval_steps=500,

#     push_to_hub=False,

#     report_to="none"

# )





# trainer = MultitaskDistillationTrainer(

#     model = multitask_model,

#     args = training_args,

#     teacher_redv2 = teacher_redv2,

#     teacher_laroseda = teacher_laroseda,

#     teacher_moroco = teacher_moroco,

#     data_collator = NLPDataCollator(),

#     train_dataset = train_dataset,

#     eval_dataset = test_dataset,

#     compute_metrics = compute_metrics_multi_label,

#     compute_metrics_multi_label = compute_metrics_multi_label,
#     steps_redv2 = (len(train_dataset["redv2"])) * 3 / 16 + (len(test_dataset["redv2"])) * 3/ 16,
#     steps_laroseda = (len(train_dataset["laroseda"])) * 3 / 16 + (len(test_dataset["laroseda"])) * 3 / 16,
#     steps_moroco = (len(train_dataset["moroco"])) * 3 / 16 + (len(test_dataset["moroco"])) * 3 / 16,
# )

pytorch_model.bin:   0%|          | 0.00/500M [00:00<?, ?B/s]

  return torch.load(checkpoint_file, map_location="cpu")
Some weights of the model checkpoint at dumitrescustefan/bert-base-romanian-cased-v1 were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCl

In [24]:
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
import torch

# 1. Load the tokenizer
model_name = "mateiaassAI/mtkd-student-ta-1.5-common-steps-0.1-0.7"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. Prepare the sentences for inference
sentences = [
    "I am feeling very happy today!",
    "This is a very sad story.",
    "I am so angry about what happened!",
    "This event was shocking and surprising.",
    "I don't know how to feel about this.",
]

# 3. Create a dataset for inference
class InferenceDataset(Dataset):
    """Dataset that tokenizes one sentence per item and tags it with a task name."""

    def __init__(self, sentences, tokenizer, task_name="redv2"):
        """Store the sentences, the tokenizer callable and the task name."""
        self.sentences, self.tokenizer, self.task_name = sentences, tokenizer, task_name

    def __len__(self):
        """Number of sentences."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return the tokenized sentence ``idx`` as a dict of tensors.

        The sentence is padded/truncated to 128 tokens. Every tokenizer
        output has its leading (batch) dimension squeezed away, and the task
        name is added under ``"task_name"``.
        """
        encoded = self.tokenizer(
            self.sentences[idx],
            truncation=True,
            padding="max_length",
            max_length=128,
            return_tensors="pt",
        )
        item = {}
        for field, tensor in encoded.items():
            # The tokenizer returns shape (1, ...); keep a single example.
            item[field] = tensor.squeeze(0)
        # Add "task_name" in the format the model expects.
        item["task_name"] = self.task_name
        return item



In [25]:
# 4. Build the inference dataset (and a plain DataLoader over it)
inference_dataset = InferenceDataset(sentences, tokenizer, task_name="redv2")
inference_dataloader = DataLoader(inference_dataset, batch_size=2)

# 5. Load the multitask student model
model = BertForSequenceClassification.from_pretrained(
    "mateiaassAI/mtkd-student-ta-1.5-common-steps-0.1-0.7",
    task_labels_map={"redv2": 7, "laroseda": 2, "moroco": 6},
    classifier_dropout = 0.2, problem_type="multi_label_classification"
)

trainer = MultitaskDistillationTrainer(
    model=model,
    args=None,
)

# 6. Run inference with the `predict` method.
# `predict` builds its loader via `get_test_dataloader`, which iterates
# `test_dataset.items()`, so the dataset must be passed as a
# {task_name: dataset} mapping rather than as a bare Dataset.
predictions = trainer.predict(test_dataset={"redv2": inference_dataset})

# 7. Post-processing and interpretation
logits = predictions.predictions  # Logits from the model
probabilities = torch.softmax(torch.tensor(logits), dim=-1)  # Turn logits into probabilities
predicted_classes = torch.argmax(probabilities, dim=-1)  # Predicted classes

# 8. Display the results
# NOTE(review): these names and their order are not derived from the model or
# the dataset in this notebook - confirm they match the label order the
# "redv2" head was trained with.
emotion_labels = ["happy", "sad", "angry", "surprised", "neutral", "fear", "disgust"]  # Your classes
for sentence, probs, pred_class in zip(sentences, probabilities, predicted_classes):
    print(f"Sentence: {sentence}")
    print(f"Predicted probabilities: {probs.tolist()}")
    print(f"Predicted emotion: {emotion_labels[pred_class]}")
    print("---")


NameError: name 'MultitaskDistillationTrainer' is not defined