# Import libraries

In [None]:
import json
import os
import datasets
import pandas as pd
from datasets import load_dataset, Dataset, DatasetDict
import transformers
import dataclasses
from torch.utils.data.dataloader import DataLoader
from transformers.data.data_collator import DataCollator, InputDataClass
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data.sampler import RandomSampler
from typing import List, Union, Dict
import numpy as np
import torch
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from transformers import BertTokenizer
from transformers import models
from transformers.modeling_outputs import SequenceClassifierOutput

from transformers.models.bert.configuration_bert import BertConfig
from transformers.models.bert.modeling_bert import (
    BertPreTrainedModel,
    BERT_INPUTS_DOCSTRING,
    _TOKENIZER_FOR_DOC,
    _CHECKPOINT_FOR_DOC,
    _CONFIG_FOR_DOC,
    BertModel,
)
from transformers.models.distilbert.modeling_distilbert import (
    DistilBertPreTrainedModel,
    DISTILBERT_INPUTS_DOCSTRING,
    _TOKENIZER_FOR_DOC,
    _CHECKPOINT_FOR_DOC,
    _CONFIG_FOR_DOC,
    DistilBertModel,
)

from transformers.models.xlm.modeling_xlm import (XLMPreTrainedModel,
                                                  XLM_INPUTS_DOCSTRING,
                                                  _TOKENIZER_FOR_DOC,
                                                  _CHECKPOINT_FOR_DOC,
                                                  _CONFIG_FOR_DOC,
                                                  XLMModel,)
from transformers.models.roberta.modeling_roberta import (
    RobertaPreTrainedModel,
    ROBERTA_INPUTS_DOCSTRING,
    _TOKENIZER_FOR_DOC,
    _CHECKPOINT_FOR_DOC,
    _CONFIG_FOR_DOC,
    RobertaModel,
)
from transformers.models.electra.modeling_electra import (
    ElectraPreTrainedModel,
    ELECTRA_INPUTS_DOCSTRING,
    _TOKENIZER_FOR_DOC,
    _CHECKPOINT_FOR_DOC,
    _CONFIG_FOR_DOC,
    ElectraModel,
)

from transformers.models.xlm_roberta.modeling_xlm_roberta import (
    XLMRobertaModel,
)
from transformers.file_utils import (
    add_code_sample_docstrings,
    add_start_docstrings_to_model_forward,
)
import logging
import nltk
import numpy as np
from datasets import load_dataset, load_metric
from torch.utils.data.dataloader import DataLoader
from tqdm.auto import tqdm
from tqdm import tqdm as tqdm1

import transformers
# from accelerate import Accelerator
from filelock import FileLock
from transformers import set_seed
from transformers.file_utils import is_offline_mode
from pathlib import Path
from sklearn.metrics import classification_report



In [None]:
from transformers import PreTrainedModel

# Dataset definition

In [None]:

# Module-level logger obtained through the `datasets` logging helpers; it is
# used by MultitaskDataset._generate_examples to report the file being read.
logger = datasets.logging.get_logger(__name__)


class DatasetConfig(datasets.BuilderConfig):
  """Builder configuration used by the multitask dataset builder."""

  def __init__(self, **kwargs):
      """Create the configuration.

      Args:
        **kwargs: keyword arguments handed unchanged to
          ``datasets.BuilderConfig``.
      """
      super().__init__(**kwargs)


class MultitaskDataset(datasets.GeneratorBasedBuilder):
  """MultitaskDataset: Version 1.0.0

  Dataset builder that reads delimited text files and yields examples with
  two string fields, ``processed_data`` and ``target``.
  """

  BUILDER_CONFIGS = [
      DatasetConfig(
          name="plain_text",
          version=datasets.Version("1.0.0", ""),
          description="Plain text",
      ),
  ]

  def _info(self):
      """Describe the dataset: two string features, no supervised keys."""
      example_schema = datasets.Features(
          {
              "processed_data": datasets.Value("string"),
              "target": datasets.Value("string"),
          }
      )
      return datasets.DatasetInfo(
          description="Multitask dataset",
          features=example_schema,
          # No (input, target) pair is declared as supervised keys.
          supervised_keys=None,
          homepage="",
          citation="",
      )

  def _split_generators(self, dl_manager):
      """Create the train/validation/test split generators.

      The configured ``data_files`` mapping is expected to provide the keys
      ``"train"``, ``"val"`` and ``"test"``.
      """
      local_paths = dl_manager.download_and_extract(self.config.data_files)
      # (split name, key in data_files) pairs, in the order they are emitted.
      split_sources = (
          (datasets.Split.TRAIN, "train"),
          (datasets.Split.VALIDATION, "val"),
          (datasets.Split.TEST, "test"),
      )
      return [
          datasets.SplitGenerator(
              name=split_name,
              gen_kwargs={"filepath": local_paths[source_key]},
          )
          for split_name, source_key in split_sources
      ]

  def _generate_examples(self, filepath):
      """Yield ``(id, example)`` pairs read from ``filepath`` in raw text form.

      The file is parsed as tab-separated when ``".tsv"`` occurs anywhere in
      the path and with the pandas default separator otherwise. It must
      contain the columns ``id``, ``processed_data`` and ``target``.
      """
      logger.info("generating examples from = %s", filepath)
      reader_options = {"sep": "\t"} if ".tsv" in filepath else {}
      table = pd.read_csv(filepath, **reader_options)

      for _, record in table.iterrows():
          example = {
              "processed_data": record["processed_data"],
              "target": record["target"],
          }
          yield record["id"], example

# Tokenization and model

## PhoBert_large

In [None]:

def phobert_large_convert_to_features(
    example_batch, model_name="vinai/phobert-large", max_length=200
):
    """Tokenize a batch of examples for the PhoBERT-large model.

    Args:
        example_batch: Mapping with a ``"processed_data"`` sequence of texts
            and a ``"target"`` entry holding the labels.
        model_name: Name or path passed to ``PhobertTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated/padded to.

    Returns:
        The tokenizer output with the batch targets stored under ``"labels"``.
    """
    phobert_tokenizer = transformers.PhobertTokenizer.from_pretrained(model_name)
    texts = [*example_batch["processed_data"]]

    # Every sequence is padded/truncated to exactly ``max_length`` tokens.
    encoded = phobert_tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )
    encoded["labels"] = example_batch["target"]
    return encoded
class Phobert_large_model(RobertaPreTrainedModel):
    """Multi-task sequence classifier on top of a shared RoBERTa encoder.

    One linear head per task is attached to the encoder's pooled output. The
    heads are registered as ``classifier1``, ``classifier2``, ... following the
    key order of ``task_labels_map``; ``forward`` selects the head that
    matches ``task_name``.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and one head per task.

        Args:
            config: Model configuration; ``hidden_size``,
                ``classifier_dropout`` and ``hidden_dropout_prob`` are read
                from it.
            **kwargs: ``task_labels_map`` maps each task name to its number
                of output labels. Other keyword arguments are ignored.
        """
        # NOTE(review): the parent is initialised with an empty
        # PretrainedConfig and the real config is attached right after. Kept
        # as in the original -- confirm whether ``super().__init__(config)``
        # is preferable for the installed transformers version.
        super().__init__(transformers.PretrainedConfig())
        self.num_labels = kwargs.get("task_labels_map", {})
        self.config = config

        self.roberta = RobertaModel(config)
        classifier_dropout = (
            config.classifier_dropout
            if config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        # Task-specific output heads. The attribute names classifier1..N are
        # kept so existing checkpoints still load, but the number of heads now
        # follows the task map instead of being fixed at seven.
        for head_index, label_count in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{head_index}",
                nn.Linear(config.hidden_size, label_count),
            )

        self.init_weights()

    def _task_head(self, task_name):
        """Return the classification head registered for ``task_name``."""
        for head_index, known_task in enumerate(self.num_labels, start=1):
            if known_task == task_name:
                return getattr(self, f"classifier{head_index}")
        raise ValueError(
            f"Unknown task_name {task_name!r}; "
            f"expected one of {list(self.num_labels)}"
        )

    def _task_loss(self, logits, labels, task_name):
        """Compute the loss of one task's ``logits`` against ``labels``."""
        label_count = self.num_labels[task_name]
        # An explicitly configured problem type wins. Otherwise it is inferred
        # per call and NOT written back to the shared config, so tasks with
        # different label counts do not inherit the first task's loss type.
        problem_type = self.config.problem_type
        if problem_type is None:
            if label_count == 1:
                problem_type = "regression"
            elif label_count > 1 and labels.dtype in (torch.long, torch.int):
                problem_type = "single_label_classification"
            else:
                problem_type = "multi_label_classification"

        if problem_type == "regression":
            loss_fct = MSELoss()
            if label_count == 1:
                return loss_fct(logits.squeeze(), labels.squeeze())
            return loss_fct(logits, labels)
        if problem_type == "single_label_classification":
            loss_fct = CrossEntropyLoss()
            return loss_fct(logits.view(-1, label_count), labels.view(-1))
        if problem_type == "multi_label_classification":
            loss_fct = BCEWithLogitsLoss()
            return loss_fct(logits, labels)
        # Unrecognised configured problem type: no loss, as before.
        return None

    # NOTE(review): the ``_*_FOR_DOC`` names are imported from several
    # modeling modules at the top of the file, so the last import wins; the
    # generated code sample may not describe this backbone.
    @add_start_docstrings_to_model_forward(
        ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (`optional`):
            Targets for the selected task. When given, a loss is computed:
            Mean-Square loss if the task has one label, Cross-Entropy if it
            has several labels and ``labels`` is an integer tensor, and
            binary cross-entropy with logits otherwise (unless
            ``config.problem_type`` is set explicitly).
        task_name:
            Key of ``task_labels_map`` selecting the output head.

        Raises:
            ValueError: If ``task_name`` is not a key of ``task_labels_map``.
        """
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Second element of the encoder output is used as the pooled
        # representation fed to the task head.
        pooled_output = self.dropout(outputs[1])
        logits = self._task_head(task_name)(pooled_output)

        loss = None
        if labels is not None:
            loss = self._task_loss(logits, labels, task_name)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

def phobert_large_model_loading():
    """Load ``vinai/phobert-large`` into the multi-task PhoBERT-large model.

    Returns:
        A ``Phobert_large_model`` with seven task heads of three labels each.
    """
    aspect_names = (
        "stayingpower",
        "texture",
        "smell",
        "price",
        "colour",
        "shipping",
        "packing",
    )
    # Every aspect is classified into the same number of labels (3).
    return Phobert_large_model.from_pretrained(
        "vinai/phobert-large",
        task_labels_map=dict.fromkeys(aspect_names, 3),
    )

## PhoBert_base

In [None]:

def phobert_base_convert_to_features(
    example_batch, model_name="vinai/phobert-base", max_length=200
):
    """Tokenize a batch of examples for the PhoBERT-base model.

    Args:
        example_batch: Mapping with a ``"processed_data"`` sequence of texts
            and a ``"target"`` entry holding the labels.
        model_name: Name or path passed to ``PhobertTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated/padded to.

    Returns:
        The tokenizer output with the batch targets stored under ``"labels"``.
    """
    phobert_tokenizer = transformers.PhobertTokenizer.from_pretrained(model_name)
    texts = [*example_batch["processed_data"]]

    # Every sequence is padded/truncated to exactly ``max_length`` tokens.
    encoded = phobert_tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )
    encoded["labels"] = example_batch["target"]
    return encoded

class phobert_base_model(RobertaPreTrainedModel):
    """Multi-task sequence classifier on top of a shared RoBERTa encoder.

    One linear head per task is attached to the encoder's pooled output. The
    heads are registered as ``classifier1``, ``classifier2``, ... following the
    key order of ``task_labels_map``; ``forward`` selects the head that
    matches ``task_name``.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and one head per task.

        Args:
            config: Model configuration; ``hidden_size``,
                ``classifier_dropout`` and ``hidden_dropout_prob`` are read
                from it.
            **kwargs: ``task_labels_map`` maps each task name to its number
                of output labels. Other keyword arguments are ignored.
        """
        # NOTE(review): the parent is initialised with an empty
        # PretrainedConfig and the real config is attached right after. Kept
        # as in the original -- confirm whether ``super().__init__(config)``
        # is preferable for the installed transformers version.
        super().__init__(transformers.PretrainedConfig())
        self.num_labels = kwargs.get("task_labels_map", {})
        self.config = config

        self.roberta = RobertaModel(config)
        classifier_dropout = (
            config.classifier_dropout
            if config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        # Task-specific output heads. The attribute names classifier1..N are
        # kept so existing checkpoints still load, but the number of heads now
        # follows the task map instead of being fixed at seven.
        for head_index, label_count in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{head_index}",
                nn.Linear(config.hidden_size, label_count),
            )

        self.init_weights()

    def _task_head(self, task_name):
        """Return the classification head registered for ``task_name``."""
        for head_index, known_task in enumerate(self.num_labels, start=1):
            if known_task == task_name:
                return getattr(self, f"classifier{head_index}")
        raise ValueError(
            f"Unknown task_name {task_name!r}; "
            f"expected one of {list(self.num_labels)}"
        )

    def _task_loss(self, logits, labels, task_name):
        """Compute the loss of one task's ``logits`` against ``labels``."""
        label_count = self.num_labels[task_name]
        # An explicitly configured problem type wins. Otherwise it is inferred
        # per call and NOT written back to the shared config, so tasks with
        # different label counts do not inherit the first task's loss type.
        problem_type = self.config.problem_type
        if problem_type is None:
            if label_count == 1:
                problem_type = "regression"
            elif label_count > 1 and labels.dtype in (torch.long, torch.int):
                problem_type = "single_label_classification"
            else:
                problem_type = "multi_label_classification"

        if problem_type == "regression":
            loss_fct = MSELoss()
            if label_count == 1:
                return loss_fct(logits.squeeze(), labels.squeeze())
            return loss_fct(logits, labels)
        if problem_type == "single_label_classification":
            loss_fct = CrossEntropyLoss()
            return loss_fct(logits.view(-1, label_count), labels.view(-1))
        if problem_type == "multi_label_classification":
            loss_fct = BCEWithLogitsLoss()
            return loss_fct(logits, labels)
        # Unrecognised configured problem type: no loss, as before.
        return None

    # NOTE(review): the ``_*_FOR_DOC`` names are imported from several
    # modeling modules at the top of the file, so the last import wins; the
    # generated code sample may not describe this backbone.
    @add_start_docstrings_to_model_forward(
        ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (`optional`):
            Targets for the selected task. When given, a loss is computed:
            Mean-Square loss if the task has one label, Cross-Entropy if it
            has several labels and ``labels`` is an integer tensor, and
            binary cross-entropy with logits otherwise (unless
            ``config.problem_type`` is set explicitly).
        task_name:
            Key of ``task_labels_map`` selecting the output head.

        Raises:
            ValueError: If ``task_name`` is not a key of ``task_labels_map``.
        """
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Second element of the encoder output is used as the pooled
        # representation fed to the task head.
        pooled_output = self.dropout(outputs[1])
        logits = self._task_head(task_name)(pooled_output)

        loss = None
        if labels is not None:
            loss = self._task_loss(logits, labels, task_name)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

def phobert_base_model_loading():
    """Load ``vinai/phobert-base`` into the multi-task PhoBERT-base model.

    Returns:
        A ``phobert_base_model`` with seven task heads of three labels each.
    """
    aspect_names = (
        "stayingpower",
        "texture",
        "smell",
        "price",
        "colour",
        "shipping",
        "packing",
    )
    # Every aspect is classified into the same number of labels (3).
    return phobert_base_model.from_pretrained(
        "vinai/phobert-base",
        task_labels_map=dict.fromkeys(aspect_names, 3),
    )

## Velectra

In [None]:

def velectra_base_convert_to_features(
    example_batch, model_name="FPTAI/velectra-base-discriminator-cased", max_length=128
):
    """Tokenize a batch of examples for the vELECTRA-base model.

    Args:
        example_batch: Mapping with a ``"processed_data"`` sequence of texts
            and a ``"target"`` entry holding the labels.
        model_name: Name or path passed to ``ElectraTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated/padded to.

    Returns:
        The tokenizer output with the batch targets stored under ``"labels"``.
    """
    electra_tokenizer = transformers.ElectraTokenizer.from_pretrained(model_name)
    texts = [*example_batch["processed_data"]]

    # Every sequence is padded/truncated to exactly ``max_length`` tokens.
    encoded = electra_tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )
    encoded["labels"] = example_batch["target"]
    return encoded


class velectra_base_model(RobertaPreTrainedModel):
    """Multi-task sequence classifier with a shared encoder stored as ``electra``.

    One linear head per task is attached to the encoder's pooled output. The
    heads are registered as ``classifier1``, ``classifier2``, ... following the
    key order of ``task_labels_map``; ``forward`` selects the head that
    matches ``task_name``.

    NOTE(review): although the attribute is called ``electra``, the encoder
    is a ``RobertaModel`` and the class derives from
    ``RobertaPreTrainedModel``. This is kept unchanged so existing
    checkpoints and behaviour are preserved -- confirm whether an
    ``ElectraModel`` backbone was intended.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and one head per task.

        Args:
            config: Model configuration; ``hidden_size``,
                ``classifier_dropout`` and ``hidden_dropout_prob`` are read
                from it.
            **kwargs: ``task_labels_map`` maps each task name to its number
                of output labels. Other keyword arguments are ignored.
        """
        # NOTE(review): the parent is initialised with an empty
        # PretrainedConfig and the real config is attached right after. Kept
        # as in the original -- confirm whether ``super().__init__(config)``
        # is preferable for the installed transformers version.
        super().__init__(transformers.PretrainedConfig())
        self.num_labels = kwargs.get("task_labels_map", {})
        self.config = config

        self.electra = RobertaModel(config)
        classifier_dropout = (
            config.classifier_dropout
            if config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        # Task-specific output heads. The attribute names classifier1..N are
        # kept so existing checkpoints still load, but the number of heads now
        # follows the task map instead of being fixed at seven.
        for head_index, label_count in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{head_index}",
                nn.Linear(config.hidden_size, label_count),
            )

        self.init_weights()

    def _task_head(self, task_name):
        """Return the classification head registered for ``task_name``."""
        for head_index, known_task in enumerate(self.num_labels, start=1):
            if known_task == task_name:
                return getattr(self, f"classifier{head_index}")
        raise ValueError(
            f"Unknown task_name {task_name!r}; "
            f"expected one of {list(self.num_labels)}"
        )

    def _task_loss(self, logits, labels, task_name):
        """Compute the loss of one task's ``logits`` against ``labels``."""
        label_count = self.num_labels[task_name]
        # An explicitly configured problem type wins. Otherwise it is inferred
        # per call and NOT written back to the shared config, so tasks with
        # different label counts do not inherit the first task's loss type.
        problem_type = self.config.problem_type
        if problem_type is None:
            if label_count == 1:
                problem_type = "regression"
            elif label_count > 1 and labels.dtype in (torch.long, torch.int):
                problem_type = "single_label_classification"
            else:
                problem_type = "multi_label_classification"

        if problem_type == "regression":
            loss_fct = MSELoss()
            if label_count == 1:
                return loss_fct(logits.squeeze(), labels.squeeze())
            return loss_fct(logits, labels)
        if problem_type == "single_label_classification":
            loss_fct = CrossEntropyLoss()
            return loss_fct(logits.view(-1, label_count), labels.view(-1))
        if problem_type == "multi_label_classification":
            loss_fct = BCEWithLogitsLoss()
            return loss_fct(logits, labels)
        # Unrecognised configured problem type: no loss, as before.
        return None

    # NOTE(review): the ``_*_FOR_DOC`` names are imported from several
    # modeling modules at the top of the file, so the last import wins; the
    # generated code sample may not describe this backbone.
    @add_start_docstrings_to_model_forward(
        ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (`optional`):
            Targets for the selected task. When given, a loss is computed:
            Mean-Square loss if the task has one label, Cross-Entropy if it
            has several labels and ``labels`` is an integer tensor, and
            binary cross-entropy with logits otherwise (unless
            ``config.problem_type`` is set explicitly).
        task_name:
            Key of ``task_labels_map`` selecting the output head.

        Raises:
            ValueError: If ``task_name`` is not a key of ``task_labels_map``.
        """
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        outputs = self.electra(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Second element of the encoder output is used as the pooled
        # representation fed to the task head.
        pooled_output = self.dropout(outputs[1])
        logits = self._task_head(task_name)(pooled_output)

        loss = None
        if labels is not None:
            loss = self._task_loss(logits, labels, task_name)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
        

def velectra_base_model_loading():
    """Load ``FPTAI/velectra-base-discriminator-cased`` into the multi-task model.

    Returns:
        A ``velectra_base_model`` with seven task heads of three labels each.
    """
    aspect_names = (
        "stayingpower",
        "texture",
        "smell",
        "price",
        "colour",
        "shipping",
        "packing",
    )
    # Every aspect is classified into the same number of labels (3).
    return velectra_base_model.from_pretrained(
        "FPTAI/velectra-base-discriminator-cased",
        task_labels_map=dict.fromkeys(aspect_names, 3),
    )

## Bert-base

In [None]:

def bert_base_convert_to_features(
    example_batch, model_name="bert-base-uncased", max_length=128
):
    """Tokenize a batch of examples for the BERT-base model.

    Args:
        example_batch: Mapping with a ``"processed_data"`` sequence of texts
            and a ``"target"`` entry holding the labels.
        model_name: Name or path passed to ``BertTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated/padded to.

    Returns:
        The tokenizer output with the batch targets stored under ``"labels"``.
    """
    bert_tokenizer = transformers.BertTokenizer.from_pretrained(model_name)
    texts = [*example_batch["processed_data"]]

    # Every sequence is padded/truncated to exactly ``max_length`` tokens.
    encoded = bert_tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )
    encoded["labels"] = example_batch["target"]
    return encoded

class bert_base_model(BertPreTrainedModel):
    """Multi-task sequence classifier on top of a shared BERT encoder.

    One linear head per task is attached to the encoder's pooled output. The
    heads are registered as ``classifier1``, ``classifier2``, ... following the
    key order of ``task_labels_map``; ``forward`` selects the head that
    matches ``task_name``.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and one head per task.

        Args:
            config: Model configuration; ``hidden_size``,
                ``classifier_dropout`` and ``hidden_dropout_prob`` are read
                from it.
            **kwargs: ``task_labels_map`` maps each task name to its number
                of output labels. Other keyword arguments are ignored.
        """
        # NOTE(review): the parent is initialised with an empty
        # PretrainedConfig and the real config is attached right after. Kept
        # as in the original -- confirm whether ``super().__init__(config)``
        # is preferable for the installed transformers version.
        super().__init__(transformers.PretrainedConfig())
        self.num_labels = kwargs.get("task_labels_map", {})
        self.config = config

        self.bert = BertModel(config)
        classifier_dropout = (
            config.classifier_dropout
            if config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        # Task-specific output heads. The attribute names classifier1..N are
        # kept so existing checkpoints still load, but the number of heads now
        # follows the task map instead of being fixed at seven.
        for head_index, label_count in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{head_index}",
                nn.Linear(config.hidden_size, label_count),
            )

        self.init_weights()

    def _task_head(self, task_name):
        """Return the classification head registered for ``task_name``."""
        for head_index, known_task in enumerate(self.num_labels, start=1):
            if known_task == task_name:
                return getattr(self, f"classifier{head_index}")
        raise ValueError(
            f"Unknown task_name {task_name!r}; "
            f"expected one of {list(self.num_labels)}"
        )

    def _task_loss(self, logits, labels, task_name):
        """Compute the loss of one task's ``logits`` against ``labels``."""
        label_count = self.num_labels[task_name]
        # An explicitly configured problem type wins. Otherwise it is inferred
        # per call and NOT written back to the shared config, so tasks with
        # different label counts do not inherit the first task's loss type.
        problem_type = self.config.problem_type
        if problem_type is None:
            if label_count == 1:
                problem_type = "regression"
            elif label_count > 1 and labels.dtype in (torch.long, torch.int):
                problem_type = "single_label_classification"
            else:
                problem_type = "multi_label_classification"

        if problem_type == "regression":
            loss_fct = MSELoss()
            if label_count == 1:
                return loss_fct(logits.squeeze(), labels.squeeze())
            return loss_fct(logits, labels)
        if problem_type == "single_label_classification":
            loss_fct = CrossEntropyLoss()
            return loss_fct(logits.view(-1, label_count), labels.view(-1))
        if problem_type == "multi_label_classification":
            loss_fct = BCEWithLogitsLoss()
            return loss_fct(logits, labels)
        # Unrecognised configured problem type: no loss, as before.
        return None

    # NOTE(review): the ``_*_FOR_DOC`` names are imported from several
    # modeling modules at the top of the file, so the last import wins; the
    # generated code sample may not describe this backbone.
    @add_start_docstrings_to_model_forward(
        BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (`optional`):
            Targets for the selected task. When given, a loss is computed:
            Mean-Square loss if the task has one label, Cross-Entropy if it
            has several labels and ``labels`` is an integer tensor, and
            binary cross-entropy with logits otherwise (unless
            ``config.problem_type`` is set explicitly).
        task_name:
            Key of ``task_labels_map`` selecting the output head.

        Raises:
            ValueError: If ``task_name`` is not a key of ``task_labels_map``.
        """
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Second element of the encoder output is used as the pooled
        # representation fed to the task head. (The debug prints that used to
        # sit here were removed: they wrote to stdout on every step and read
        # ``outputs.last_hidden_state``, which a tuple output does not have.)
        pooled_output = self.dropout(outputs[1])
        logits = self._task_head(task_name)(pooled_output)

        loss = None
        if labels is not None:
            loss = self._task_loss(logits, labels, task_name)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
def bert_base_model_loading():
    """Load ``bert-base-uncased`` into the multi-task BERT-base model.

    Returns:
        A ``bert_base_model`` with seven task heads of three labels each.
    """
    aspect_names = (
        "stayingpower",
        "texture",
        "smell",
        "price",
        "colour",
        "shipping",
        "packing",
    )
    # Every aspect is classified into the same number of labels (3).
    return bert_base_model.from_pretrained(
        "bert-base-uncased",
        task_labels_map=dict.fromkeys(aspect_names, 3),
    )

## Bert_large

In [None]:

def bert_large_convert_to_features(
    example_batch, model_name="bert-large-uncased", max_length=128
):
    """Tokenize a batch of examples for the BERT-large model.

    Args:
        example_batch: Mapping with a ``"processed_data"`` sequence of texts
            and a ``"target"`` entry holding the labels.
        model_name: Name or path passed to ``BertTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated/padded to.

    Returns:
        The tokenizer output with the batch targets stored under ``"labels"``.
    """
    bert_tokenizer = transformers.BertTokenizer.from_pretrained(model_name)
    texts = [*example_batch["processed_data"]]

    # Every sequence is padded/truncated to exactly ``max_length`` tokens.
    encoded = bert_tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )
    encoded["labels"] = example_batch["target"]
    return encoded


class bert_large_model(BertPreTrainedModel):
    """Multitask BERT sequence classifier with one linear head per task.

    A shared ``BertModel`` encoder produces a pooled output that goes through
    dropout and then through the head selected by ``task_name``.  Heads are
    registered as ``classifier1``, ``classifier2``, ... following the
    iteration order of ``task_labels_map``.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and the per-task heads.

        Args:
            config: Model configuration; ``hidden_size``,
                ``classifier_dropout`` and ``hidden_dropout_prob`` are read.
            **kwargs: ``task_labels_map`` maps each task name to the number
                of labels of its head (defaults to an empty mapping).
        """
        # NOTE(review): the parent is initialised with a blank PretrainedConfig
        # and the real config is attached right after; kept as in the original.
        super().__init__(transformers.PretrainedConfig())
        self.num_labels = kwargs.get("task_labels_map", {})
        self.config = config

        self.bert = BertModel(config)
        classifier_dropout = (
            config.classifier_dropout
            if config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)

        # One head per configured task.  The attribute names keep the
        # ``classifier<N>`` scheme so parameter names stay the same as with
        # the previously hard-coded seven heads.
        for position, label_count in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{position}",
                nn.Linear(config.hidden_size, label_count),
            )

        self.init_weights()

    def _select_head(self, task_name):
        """Return the head registered for ``task_name`` or raise ``ValueError``."""
        for position, known_task in enumerate(self.num_labels, start=1):
            if task_name == known_task:
                return getattr(self, f"classifier{position}")
        raise ValueError(
            f"Unknown task_name {task_name!r}; expected one of {list(self.num_labels)}"
        )

    def _compute_loss(self, logits, labels, task_name):
        """Return the loss of ``logits`` against ``labels`` for ``task_name``.

        ``config.problem_type`` is honoured when it is set.  Otherwise the
        type is inferred for this call from the task's label count and the
        label dtype.  The inferred value is not written back to the shared
        config, so tasks with different label counts do not inherit the loss
        chosen for whichever task ran first.  ``None`` is returned for an
        unrecognised ``problem_type``.
        """
        label_count = self.num_labels[task_name]
        problem_type = self.config.problem_type
        if problem_type is None:
            if label_count == 1:
                problem_type = "regression"
            elif label_count > 1 and labels.dtype in (torch.long, torch.int):
                problem_type = "single_label_classification"
            else:
                problem_type = "multi_label_classification"

        if problem_type == "regression":
            loss_fct = MSELoss()
            if label_count == 1:
                return loss_fct(logits.squeeze(), labels.squeeze())
            return loss_fct(logits, labels)
        if problem_type == "single_label_classification":
            loss_fct = CrossEntropyLoss()
            return loss_fct(logits.view(-1, label_count), labels.view(-1))
        if problem_type == "multi_label_classification":
            loss_fct = BCEWithLogitsLoss()
            return loss_fct(logits, labels)
        return None

    @add_start_docstrings_to_model_forward(
        BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the loss of the selected task. With :obj:`n = task_labels_map[task_name]`: if
            :obj:`n == 1` a regression loss is computed (Mean-Square loss); if :obj:`n > 1` and the labels are
            integer typed a classification loss is computed (Cross-Entropy); otherwise a binary cross-entropy with
            logits loss is computed. A non-``None`` :obj:`config.problem_type` overrides this choice.
        task_name (:obj:`str`):
            Key of :obj:`task_labels_map` selecting the head to apply. A :obj:`ValueError` is raised when it is not
            a configured task.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # Resolve the head first so a bad task name fails before the encoder runs.
        head = self._select_head(task_name)

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Index 1 of the encoder output is its pooled representation.
        pooled_output = self.dropout(outputs[1])
        logits = head(pooled_output)

        loss = None
        if labels is not None:
            loss = self._compute_loss(logits, labels, task_name)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
        

def bert_large_model_loading():
    """Instantiate ``bert_large_model`` from the ``bert-large-uncased`` checkpoint.

    Returns:
        The multitask model built with one entry per aspect task in
        ``task_labels_map``, each mapped to 3 labels.
    """
    aspects = (
        "stayingpower",
        "texture",
        "smell",
        "price",
        "colour",
        "shipping",
        "packing",
    )
    # Same keys, same insertion order, same label count as a literal dict.
    task_labels_map = {aspect: 3 for aspect in aspects}
    return bert_large_model.from_pretrained(
        "bert-large-uncased", task_labels_map=task_labels_map
    )

## DistilBert-base

In [None]:

def distilbert_base_convert_to_features(
    example_batch, model_name="distilbert-base-uncased", max_length=128
):
    """Tokenize a batch of examples and attach their labels.

    Args:
        example_batch: Batch providing a ``"processed_data"`` column (the
            texts to encode) and a ``"target"`` column (the labels).
        model_name: Checkpoint name handed to
            ``DistilBertTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated and padded to.

    Returns:
        The tokenizer output with the batch's ``"target"`` values stored
        under ``"labels"``.
    """
    distil_tokenizer = transformers.DistilBertTokenizer.from_pretrained(model_name)
    texts = list(example_batch["processed_data"])

    encode_options = {
        "max_length": max_length,
        "truncation": True,
        "padding": "max_length",
    }
    encoded = distil_tokenizer(texts, **encode_options)
    encoded["labels"] = example_batch["target"]
    return encoded

class distilbert_base_model(DistilBertPreTrainedModel):
    """Multitask DistilBERT sequence classifier with one linear head per task.

    A shared ``DistilBertModel`` encoder produces hidden states; the state at
    position 0 goes through dropout and then through the head selected by
    ``task_name``.  Heads are registered as ``classifier1``,
    ``classifier2``, ... following the iteration order of ``task_labels_map``.

    The encoder is registered as ``distilbert`` so that its parameter names
    line up with the ``distilbert.*`` names used by DistilBERT checkpoints;
    under the former name ``bert`` no pretrained weight matched by name.
    NOTE(review): state dicts saved while the encoder was registered as
    ``bert`` use ``bert.*`` keys and need those keys renamed to load here.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and the per-task heads.

        Args:
            config: Model configuration; ``hidden_size`` and
                ``seq_classif_dropout`` are read.
            **kwargs: ``task_labels_map`` maps each task name to the number
                of labels of its head (defaults to an empty mapping).
        """
        # NOTE(review): the parent is initialised with a blank PretrainedConfig
        # and the real config is attached right after; kept as in the original.
        super().__init__(transformers.PretrainedConfig())
        self.num_labels = kwargs.get("task_labels_map", {})
        self.config = config

        self.distilbert = DistilBertModel(config)

        classifier_dropout = config.seq_classif_dropout
        if classifier_dropout is None:
            # DistilBERT configs expose the general rate as ``dropout``;
            # ``hidden_dropout_prob`` is still preferred when a config has it.
            classifier_dropout = getattr(config, "hidden_dropout_prob", None)
            if classifier_dropout is None:
                classifier_dropout = config.dropout
        self.dropout = nn.Dropout(classifier_dropout)

        # One head per configured task.  The attribute names keep the
        # ``classifier<N>`` scheme so head parameter names stay the same as
        # with the previously hard-coded seven heads.
        for position, label_count in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{position}",
                nn.Linear(config.hidden_size, label_count),
            )

        self.init_weights()

    @property
    def bert(self):
        """Alias of the encoder, kept for code that still reads ``model.bert``."""
        return self.distilbert

    def _select_head(self, task_name):
        """Return the head registered for ``task_name`` or raise ``ValueError``."""
        for position, known_task in enumerate(self.num_labels, start=1):
            if task_name == known_task:
                return getattr(self, f"classifier{position}")
        raise ValueError(
            f"Unknown task_name {task_name!r}; expected one of {list(self.num_labels)}"
        )

    def _compute_loss(self, logits, labels, task_name):
        """Return the loss of ``logits`` against ``labels`` for ``task_name``.

        ``config.problem_type`` is honoured when it is set.  Otherwise the
        type is inferred for this call from the task's label count and the
        label dtype.  The inferred value is not written back to the shared
        config, so tasks with different label counts do not inherit the loss
        chosen for whichever task ran first.  ``None`` is returned for an
        unrecognised ``problem_type``.
        """
        label_count = self.num_labels[task_name]
        problem_type = self.config.problem_type
        if problem_type is None:
            if label_count == 1:
                problem_type = "regression"
            elif label_count > 1 and labels.dtype in (torch.long, torch.int):
                problem_type = "single_label_classification"
            else:
                problem_type = "multi_label_classification"

        if problem_type == "regression":
            loss_fct = MSELoss()
            if label_count == 1:
                return loss_fct(logits.squeeze(), labels.squeeze())
            return loss_fct(logits, labels)
        if problem_type == "single_label_classification":
            loss_fct = CrossEntropyLoss()
            return loss_fct(logits.view(-1, label_count), labels.view(-1))
        if problem_type == "multi_label_classification":
            loss_fct = BCEWithLogitsLoss()
            return loss_fct(logits, labels)
        return None

    @add_start_docstrings_to_model_forward(
        BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the loss of the selected task. With :obj:`n = task_labels_map[task_name]`: if
            :obj:`n == 1` a regression loss is computed (Mean-Square loss); if :obj:`n > 1` and the labels are
            integer typed a classification loss is computed (Cross-Entropy); otherwise a binary cross-entropy with
            logits loss is computed. A non-``None`` :obj:`config.problem_type` overrides this choice.
        task_name (:obj:`str`):
            Key of :obj:`task_labels_map` selecting the head to apply. A :obj:`ValueError` is raised when it is not
            a configured task.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # Resolve the head first so a bad task name fails before the encoder runs.
        head = self._select_head(task_name)

        outputs = self.distilbert(
            input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # No pooled output here: take the hidden state at position 0.
        hidden_state = outputs[0]
        pooled_output = self.dropout(hidden_state[:, 0])
        logits = head(pooled_output)

        loss = None
        if labels is not None:
            loss = self._compute_loss(logits, labels, task_name)

        if not return_dict:
            # The encoder tuple is (last_hidden_state, hidden_states,
            # attentions); everything after index 0 is an optional extra.
            output = (logits,) + outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
        

def distilbert_base_model_loading():
    """Instantiate ``distilbert_base_model`` from ``distilbert-base-uncased``.

    Returns:
        The multitask model built with one entry per aspect task in
        ``task_labels_map``, each mapped to 3 labels.
    """
    aspects = (
        "stayingpower",
        "texture",
        "smell",
        "price",
        "colour",
        "shipping",
        "packing",
    )
    # Same keys, same insertion order, same label count as a literal dict.
    task_labels_map = {aspect: 3 for aspect in aspects}
    return distilbert_base_model.from_pretrained(
        "distilbert-base-uncased", task_labels_map=task_labels_map
    )

## DistilBert large (uses the multilingual checkpoint `distilbert-base-multilingual-cased`)

In [None]:

def distilbert_large_convert_to_features(
    example_batch, model_name="distilbert-base-multilingual-cased", max_length=128
):
    """Tokenize a batch of examples and attach their labels.

    Args:
        example_batch: Batch providing a ``"processed_data"`` column (the
            texts to encode) and a ``"target"`` column (the labels).
        model_name: Checkpoint name handed to
            ``DistilBertTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated and padded to.

    Returns:
        The tokenizer output with the batch's ``"target"`` values stored
        under ``"labels"``.
    """
    distil_tokenizer = transformers.DistilBertTokenizer.from_pretrained(model_name)
    texts = list(example_batch["processed_data"])

    encode_options = {
        "max_length": max_length,
        "truncation": True,
        "padding": "max_length",
    }
    encoded = distil_tokenizer(texts, **encode_options)
    encoded["labels"] = example_batch["target"]
    return encoded


class distilbert_large_model(DistilBertPreTrainedModel):
    """Multitask DistilBERT sequence classifier with one linear head per task.

    A shared ``DistilBertModel`` encoder produces hidden states; the state at
    position 0 goes through dropout and then through the head selected by
    ``task_name``.  Heads are registered as ``classifier1``,
    ``classifier2``, ... following the iteration order of ``task_labels_map``.

    The encoder is registered as ``distilbert`` so that its parameter names
    line up with the ``distilbert.*`` names used by DistilBERT checkpoints;
    under the former name ``bert`` no pretrained weight matched by name.
    NOTE(review): state dicts saved while the encoder was registered as
    ``bert`` use ``bert.*`` keys and need those keys renamed to load here.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and the per-task heads.

        Args:
            config: Model configuration; ``hidden_size`` and
                ``seq_classif_dropout`` are read.
            **kwargs: ``task_labels_map`` maps each task name to the number
                of labels of its head (defaults to an empty mapping).
        """
        # NOTE(review): the parent is initialised with a blank PretrainedConfig
        # and the real config is attached right after; kept as in the original.
        super().__init__(transformers.PretrainedConfig())
        self.num_labels = kwargs.get("task_labels_map", {})
        self.config = config

        self.distilbert = DistilBertModel(config)

        classifier_dropout = config.seq_classif_dropout
        if classifier_dropout is None:
            # DistilBERT configs expose the general rate as ``dropout``;
            # ``hidden_dropout_prob`` is still preferred when a config has it.
            classifier_dropout = getattr(config, "hidden_dropout_prob", None)
            if classifier_dropout is None:
                classifier_dropout = config.dropout
        self.dropout = nn.Dropout(classifier_dropout)

        # One head per configured task.  The attribute names keep the
        # ``classifier<N>`` scheme so head parameter names stay the same as
        # with the previously hard-coded seven heads.
        for position, label_count in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{position}",
                nn.Linear(config.hidden_size, label_count),
            )

        self.init_weights()

    @property
    def bert(self):
        """Alias of the encoder, kept for code that still reads ``model.bert``."""
        return self.distilbert

    def _select_head(self, task_name):
        """Return the head registered for ``task_name`` or raise ``ValueError``."""
        for position, known_task in enumerate(self.num_labels, start=1):
            if task_name == known_task:
                return getattr(self, f"classifier{position}")
        raise ValueError(
            f"Unknown task_name {task_name!r}; expected one of {list(self.num_labels)}"
        )

    def _compute_loss(self, logits, labels, task_name):
        """Return the loss of ``logits`` against ``labels`` for ``task_name``.

        ``config.problem_type`` is honoured when it is set.  Otherwise the
        type is inferred for this call from the task's label count and the
        label dtype.  The inferred value is not written back to the shared
        config, so tasks with different label counts do not inherit the loss
        chosen for whichever task ran first.  ``None`` is returned for an
        unrecognised ``problem_type``.
        """
        label_count = self.num_labels[task_name]
        problem_type = self.config.problem_type
        if problem_type is None:
            if label_count == 1:
                problem_type = "regression"
            elif label_count > 1 and labels.dtype in (torch.long, torch.int):
                problem_type = "single_label_classification"
            else:
                problem_type = "multi_label_classification"

        if problem_type == "regression":
            loss_fct = MSELoss()
            if label_count == 1:
                return loss_fct(logits.squeeze(), labels.squeeze())
            return loss_fct(logits, labels)
        if problem_type == "single_label_classification":
            loss_fct = CrossEntropyLoss()
            return loss_fct(logits.view(-1, label_count), labels.view(-1))
        if problem_type == "multi_label_classification":
            loss_fct = BCEWithLogitsLoss()
            return loss_fct(logits, labels)
        return None

    @add_start_docstrings_to_model_forward(
        BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the loss of the selected task. With :obj:`n = task_labels_map[task_name]`: if
            :obj:`n == 1` a regression loss is computed (Mean-Square loss); if :obj:`n > 1` and the labels are
            integer typed a classification loss is computed (Cross-Entropy); otherwise a binary cross-entropy with
            logits loss is computed. A non-``None`` :obj:`config.problem_type` overrides this choice.
        task_name (:obj:`str`):
            Key of :obj:`task_labels_map` selecting the head to apply. A :obj:`ValueError` is raised when it is not
            a configured task.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # Resolve the head first so a bad task name fails before the encoder runs.
        head = self._select_head(task_name)

        outputs = self.distilbert(
            input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # No pooled output here: take the hidden state at position 0.
        hidden_state = outputs[0]
        pooled_output = self.dropout(hidden_state[:, 0])
        logits = head(pooled_output)

        loss = None
        if labels is not None:
            loss = self._compute_loss(logits, labels, task_name)

        if not return_dict:
            # The encoder tuple is (last_hidden_state, hidden_states,
            # attentions); everything after index 0 is an optional extra.
            output = (logits,) + outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
        
        
def distilbert_large_model_loading():
    """Instantiate ``distilbert_large_model`` from ``distilbert-base-multilingual-cased``.

    Returns:
        The multitask model built with one entry per aspect task in
        ``task_labels_map``, each mapped to 3 labels.
    """
    aspects = (
        "stayingpower",
        "texture",
        "smell",
        "price",
        "colour",
        "shipping",
        "packing",
    )
    # Same keys, same insertion order, same label count as a literal dict.
    task_labels_map = {aspect: 3 for aspect in aspects}
    return distilbert_large_model.from_pretrained(
        "distilbert-base-multilingual-cased", task_labels_map=task_labels_map
    )

## XLM-Roberta

In [None]:

def xlm_base_convert_to_features(
    example_batch, model_name="xlm-roberta-base", max_length=128
):
    """Tokenize a batch of examples and attach their labels.

    Args:
        example_batch: Batch providing a ``"processed_data"`` column (the
            texts to encode) and a ``"target"`` column (the labels).
        model_name: Checkpoint name handed to
            ``XLMRobertaTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated and padded to.

    Returns:
        The tokenizer output with the batch's ``"target"`` values stored
        under ``"labels"``.
    """
    xlmr_tokenizer = transformers.XLMRobertaTokenizer.from_pretrained(model_name)
    texts = list(example_batch["processed_data"])

    encode_options = {
        "max_length": max_length,
        "truncation": True,
        "padding": "max_length",
    }
    encoded = xlmr_tokenizer(texts, **encode_options)
    encoded["labels"] = example_batch["target"]
    return encoded

class xlm_base_model(RobertaPreTrainedModel):
    """Multitask XLM-RoBERTa sequence classifier with one linear head per task.

    A shared ``XLMRobertaModel`` encoder produces hidden states; the state at
    position 0 goes through dropout and then through the head selected by
    ``task_name``.  Heads are registered as ``classifier1``,
    ``classifier2``, ... following the iteration order of ``task_labels_map``.

    The class derives from ``RobertaPreTrainedModel`` (as ``xlm_large_model``
    does) rather than the XLM base class, because the wrapped encoder is an
    ``XLMRobertaModel``.  The encoder is registered as ``roberta`` so that its
    parameter names line up with the ``roberta.*`` names used by XLM-RoBERTa
    checkpoints; under the former name ``bert`` no pretrained weight matched
    by name.
    NOTE(review): state dicts saved while the encoder was registered as
    ``bert`` use ``bert.*`` keys and need those keys renamed to load here.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and the per-task heads.

        Args:
            config: Model configuration; ``hidden_size``,
                ``classifier_dropout`` and ``hidden_dropout_prob`` are read.
            **kwargs: ``task_labels_map`` maps each task name to the number
                of labels of its head (defaults to an empty mapping).
        """
        # NOTE(review): the parent is initialised with a blank PretrainedConfig
        # and the real config is attached right after; kept as in the original.
        super().__init__(transformers.PretrainedConfig())
        self.num_labels = kwargs.get("task_labels_map", {})
        self.config = config

        self.roberta = XLMRobertaModel(config)
        classifier_dropout = (
            config.classifier_dropout
            if config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)

        # One head per configured task.  The attribute names keep the
        # ``classifier<N>`` scheme so head parameter names stay the same as
        # with the previously hard-coded seven heads.
        for position, label_count in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{position}",
                nn.Linear(config.hidden_size, label_count),
            )

        self.init_weights()

    @property
    def bert(self):
        """Alias of the encoder, kept for code that still reads ``model.bert``."""
        return self.roberta

    def _select_head(self, task_name):
        """Return the head registered for ``task_name`` or raise ``ValueError``."""
        for position, known_task in enumerate(self.num_labels, start=1):
            if task_name == known_task:
                return getattr(self, f"classifier{position}")
        raise ValueError(
            f"Unknown task_name {task_name!r}; expected one of {list(self.num_labels)}"
        )

    def _compute_loss(self, logits, labels, task_name):
        """Return the loss of ``logits`` against ``labels`` for ``task_name``.

        ``config.problem_type`` is honoured when it is set.  Otherwise the
        type is inferred for this call from the task's label count and the
        label dtype.  The inferred value is not written back to the shared
        config, so tasks with different label counts do not inherit the loss
        chosen for whichever task ran first.  ``None`` is returned for an
        unrecognised ``problem_type``.
        """
        label_count = self.num_labels[task_name]
        problem_type = self.config.problem_type
        if problem_type is None:
            if label_count == 1:
                problem_type = "regression"
            elif label_count > 1 and labels.dtype in (torch.long, torch.int):
                problem_type = "single_label_classification"
            else:
                problem_type = "multi_label_classification"

        if problem_type == "regression":
            loss_fct = MSELoss()
            if label_count == 1:
                return loss_fct(logits.squeeze(), labels.squeeze())
            return loss_fct(logits, labels)
        if problem_type == "single_label_classification":
            loss_fct = CrossEntropyLoss()
            return loss_fct(logits.view(-1, label_count), labels.view(-1))
        if problem_type == "multi_label_classification":
            loss_fct = BCEWithLogitsLoss()
            return loss_fct(logits, labels)
        return None

    @add_start_docstrings_to_model_forward(
        BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the loss of the selected task. With :obj:`n = task_labels_map[task_name]`: if
            :obj:`n == 1` a regression loss is computed (Mean-Square loss); if :obj:`n > 1` and the labels are
            integer typed a classification loss is computed (Cross-Entropy); otherwise a binary cross-entropy with
            logits loss is computed. A non-``None`` :obj:`config.problem_type` overrides this choice.
        task_name (:obj:`str`):
            Key of :obj:`task_labels_map` selecting the head to apply. A :obj:`ValueError` is raised when it is not
            a configured task.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # Resolve the head first so a bad task name fails before the encoder runs.
        head = self._select_head(task_name)

        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # This variant pools by taking the hidden state at position 0 rather
        # than the encoder's own pooled output.
        hidden_state = outputs[0]
        pooled_output = self.dropout(hidden_state[:, 0])
        logits = head(pooled_output)

        loss = None
        if labels is not None:
            loss = self._compute_loss(logits, labels, task_name)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
        

def xlm_base_model_loading():
    """Instantiate ``xlm_base_model`` from the ``xlm-roberta-base`` checkpoint.

    Returns:
        The multitask model built with one entry per aspect task in
        ``task_labels_map``, each mapped to 3 labels.
    """
    aspects = (
        "stayingpower",
        "texture",
        "smell",
        "price",
        "colour",
        "shipping",
        "packing",
    )
    # Same keys, same insertion order, same label count as a literal dict.
    task_labels_map = {aspect: 3 for aspect in aspects}
    return xlm_base_model.from_pretrained(
        "xlm-roberta-base", task_labels_map=task_labels_map
    )

## XLM-Roberta_large

In [None]:

def xlm_large_convert_to_features(
    example_batch, model_name="xlm-roberta-large", max_length=128
):
    """Tokenize a batch of examples and attach their labels.

    Args:
        example_batch: Batch providing a ``"processed_data"`` column (the
            texts to encode) and a ``"target"`` column (the labels).
        model_name: Checkpoint name handed to
            ``XLMRobertaTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated and padded to.

    Returns:
        The tokenizer output with the batch's ``"target"`` values stored
        under ``"labels"``.
    """
    xlmr_tokenizer = transformers.XLMRobertaTokenizer.from_pretrained(model_name)
    texts = list(example_batch["processed_data"])

    encode_options = {
        "max_length": max_length,
        "truncation": True,
        "padding": "max_length",
    }
    encoded = xlmr_tokenizer(texts, **encode_options)
    encoded["labels"] = example_batch["target"]
    return encoded


class xlm_large_model(RobertaPreTrainedModel):
    """Multitask XLM-RoBERTa sequence classifier with one linear head per task.

    A shared ``XLMRobertaModel`` encoder produces a pooled output that goes
    through dropout and then through the head selected by ``task_name``.
    Heads are registered as ``classifier1``, ``classifier2``, ... following
    the iteration order of ``task_labels_map``.

    The encoder is registered as ``roberta`` so that its parameter names line
    up with the ``roberta.*`` names used by XLM-RoBERTa checkpoints; under
    the former name ``bert`` no pretrained weight matched by name.
    NOTE(review): state dicts saved while the encoder was registered as
    ``bert`` use ``bert.*`` keys and need those keys renamed to load here.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and the per-task heads.

        Args:
            config: Model configuration; ``hidden_size``,
                ``classifier_dropout`` and ``hidden_dropout_prob`` are read.
            **kwargs: ``task_labels_map`` maps each task name to the number
                of labels of its head (defaults to an empty mapping).
        """
        # NOTE(review): the parent is initialised with a blank PretrainedConfig
        # and the real config is attached right after; kept as in the original.
        super().__init__(transformers.PretrainedConfig())
        self.num_labels = kwargs.get("task_labels_map", {})
        self.config = config

        self.roberta = XLMRobertaModel(config)
        classifier_dropout = (
            config.classifier_dropout
            if config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)

        # One head per configured task.  The attribute names keep the
        # ``classifier<N>`` scheme so head parameter names stay the same as
        # with the previously hard-coded seven heads.
        for position, label_count in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{position}",
                nn.Linear(config.hidden_size, label_count),
            )

        self.init_weights()

    @property
    def bert(self):
        """Alias of the encoder, kept for code that still reads ``model.bert``."""
        return self.roberta

    def _select_head(self, task_name):
        """Return the head registered for ``task_name`` or raise ``ValueError``."""
        for position, known_task in enumerate(self.num_labels, start=1):
            if task_name == known_task:
                return getattr(self, f"classifier{position}")
        raise ValueError(
            f"Unknown task_name {task_name!r}; expected one of {list(self.num_labels)}"
        )

    def _compute_loss(self, logits, labels, task_name):
        """Return the loss of ``logits`` against ``labels`` for ``task_name``.

        ``config.problem_type`` is honoured when it is set.  Otherwise the
        type is inferred for this call from the task's label count and the
        label dtype.  The inferred value is not written back to the shared
        config, so tasks with different label counts do not inherit the loss
        chosen for whichever task ran first.  ``None`` is returned for an
        unrecognised ``problem_type``.
        """
        label_count = self.num_labels[task_name]
        problem_type = self.config.problem_type
        if problem_type is None:
            if label_count == 1:
                problem_type = "regression"
            elif label_count > 1 and labels.dtype in (torch.long, torch.int):
                problem_type = "single_label_classification"
            else:
                problem_type = "multi_label_classification"

        if problem_type == "regression":
            loss_fct = MSELoss()
            if label_count == 1:
                return loss_fct(logits.squeeze(), labels.squeeze())
            return loss_fct(logits, labels)
        if problem_type == "single_label_classification":
            loss_fct = CrossEntropyLoss()
            return loss_fct(logits.view(-1, label_count), labels.view(-1))
        if problem_type == "multi_label_classification":
            loss_fct = BCEWithLogitsLoss()
            return loss_fct(logits, labels)
        return None

    @add_start_docstrings_to_model_forward(
        BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the loss of the selected task. With :obj:`n = task_labels_map[task_name]`: if
            :obj:`n == 1` a regression loss is computed (Mean-Square loss); if :obj:`n > 1` and the labels are
            integer typed a classification loss is computed (Cross-Entropy); otherwise a binary cross-entropy with
            logits loss is computed. A non-``None`` :obj:`config.problem_type` overrides this choice.
        task_name (:obj:`str`):
            Key of :obj:`task_labels_map` selecting the head to apply. A :obj:`ValueError` is raised when it is not
            a configured task.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # Resolve the head first so a bad task name fails before the encoder runs.
        head = self._select_head(task_name)

        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Index 1 of the encoder output is its pooled representation.
        pooled_output = self.dropout(outputs[1])
        logits = head(pooled_output)

        loss = None
        if labels is not None:
            loss = self._compute_loss(logits, labels, task_name)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
        
        
        
def xlm_large_model_loading():
    """Instantiate ``xlm_large_model`` from the ``xlm-roberta-large`` checkpoint.

    Returns:
        The multitask model built with one entry per aspect task in
        ``task_labels_map``, each mapped to 3 labels.
    """
    aspects = (
        "stayingpower",
        "texture",
        "smell",
        "price",
        "colour",
        "shipping",
        "packing",
    )
    # Same keys, same insertion order, same label count as a literal dict.
    task_labels_map = {aspect: 3 for aspect in aspects}
    return xlm_large_model.from_pretrained(
        "xlm-roberta-large", task_labels_map=task_labels_map
    )

## Vibert

In [None]:

def vibert_base_convert_to_features(
    example_batch, model_name="FPTAI/vibert-base-cased", max_length=128
):
    """Tokenize a batch of examples and attach their labels.

    Args:
        example_batch: Batch providing a ``"processed_data"`` column (the
            texts to encode) and a ``"target"`` column (the labels).
        model_name: Checkpoint name handed to ``BertTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated and padded to.

    Returns:
        The tokenizer output with the batch's ``"target"`` values stored
        under ``"labels"``.
    """
    vibert_tokenizer = transformers.BertTokenizer.from_pretrained(model_name)
    texts = list(example_batch["processed_data"])

    encode_options = {
        "max_length": max_length,
        "truncation": True,
        "padding": "max_length",
    }
    encoded = vibert_tokenizer(texts, **encode_options)
    encoded["labels"] = example_batch["target"]
    return encoded

class vibert_base_model(BertPreTrainedModel):
    """BERT encoder shared by several task-specific classification heads.

    One linear head is created per entry of the ``task_labels_map`` keyword
    argument (task name -> number of labels). Heads are stored as
    ``classifier1`` ... ``classifierN`` following the order of that mapping,
    and ``forward`` picks the head that belongs to ``task_name``.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and one head per task.

        Args:
            config: Model configuration used for the encoder and head sizes.
            **kwargs: ``task_labels_map`` (dict, task name -> number of
                labels) is read from here; it defaults to an empty dict.
        """
        # Initialise the base class with the real config rather than a blank one.
        super().__init__(config)
        # NOTE: unlike stock classifiers, ``num_labels`` is a dict here.
        self.num_labels = kwargs.get("task_labels_map", {})

        self.bert = BertModel(config)
        classifier_dropout = (
            config.classifier_dropout
            if config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        # Task-specific output heads. The attribute names classifier1..N are
        # kept so that parameter names stay the same as with the former
        # hand-written heads.
        for head_index, head_size in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{head_index}",
                nn.Linear(config.hidden_size, head_size),
            )

        self.init_weights()

    @add_start_docstrings_to_model_forward(
        BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the sequence classification/regression loss. With one label for the task a
            regression loss is computed (Mean-Square loss), with more than one label and integer labels a
            classification loss is computed (Cross-Entropy), otherwise a multi-label loss (BCE with logits).
            The problem type is inferred on the first labelled call and then cached on the config.
        task_name (:obj:`str`):
            Key of ``task_labels_map`` selecting the classification head to apply.

        Raises :obj:`ValueError` if ``task_name`` is not one of the configured tasks.
        """
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # The second encoder output is used as the pooled sentence representation.
        pooled_output = self.dropout(outputs[1])

        # Select the head by the position of the task in the label map.
        task_names = list(self.num_labels)
        if task_name not in task_names:
            raise ValueError(
                f"Unknown task_name {task_name!r}; expected one of {task_names}"
            )
        head = getattr(self, f"classifier{task_names.index(task_name) + 1}")
        logits = head(pooled_output)

        loss = None
        if labels is not None:
            task_num_labels = self.num_labels[task_name]
            if self.config.problem_type is None:
                if task_num_labels == 1:
                    self.config.problem_type = "regression"
                elif task_num_labels > 1 and labels.dtype in (torch.long, torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if task_num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, task_num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
        
def vibert_base_model_loading():
    """Instantiate the multitask ViBERT model from ``FPTAI/vibert-base-cased``.

    Returns:
        The ``vibert_base_model`` built by ``from_pretrained`` with a
        ``task_labels_map`` giving 3 labels to each of the seven aspect tasks.
    """
    aspect_names = (
        "stayingpower", "texture", 'smell', 'price',
        'colour', 'shipping', 'packing',
    )
    # Order matters: the model numbers its heads by position in this mapping.
    label_counts = {aspect: 3 for aspect in aspect_names}
    return vibert_base_model.from_pretrained(
        'FPTAI/vibert-base-cased',
        task_labels_map=label_counts,
    )


## Roberta-base

In [None]:

def roberta_base_convert_to_features(
    example_batch, model_name='roberta-base', max_length=128
):
    """Tokenize a batch of texts with a RoBERTa tokenizer and attach the labels.

    Args:
        example_batch: Mapping providing the texts under ``"processed_data"``
            and the labels under ``"target"``.
        model_name: Checkpoint name handed to
            ``RobertaTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated and padded to.

    Returns:
        The tokenizer output, extended with a ``"labels"`` entry that holds
        ``example_batch["target"]``.
    """
    # NOTE: the tokenizer is re-created on every call of this function.
    roberta_tokenizer = transformers.RobertaTokenizer.from_pretrained(model_name)
    tokenizer_options = dict(
        max_length=max_length,
        truncation=True,
        padding='max_length',
    )
    encoded = roberta_tokenizer(
        list(example_batch["processed_data"]), **tokenizer_options
    )
    encoded["labels"] = example_batch["target"]
    return encoded


class roberta_base_model(RobertaPreTrainedModel):
    """RoBERTa encoder shared by several task-specific classification heads.

    One linear head is created per entry of the ``task_labels_map`` keyword
    argument (task name -> number of labels). Heads are stored as
    ``classifier1`` ... ``classifierN`` following the order of that mapping,
    and ``forward`` picks the head that belongs to ``task_name``.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and one head per task.

        Args:
            config: Model configuration used for the encoder and head sizes.
            **kwargs: ``task_labels_map`` (dict, task name -> number of
                labels) is read from here; it defaults to an empty dict.
        """
        # Initialise the base class with the real config rather than a blank one.
        super().__init__(config)
        # NOTE: unlike stock classifiers, ``num_labels`` is a dict here.
        self.num_labels = kwargs.get("task_labels_map", {})

        self.roberta = RobertaModel(config)
        classifier_dropout = (
            config.classifier_dropout
            if config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        # Task-specific output heads. The attribute names classifier1..N are
        # kept so that parameter names stay the same as with the former
        # hand-written heads.
        for head_index, head_size in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{head_index}",
                nn.Linear(config.hidden_size, head_size),
            )

        self.init_weights()

    @add_start_docstrings_to_model_forward(
        ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the sequence classification/regression loss. With one label for the task a
            regression loss is computed (Mean-Square loss), with more than one label and integer labels a
            classification loss is computed (Cross-Entropy), otherwise a multi-label loss (BCE with logits).
            The problem type is inferred on the first labelled call and then cached on the config.
        task_name (:obj:`str`):
            Key of ``task_labels_map`` selecting the classification head to apply.

        Raises :obj:`ValueError` if ``task_name`` is not one of the configured tasks.
        """
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # The second encoder output is used as the pooled sentence representation.
        pooled_output = self.dropout(outputs[1])

        # Select the head by the position of the task in the label map.
        task_names = list(self.num_labels)
        if task_name not in task_names:
            raise ValueError(
                f"Unknown task_name {task_name!r}; expected one of {task_names}"
            )
        head = getattr(self, f"classifier{task_names.index(task_name) + 1}")
        logits = head(pooled_output)

        loss = None
        if labels is not None:
            task_num_labels = self.num_labels[task_name]
            if self.config.problem_type is None:
                if task_num_labels == 1:
                    self.config.problem_type = "regression"
                elif task_num_labels > 1 and labels.dtype in (torch.long, torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if task_num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, task_num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
        
        
def roberta_base_model_loading():
    """Instantiate the multitask RoBERTa model from ``roberta-base``.

    Returns:
        The ``roberta_base_model`` built by ``from_pretrained`` with a
        ``task_labels_map`` giving 3 labels to each of the seven aspect tasks.
    """
    aspect_names = (
        "stayingpower", "texture", 'smell', 'price',
        'colour', 'shipping', 'packing',
    )
    # Order matters: the model numbers its heads by position in this mapping.
    label_counts = {aspect: 3 for aspect in aspect_names}
    return roberta_base_model.from_pretrained(
        'roberta-base',
        task_labels_map=label_counts,
    )

## Roberta-large

In [None]:

def roberta_large_convert_to_features(
    example_batch, model_name='roberta-large', max_length=128
):
    """Tokenize a batch of texts with a RoBERTa tokenizer and attach the labels.

    Args:
        example_batch: Mapping providing the texts under ``"processed_data"``
            and the labels under ``"target"``.
        model_name: Checkpoint name handed to
            ``RobertaTokenizer.from_pretrained``.
        max_length: Length every sequence is truncated and padded to.

    Returns:
        The tokenizer output, extended with a ``"labels"`` entry that holds
        ``example_batch["target"]``.
    """
    # NOTE: the tokenizer is re-created on every call of this function.
    roberta_tokenizer = transformers.RobertaTokenizer.from_pretrained(model_name)
    tokenizer_options = dict(
        max_length=max_length,
        truncation=True,
        padding='max_length',
    )
    encoded = roberta_tokenizer(
        list(example_batch["processed_data"]), **tokenizer_options
    )
    encoded["labels"] = example_batch["target"]
    return encoded


class roberta_large_model(RobertaPreTrainedModel):
    """RoBERTa encoder shared by several task-specific classification heads.

    One linear head is created per entry of the ``task_labels_map`` keyword
    argument (task name -> number of labels). Heads are stored as
    ``classifier1`` ... ``classifierN`` following the order of that mapping,
    and ``forward`` picks the head that belongs to ``task_name``.
    """

    def __init__(self, config, **kwargs):
        """Build the encoder, the dropout layer and one head per task.

        Args:
            config: Model configuration used for the encoder and head sizes.
            **kwargs: ``task_labels_map`` (dict, task name -> number of
                labels) is read from here; it defaults to an empty dict.
        """
        # Initialise the base class with the real config rather than a blank one.
        super().__init__(config)
        # NOTE: unlike stock classifiers, ``num_labels`` is a dict here.
        self.num_labels = kwargs.get("task_labels_map", {})

        self.roberta = RobertaModel(config)
        classifier_dropout = (
            config.classifier_dropout
            if config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        # Task-specific output heads. The attribute names classifier1..N are
        # kept so that parameter names stay the same as with the former
        # hand-written heads.
        for head_index, head_size in enumerate(self.num_labels.values(), start=1):
            setattr(
                self,
                f"classifier{head_index}",
                nn.Linear(config.hidden_size, head_size),
            )

        self.init_weights()

    @add_start_docstrings_to_model_forward(
        ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")
    )
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        task_name=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the sequence classification/regression loss. With one label for the task a
            regression loss is computed (Mean-Square loss), with more than one label and integer labels a
            classification loss is computed (Cross-Entropy), otherwise a multi-label loss (BCE with logits).
            The problem type is inferred on the first labelled call and then cached on the config.
        task_name (:obj:`str`):
            Key of ``task_labels_map`` selecting the classification head to apply.

        Raises :obj:`ValueError` if ``task_name`` is not one of the configured tasks.
        """
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # The second encoder output is used as the pooled sentence representation.
        pooled_output = self.dropout(outputs[1])

        # Select the head by the position of the task in the label map.
        task_names = list(self.num_labels)
        if task_name not in task_names:
            raise ValueError(
                f"Unknown task_name {task_name!r}; expected one of {task_names}"
            )
        head = getattr(self, f"classifier{task_names.index(task_name) + 1}")
        logits = head(pooled_output)

        loss = None
        if labels is not None:
            task_num_labels = self.num_labels[task_name]
            if self.config.problem_type is None:
                if task_num_labels == 1:
                    self.config.problem_type = "regression"
                elif task_num_labels > 1 and labels.dtype in (torch.long, torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if task_num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, task_num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
        
        
def roberta_large_model_loading():
    """Instantiate the multitask RoBERTa model from ``roberta-large``.

    Returns:
        The ``roberta_large_model`` built by ``from_pretrained`` with a
        ``task_labels_map`` giving 3 labels to each of the seven aspect tasks.
    """
    # Use the class that belongs to this loader; the former code instantiated
    # ``roberta_base_model`` here (copy-paste slip).
    multitask_model = roberta_large_model.from_pretrained(
        'roberta-large',
        task_labels_map={"stayingpower": 3, "texture": 3, 'smell':3, 'price':3,
                         'colour':3, 'shipping':3, 'packing':3},
    )
    return multitask_model

# Data loader

In [None]:


class NLPDataCollator:
    """
    Collate per-example features into a single batch of tensors.

    Dict features (one dict per example) are stacked key by key; any other
    feature type is delegated to ``transformers.default_data_collator``.
    """

    def __call__(
        # The annotation is quoted so that it is resolved lazily.
        self, features: List[Union["InputDataClass", Dict]]
    ) -> Dict[str, torch.Tensor]:
        """Build the batch for ``features``.

        Args:
            features: Non-empty list of examples. For dict examples every
                non-string, non-``None`` value other than ``"labels"`` must be
                a tensor, because values are combined with ``torch.stack``.

        Returns:
            Dict mapping each kept key to the stacked tensor. ``"labels"`` is
            present only when the first example carries a non-``None`` label;
            it is cast to ``torch.long`` when the first label is int64 and to
            ``torch.float`` otherwise.
        """
        first = features[0]
        if not isinstance(first, dict):
            # Not a dict-style example: fall back to the library default.
            return transformers.default_data_collator(features)

        # Start from an empty batch so unlabeled examples are supported too.
        batch = {}
        if first.get("labels") is not None:
            label_tensors = [torch.as_tensor(f["labels"]) for f in features]
            label_dtype = (
                torch.long if label_tensors[0].dtype == torch.int64 else torch.float
            )
            # Stacking (rather than torch.tensor on a list of tensors) also
            # handles labels that are vectors instead of scalars.
            batch["labels"] = torch.stack(label_tensors).to(label_dtype)
        for k, v in first.items():
            if k != "labels" and v is not None and not isinstance(v, str):
                batch[k] = torch.stack([f[k] for f in features])
        return batch


class StrIgnoreDevice(str):
    """
    A ``str`` subclass that tolerates being "moved" to a device.

    Workaround: the Trainer calls ``.to(device)`` on every input value, while
    the batches here carry an extra ``task_name`` string. Giving the string a
    no-op ``to`` keeps that call from raising.
    """

    def to(self, device):
        """Ignore ``device`` and return this very string object."""
        return self


class DataLoaderWithTaskname:
    """
    Thin wrapper that tags every batch of a data loader with a task name.
    """

    def __init__(self, task_name, data_loader):
        """Store the wrapped loader and mirror its ``batch_size``/``dataset``.

        Args:
            task_name: Name written into every yielded batch.
            data_loader: Loader to wrap; it must expose ``batch_size`` and
                ``dataset`` and yield mutable mappings.
        """
        self.data_loader = data_loader
        self.task_name = task_name

        # Mirror the attributes of the wrapped loader on the wrapper itself.
        self.dataset = data_loader.dataset
        self.batch_size = data_loader.batch_size

    def __len__(self):
        """Return the length reported by the wrapped loader."""
        wrapped = self.data_loader
        return len(wrapped)

    def __iter__(self):
        """Yield each batch of the wrapped loader with ``"task_name"`` added."""
        for tagged_batch in self.data_loader:
            # Wrapped so that a later ``.to(device)`` on the value is a no-op.
            tagged_batch["task_name"] = StrIgnoreDevice(self.task_name)
            yield tagged_batch


class MultitaskDataloader:
    """
    Iterable that interleaves the batches of several single-task data loaders.
    """

    def __init__(self, dataloader_dict):
        """Record the per-task loaders and their sizes.

        Args:
            dataloader_dict: Mapping task name -> loader. Each loader must
                support ``len()``, iteration and expose a sized ``dataset``.
        """
        self.dataloader_dict = dataloader_dict
        self.task_name_list = list(self.dataloader_dict)
        self.num_batches_dict = {
            name: len(loader) for name, loader in self.dataloader_dict.items()
        }
        # Placeholder list whose length is the total number of examples.
        total_examples = sum(
            len(loader.dataset) for loader in self.dataloader_dict.values()
        )
        self.dataset = [None] * total_examples

    def __len__(self):
        """Return the total number of batches over all tasks."""
        return sum(self.num_batches_dict.values())

    def __iter__(self):
        """
        Yield batches task by task in a shuffled order.

        Each task index appears once per batch of that task, so sampling is
        proportional to the number of batches; another distribution could be
        plugged in by building the schedule differently.
        """
        schedule = np.array(
            [
                task_index
                for task_index, name in enumerate(self.task_name_list)
                for _ in range(self.num_batches_dict[name])
            ]
        )
        np.random.shuffle(schedule)
        iterators = {
            name: iter(loader) for name, loader in self.dataloader_dict.items()
        }
        for task_index in schedule:
            yield next(iterators[self.task_name_list[task_index]])


class MultitaskTrainer(transformers.Trainer):
    """Trainer whose training loader mixes batches from several tasks.

    ``self.train_dataset`` is expected to be a mapping task name -> dataset.
    """

    def get_single_train_dataloader(self, task_name, train_dataset):
        """
        Build the loader of one task; its batches carry the task name.

        Raises:
            ValueError: if the trainer has no ``train_dataset``.
        """
        if self.train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")

        # A local rank of -1 selects plain random sampling; anything else
        # selects the distributed sampler.
        if self.args.local_rank == -1:
            sampler = RandomSampler(train_dataset)
        else:
            sampler = DistributedSampler(train_dataset)

        task_loader = DataLoader(
            train_dataset,
            batch_size=self.args.train_batch_size,
            sampler=sampler,
            collate_fn=self.data_collator,
        )
        return DataLoaderWithTaskname(task_name=task_name, data_loader=task_loader)

    def get_train_dataloader(self):
        """
        Return a MultitaskDataloader over all tasks.

        The result is not a real DataLoader but an iterable that samples
        batches from the per-task loaders.
        """
        per_task_loaders = {}
        for name, task_dataset in self.train_dataset.items():
            per_task_loaders[name] = self.get_single_train_dataloader(
                name, task_dataset
            )
        return MultitaskDataloader(per_task_loaders)

# Define model

# Trainer

In [None]:
# Project root: the parent directory of the current working directory.
root_path = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir)
)
# The CSV datasets are read from the "dataset" folder below the root.
data_path = f'{root_path}/dataset'

In [None]:
def load_dataset_from_df(aspect):
    """Load the train/test/val CSV files of one aspect as a ``DatasetDict``.

    Args:
        aspect: File stem of the CSVs below ``<data_path>/stage2/<split>/``.

    Returns:
        A ``DatasetDict`` with the splits ``'train'``, ``'test'`` and
        ``'val'``, each holding only the ``processed_data`` (cast to string)
        and ``target`` columns.
    """
    csv_templates = {
        'train': "{}/stage2/train/{}.csv",
        'test': "{}/stage2/test/{}.csv",
        'val': "{}/stage2/val/{}.csv",
    }
    frames = {
        split: pd.read_csv(template.format(data_path, aspect))
        for split, template in csv_templates.items()
    }
    # Force the text column to string so every row has the same type.
    for frame in frames.values():
        frame['processed_data'] = frame['processed_data'].astype("str")

    ds = DatasetDict()
    for split, frame in frames.items():
        ds[split] = Dataset.from_pandas(frame[['processed_data', 'target']])
    return ds

In [None]:

# Make sure the NLTK "punkt" resource can be found; otherwise download it.
try:
    nltk.data.find("tokenizers/punkt")
except (LookupError, OSError):
    # No download is attempted when is_offline_mode() reports offline mode.
    if is_offline_mode():
        raise LookupError(
            "Offline mode: run this script without TRANSFORMERS_OFFLINE first to download nltk data files"
        )
    # presumably the lock keeps parallel runs from downloading at once -- TODO confirm
    with FileLock(".lock") as lock:
        nltk.download("punkt", quiet=True)

# Aspect names; each one is a separate classification task.
list_aspect = [
    'stayingpower', 'texture', 'smell', 'price', 'colour', 'shipping', 'packing',
]
# One DatasetDict (train/test/val) per aspect, in the order of list_aspect.
dataset_dict = {aspect: load_dataset_from_df(aspect) for aspect in list_aspect}

# Show the first training example of every task as a sanity check.
for task_name, dataset in dataset_dict.items():
    print(task_name)
    print(dataset["train"][0])
    print()

In [None]:
class _LazyModelEntry:
    """List-like ``[convert_fn, model]`` pair that builds the model on demand.

    Index 0 returns the feature-conversion function and index 1 returns the
    model. The loader is called the first time index 1 is read and its result
    is cached, so only models that are actually used get instantiated.
    """

    def __init__(self, convert_fn, model_loader):
        self._convert_fn = convert_fn
        self._model_loader = model_loader
        self._model = None

    def __len__(self):
        return 2

    def __getitem__(self, index):
        if index in (0, -2):
            return self._convert_fn
        if index in (1, -1):
            if self._model is None:
                self._model = self._model_loader()
            return self._model
        raise IndexError("model entry index out of range")


# Registry: model key -> [feature converter, model]. Only the loader
# *functions* are stored here; calling all twelve loaders up front would
# instantiate every checkpoint although a single one is selected later.
model_dict = {
    'phobert_base': _LazyModelEntry(phobert_base_convert_to_features, phobert_base_model_loading),
    'phobert_large': _LazyModelEntry(phobert_large_convert_to_features, phobert_large_model_loading),
    'velectra_base': _LazyModelEntry(velectra_base_convert_to_features, velectra_base_model_loading),
    'bert_base': _LazyModelEntry(bert_base_convert_to_features, bert_base_model_loading),
    'bert_large': _LazyModelEntry(bert_large_convert_to_features, bert_large_model_loading),
    'distilbert_base': _LazyModelEntry(distilbert_base_convert_to_features, distilbert_base_model_loading),
    'distilbert_large': _LazyModelEntry(distilbert_large_convert_to_features, distilbert_large_model_loading),
    'xlm_base': _LazyModelEntry(xlm_base_convert_to_features, xlm_base_model_loading),
    'xlm_large': _LazyModelEntry(xlm_large_convert_to_features, xlm_large_model_loading),
    'vibert_base': _LazyModelEntry(vibert_base_convert_to_features, vibert_base_model_loading),
    'roberta_base': _LazyModelEntry(roberta_base_convert_to_features, roberta_base_model_loading),
    'roberta_large': _LazyModelEntry(roberta_large_convert_to_features, roberta_large_model_loading),
}

In [None]:
## Select the model here
model_name = 'phobert_base'

# One entry per aspect; each defaults to the selected model key and is
# replaced by a local directory when saved files for that aspect exist.
model_names = [model_name] * len(list_aspect)
# Independent copy: binding the same list to both names would make every
# config override below overwrite the matching model entry as well.
config_files = list(model_names)
for idx, task_name in enumerate(list_aspect):
    model_file = Path(f"./{task_name}_model/pytorch_model.bin")
    config_file = Path(f"./{task_name}_model/config.json")
    if model_file.is_file():
        model_names[idx] = f"./{task_name}_model"

    if config_file.is_file():
        config_files[idx] = f"./{task_name}_model"


## Training

In [None]:
# Every task is tokenized with the converter of the selected model.
convert_func_dict = {
    task: model_dict[model_name][0]
    for task in (
        "stayingpower", "texture", "smell", "price",
        "colour", "shipping", "packing",
    )
}

# Columns exposed as torch tensors for each task (a fresh list per task).
columns_dict = {
    task: ["input_ids", "attention_mask", "labels"]
    for task in (
        "stayingpower", "texture", "smell", "price",
        "colour", "shipping", "packing",
    )
}

# features_dict[task][phase] holds the tokenized dataset of that split.
features_dict = {}
for task_name, dataset in dataset_dict.items():
    features_dict[task_name] = {}
    for phase, phase_dataset in dataset.items():
        # Tokenize in batches; the datasets cache is bypassed on purpose.
        features_dict[task_name][phase] = phase_dataset.map(
            convert_func_dict[task_name],
            load_from_cache_file=False,
            batched=True,
        )
        # Row counts before and after mapping.
        print(
            task_name, phase, len(phase_dataset), len(features_dict[task_name][phase])
        )
        features_dict[task_name][phase].set_format(
            columns=columns_dict[task_name],
            type="torch",
        )
        # Same counts again, now after restricting the output format.
        print(
            task_name, phase, len(phase_dataset), len(features_dict[task_name][phase])
        )


In [None]:
# Training split of every task, keyed by task name.
train_dataset = {
    name: task_features["train"] for name, task_features in features_dict.items()
}

# Multitask trainer for the selected model; checkpoints go to 'model/'.
trainer = MultitaskTrainer(
    model=model_dict[model_name][1],
    train_dataset=train_dataset,
    data_collator=NLPDataCollator(),
    args=transformers.TrainingArguments(
        output_dir='model/',
        overwrite_output_dir=True,
        do_train=True,
        num_train_epochs=10,
        learning_rate=1e-5,
        # Adjust batch size if this doesn't fit on the Colab GPU
        per_device_train_batch_size=16,
        save_steps=3000,
    ),
)
trainer.train()

# Evaluation on the test set

In [None]:
# Predictions on the test split, keyed by task name.
preds_dict = {}
for task_name in list_aspect:
    # Wrap the evaluation loader so each batch carries its task name.
    eval_dataloader = DataLoaderWithTaskname(
        task_name=task_name,
        data_loader=trainer.get_eval_dataloader(
            eval_dataset=features_dict[task_name]["test"]
        ),
    )
    print(eval_dataloader)
    print(eval_dataloader.data_loader.collate_fn)
    preds_dict[task_name] = trainer.prediction_loop(
        eval_dataloader,
        description=f"test: {task_name}",
    )

In [None]:
# Reports are stored per model, so results of different models do not
# overwrite each other (the folder used to be hard-coded to phobert_base).
result_dir = "{}/result/stage2_sentiment_classification/{}/test".format(root_path, model_name)
# to_csv does not create missing folders.
os.makedirs(result_dir, exist_ok=True)
for aspect in list_aspect:
  print(aspect)
  df_label_test = pd.read_csv("{}/stage2/test/{}.csv".format(data_path, aspect))
  # Predicted class = index of the largest logit.
  temp = np.argmax(preds_dict[aspect].predictions, axis=1)
  print(classification_report(df_label_test['target'], temp))
  report = classification_report(df_label_test['target'], temp, output_dict=True)
  df = pd.DataFrame(report).transpose()
  df.to_csv("{}/{}.csv".format(result_dir, aspect), index=True)
  print("*"*100)

# Evaluation on the validation set

In [None]:
# Predictions on the validation split, keyed by task name.
preds_dict = {}
for task_name in list_aspect:
    # Wrap the evaluation loader so each batch carries its task name.
    eval_dataloader = DataLoaderWithTaskname(
        task_name=task_name,
        data_loader=trainer.get_eval_dataloader(
            eval_dataset=features_dict[task_name]["val"]
        ),
    )
    print(eval_dataloader)
    print(eval_dataloader.data_loader.collate_fn)
    preds_dict[task_name] = trainer.prediction_loop(
        eval_dataloader,
        description=f"val: {task_name}",
    )

In [None]:
# Reports are stored per model, so results of different models do not
# overwrite each other (the folder used to be hard-coded to phobert_base).
result_dir = "{}/result/stage2_sentiment_classification/{}/val".format(root_path, model_name)
# to_csv does not create missing folders.
os.makedirs(result_dir, exist_ok=True)
for aspect in list_aspect:
  print(aspect)
  df_label_test = pd.read_csv("{}/stage2/val/{}.csv".format(data_path, aspect))
  # Predicted class = index of the largest logit.
  temp = np.argmax(preds_dict[aspect].predictions, axis=1)
  print(classification_report(df_label_test['target'], temp))
  report = classification_report(df_label_test['target'], temp, output_dict=True)
  df = pd.DataFrame(report).transpose()
  df.to_csv("{}/{}.csv".format(result_dir, aspect), index=True)
  print("*"*100)