In [1]:
import logging
import os
import argparse

import numpy as np
from torch.utils.data.dataset import Dataset
from transformers.data.processors.utils import InputExample, InputFeatures
from transformers.data.processors.glue import glue_convert_examples_to_features
from transformers.data.processors.utils import DataProcessor
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)
from sfda.models import sfdaRobertaNegation
from sfda.train_utils import sfdaTrainer

In [2]:
# Module-level logger for this notebook's progress messages.
logger = logging.getLogger(__name__)

# Maximum sequence length handed to the GLUE feature converter.
max_length = 128

# Label strings recognised in the first TSV column; also used as the
# converter's label list.
labels = ["-1", "1"]


In [3]:
class NegationDataset(Dataset):
    """In-memory torch ``Dataset`` over pre-computed input features.

    Instances are normally built with :meth:`from_tsv`, which reads a TSV
    file, turns every row into an unlabeled ``InputExample`` and converts the
    examples to features with ``glue_convert_examples_to_features``.
    """

    def __init__(self, features):
        """Store ``features`` (an indexable sequence) and the label strings."""
        self.features = features
        self.label_list = ["-1", "1"]

    def __len__(self):
        """Number of stored feature items."""
        return len(self.features)

    def __getitem__(self, i) -> "InputFeatures":
        """Return the feature item at position ``i``."""
        return self.features[i]

    def get_labels(self):
        """Return the list of label strings; its index order maps class index -> label."""
        return self.label_list

    @staticmethod
    def _text_from_row(row, known_labels):
        """Join the cells of one TSV row back into a single tab-separated text.

        If the first cell is one of ``known_labels`` it is treated as a label
        column and dropped. An empty row (what ``csv.reader`` produces for a
        blank line) yields ``''`` instead of raising ``IndexError``.
        """
        if row and row[0] in known_labels:
            row = row[1:]
        return '\t'.join(row)

    @classmethod
    def from_tsv(cls, tsv_file, tokenizer):
        """Build a dataset from ``tsv_file`` using ``tokenizer``.

        Every row of the file becomes exactly one example, in file order, with
        guid ``instance-<row index>`` and ``label=None``. Blank rows are kept
        as empty-text examples so that the i-th prediction still corresponds
        to the i-th line of the file.

        Args:
            tsv_file: Path of the tab-separated input file.
            tokenizer: Tokenizer passed to ``glue_convert_examples_to_features``.

        Returns:
            A new instance of ``cls`` wrapping the converted features.
        """
        rows = DataProcessor._read_tsv(tsv_file)
        examples = [
            InputExample(
                guid='instance-%d' % i,
                text_a=cls._text_from_row(row, labels),
                text_b=None,
                label=None,
            )
            for i, row in enumerate(rows)
        ]

        features = glue_convert_examples_to_features(
            examples,
            tokenizer,
            max_length=max_length,
            label_list=labels,
            output_mode='classification',
        )
        return cls(features)

In [46]:
from transformers.trainer_utils import nested_concat,nested_numpify,Any
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import Dataset
import torch.nn as nn
import torch
from transformers.file_utils import is_torch_tpu_available
from tqdm import tqdm as tqdm

from typing import NamedTuple, Union,Tuple,Optional,Dict
import numpy as np
import logging
from transformers import Trainer


# Module logger; the prediction loop defined below in this cell logs through it.
logger = logging.getLogger(__name__)
class sfdaPredictionOutput(NamedTuple):
    """Return value of ``sfdaTrainer.predict`` / ``sfdaTrainer.prediction_loop``.

    Carries predictions, label ids and metrics, plus an extra ``feat_matrix``
    field for the extracted feature vectors.
    """

    # Model outputs accumulated over the whole dataloader (one array, or a tuple of arrays).
    predictions: Union[np.ndarray, Tuple[np.ndarray]]
    # Labels accumulated over the dataloader; None when the batches carried no labels.
    label_ids: Optional[np.ndarray]
    # Metric name -> value; the prediction loop prefixes every key with "eval_".
    metrics: Optional[Dict[str, float]]
    # Per-example feature vectors; None when feature extraction was not requested.
    feat_matrix: Optional[np.ndarray]

class sfdaTrainer(Trainer):
    """``Trainer`` subclass whose prediction path can also return feature vectors.

    ``predict``, ``prediction_loop`` and ``prediction_step`` are overridden to
    accept a ``ret_feats`` flag. When it is ``True``, the first-position slice
    of the model's ``last_hidden_state`` is collected for every example and
    returned as ``feat_matrix`` in an :class:`sfdaPredictionOutput`.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments unchanged to ``Trainer.__init__``."""
        super(sfdaTrainer, self).__init__(**kwargs)

    def predict(self, test_dataset: Dataset, ret_feats: Optional[bool] = None) -> sfdaPredictionOutput:
        """
        Run prediction and returns predictions and potential metrics.

        Depending on the dataset and your use case, your test dataset may contain labels.
        In that case, this method will also return metrics, like in :obj:`evaluate()`.

        Args:
            test_dataset (:obj:`Dataset`):
                Dataset to run the predictions on. If it is an :obj:`datasets.Dataset`, columns not accepted by the
                ``model.forward()`` method are automatically removed.
            ret_feats (:obj:`bool`, `optional`):
                Pass ``True`` to also collect per-example feature vectors.

        Returns:
            `NamedTuple`:
            predictions (:obj:`np.ndarray`):
                The predictions on :obj:`test_dataset`.
            label_ids (:obj:`np.ndarray`, `optional`):
                The labels (if the dataset contained some).
            metrics (:obj:`Dict[str, float]`, `optional`):
                The potential dictionary of metrics (if the dataset contained labels).
            feat_matrix (:obj:`np.ndarray`, `optional`):
                The collected feature vectors (only when ``ret_feats`` is ``True``).
        """
        test_dataloader = self.get_test_dataloader(test_dataset)
        return self.prediction_loop(test_dataloader, description="Prediction", ret_feats=ret_feats)

    def prediction_loop(
        self,
        dataloader: DataLoader,
        description: str,
        prediction_loss_only: Optional[bool] = None,
        ret_feats: Optional[bool] = None,
    ) -> sfdaPredictionOutput:
        """
        Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`.

        Works both with or without labels.

        Args:
            dataloader: Batches to run the model on.
            description: Text used for the log header and the progress bar.
            prediction_loss_only: When ``None``, falls back to ``self.args.prediction_loss_only``.
            ret_feats: Forwarded to :meth:`prediction_step`; ``True`` enables feature collection.

        Returns:
            An :class:`sfdaPredictionOutput` whose array fields are numpy arrays (or ``None``).
        """
        if hasattr(self, "_prediction_loop"):
            # Imported here because this cell has no module-level `import warnings`.
            import warnings

            warnings.warn(
                "The `_prediction_loop` method is deprecated and won't be called in a future version, define `prediction_loop` in your subclass.",
                FutureWarning,
            )
            return self._prediction_loop(dataloader, description, prediction_loss_only=prediction_loss_only)

        prediction_loss_only = (
            prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
        )

        assert not getattr(
            self.model.config, "output_attentions", False
        ), "The prediction loop does not work with `output_attentions=True`."
        assert not getattr(
            self.model.config, "output_hidden_states", False
        ), "The prediction loop does not work with `output_hidden_states=True`."

        # multi-gpu eval
        model = self.model
        if self.args.n_gpu > 1:
            model = torch.nn.DataParallel(model)
        # Note: in torch.distributed mode, there's no point in wrapping the model
        # inside a DistributedDataParallel as we'll be under `no_grad` anyways.

        batch_size = dataloader.batch_size
        logger.info("***** Running %s *****", description)
        logger.info("  Num examples = %d", self.num_examples(dataloader))
        logger.info("  Batch size = %d", batch_size)
        eval_losses = []  # one float per example (the batch loss repeated batch_size times)
        preds = None
        label_ids = None
        feat_mat = None
        model.eval()

        if is_torch_tpu_available():
            # NOTE(review): `pl` (and `xm` further down) are not imported anywhere in the visible
            # notebook; the TPU path raises NameError unless they are defined elsewhere.
            dataloader = pl.ParallelLoader(dataloader, [self.args.device]).per_device_loader(self.args.device)

        if self.args.past_index >= 0:
            self._past = None

        disable_tqdm = not self.is_local_process_zero() or self.args.disable_tqdm
        for inputs in tqdm(dataloader, desc=description, disable=disable_tqdm):
            loss, logits, batch_labels, feats = self.prediction_step(
                model, inputs, prediction_loss_only, ret_feats=ret_feats
            )
            # Actual size of this batch, read from the first tensor in the input dict.
            batch_size = inputs[list(inputs.keys())[0]].shape[0]
            if loss is not None:
                eval_losses.extend([loss] * batch_size)
            if logits is not None:
                preds = logits if preds is None else nested_concat(preds, logits, dim=0)
            if batch_labels is not None:
                label_ids = batch_labels if label_ids is None else nested_concat(label_ids, batch_labels, dim=0)
            if feats is not None:
                feat_mat = feats if feat_mat is None else nested_concat(feat_mat, feats, dim=0)

        # Clean the state at the end of the evaluation loop. Checking only for the attribute
        # (not the truthiness of `past_index`) makes sure it is also removed when past_index == 0.
        if hasattr(self, "_past"):
            delattr(self, "_past")

        if self.args.local_rank != -1:
            # Same module this cell already imports `nested_concat` / `nested_numpify` from.
            from transformers.trainer_utils import distributed_concat

            # In distributed mode, concatenate all results from all nodes:
            if preds is not None:
                preds = distributed_concat(preds, num_total_examples=self.num_examples(dataloader))
            if label_ids is not None:
                label_ids = distributed_concat(label_ids, num_total_examples=self.num_examples(dataloader))
            if feat_mat is not None:
                feat_mat = distributed_concat(feat_mat, num_total_examples=self.num_examples(dataloader))

        elif is_torch_tpu_available():
            from transformers.trainer_utils import nested_xla_mesh_reduce

            # tpu-comment: Get all predictions and labels from all worker shards of eval dataset
            if preds is not None:
                preds = nested_xla_mesh_reduce(preds, "eval_preds")
            if label_ids is not None:
                label_ids = nested_xla_mesh_reduce(label_ids, "eval_label_ids")
            if feat_mat is not None:
                feat_mat = nested_xla_mesh_reduce(feat_mat, "eval_feat_mat")
            if eval_losses is not None:
                # NOTE(review): `xm` is unresolved in the visible code (see note above).
                eval_losses = xm.mesh_reduce("eval_losses", torch.tensor(eval_losses), torch.cat).tolist()

        # Finally, turn the aggregated tensors into numpy arrays.
        if preds is not None:
            preds = nested_numpify(preds)
        if label_ids is not None:
            label_ids = nested_numpify(label_ids)
        if feat_mat is not None:
            feat_mat = nested_numpify(feat_mat)

        if self.compute_metrics is not None and preds is not None and label_ids is not None:
            from transformers import EvalPrediction

            metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
        else:
            metrics = {}
        if len(eval_losses) > 0:
            if self.args.local_rank != -1:
                from transformers.trainer_utils import distributed_broadcast_scalars

                metrics["eval_loss"] = (
                    distributed_broadcast_scalars(eval_losses, num_total_examples=self.num_examples(dataloader))
                    .mean()
                    .item()
                )
            else:
                metrics["eval_loss"] = np.mean(eval_losses)

        # Prefix all keys with eval_
        for key in list(metrics.keys()):
            if not key.startswith("eval_"):
                metrics[f"eval_{key}"] = metrics.pop(key)

        return sfdaPredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics, feat_matrix=feat_mat)

    def prediction_step(
        self,
        model: nn.Module,
        inputs: Dict[str, Union[torch.Tensor, Any]],
        prediction_loss_only: bool,
        ret_feats: Optional[bool] = None,
    ) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
        """
        Perform an evaluation step on :obj:`model` using obj:`inputs`.

        Subclass and override to inject custom behavior.

        Args:
            model (:obj:`nn.Module`):
                The model to evaluate.
            inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`):
                The inputs and targets of the model.

                The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
                argument :obj:`labels`. Check your model's documentation for all accepted arguments.
            prediction_loss_only (:obj:`bool`):
                Whether or not to return the loss only. Not consulted when ``ret_feats`` is ``True``.
            ret_feats (:obj:`bool`, `optional`):
                Only the literal ``True`` enables feature extraction; any other value (including the
                default ``None``) takes the regular path and returns ``None`` for the features.

        Return:
            Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
            A tuple with the loss, logits, labels and features (each being optional).
        """
        has_labels = all(inputs.get(k) is not None for k in self.args.label_names)
        inputs = self._prepare_inputs(inputs)

        with torch.no_grad():
            if ret_feats is True:
                # Feature mode: ask for attribute-style outputs and return straight away.
                outputs = model(**inputs, return_dict=True)
                loss = outputs.loss
                logits = outputs.logits
                # First position along axis 1 of a rank-3 tensor.
                # NOTE(review): assumes the model's output exposes `last_hidden_state`
                # laid out as (batch, sequence, hidden) -- confirm against the model class.
                feats = outputs.last_hidden_state[:, 0, :].detach()
                labels = None
                if has_labels:
                    # The .mean() is to reduce in case of distributed training
                    loss = loss.mean().item()
                    labels = tuple(inputs.get(name).detach() for name in self.args.label_names)
                    if len(labels) == 1:
                        labels = labels[0]
                return (loss, logits, labels, feats)

            feats = None
            outputs = model(**inputs)
            if has_labels:
                # The .mean() is to reduce in case of distributed training
                loss = outputs[0].mean().item()
                logits = outputs[1:]
            else:
                loss = None
                # Slicing so we get a tuple even if `outputs` is a `ModelOutput`.
                logits = outputs[:]
            if self.args.past_index >= 0:
                self._past = outputs[self.args.past_index if has_labels else self.args.past_index - 1]

        if prediction_loss_only:
            return (loss, None, None, feats)

        logits = tuple(logit.detach() for logit in logits)
        if len(logits) == 1:
            logits = logits[0]

        if has_labels:
            labels = tuple(inputs.get(name).detach() for name in self.args.label_names)
            if len(labels) == 1:
                labels = labels[0]
        else:
            labels = None
        return (loss, logits, labels, feats)

In [80]:
# TSV file the predictions are run on.
data_file = "../../practice_text/negation/dev.tsv"
# Directory that receives the prediction and feature files.
output_dir = "../../Output/negation/"

In [36]:
# Checkpoint identifier; the config and tokenizer here, and the model in a later cell,
# are all loaded from it.
model_name = "tmills/roberta_sfda_sharpseed"
config = AutoConfig.from_pretrained(model_name)
# The config loaded above is handed to the tokenizer loader as well.
tokenizer = AutoTokenizer.from_pretrained(model_name, config=config)
 

RobertaConfig {
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "finetuning_task": "negation",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "type_vocab_size": 1,
  "vocab_size": 50267
}

In [7]:
# Load the classifier from the same checkpoint, reusing the already-loaded config.
model = sfdaRobertaNegation.from_pretrained(model_name, config=config)

Some weights of the model checkpoint at tmills/roberta_sfda_sharpseed were not used when initializing sfdaRobertaNegation: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing sfdaRobertaNegation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing sfdaRobertaNegation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [81]:
# create a torch dataset from a tsv file
# Turn the TSV rows into a torch dataset of tokenized features.
test_dataset = NegationDataset.from_tsv(data_file, tokenizer)

# Prediction-only trainer: default training arguments rooted at 'save_run/', no metric function.
trainer = sfdaTrainer(model=model, args=TrainingArguments('save_run/'), compute_metrics=None)

You are instantiating a Trainer but Tensorboard is not installed. You should consider installing it.


In [82]:
# Run inference and also collect the per-example feature vectors.
# Despite its name, the result is an sfdaPredictionOutput named tuple, not a dict.
prediction_dict = trainer.predict(test_dataset=test_dataset, ret_feats=True)

Prediction: 100%|██████████| 694/694 [01:39<00:00,  6.96it/s]


In [83]:
# Model outputs (logits) accumulated by the prediction loop, one row per example.
scores = prediction_dict.predictions
# Predicted class index per example: position of the largest score along axis 1.
predictions = np.argmax(scores, axis=1)

In [84]:
# Display the feature matrix collected during prediction.
prediction_dict.feat_matrix

array([[-0.11388139,  0.4638905 ,  0.91159725, ...,  0.7817307 ,
        -0.42704606,  0.6504216 ],
       [-0.11873791,  0.465232  ,  0.90721726, ...,  0.7930985 ,
        -0.4324146 ,  0.6495148 ],
       [-0.0127169 , -0.15409257, -1.6846372 , ..., -0.622069  ,
         0.56058973, -0.92781883],
       ...,
       [-0.07401083, -0.14768088, -1.543641  , ..., -0.6272898 ,
         0.5359691 , -0.91750956],
       [-0.0574165 , -0.1520247 , -1.5837629 , ..., -0.62917686,
         0.544884  , -0.9164321 ],
       [-0.0663449 , -0.15132469, -1.568229  , ..., -0.62588316,
         0.54011476, -0.91360766]], dtype=float32)

In [67]:
# Spot-check: predicted class index of the second example.
predictions[1]

0

In [85]:
# Keep the feature matrix under its own name; the save cell writes it to disk.
feat_matrix = prediction_dict.feat_matrix

In [63]:
# Dimensions of the feature matrix.
feat_matrix.shape

(2886, 768)

In [86]:
# Persist the results: predicted labels as text, features and scores as numpy arrays.
os.makedirs(output_dir, exist_ok=True)
output_test_file = os.path.join(output_dir, 'dev_pred.tsv')
feat_matrix_file = os.path.join(output_dir, 'dev_scores_and_feat_mat.npy')

# One predicted label string per line, in dataset order.
with open(output_test_file, "w") as writer:
    logger.info("***** Test results *****")
    label_strings = test_dataset.get_labels()
    for class_index in predictions:
        writer.write("%s\n" % label_strings[class_index])

# Two arrays are written back-to-back into the same file: the feature matrix first,
# then the scores. Read them back with two consecutive np.load calls on one open handle.
with open(feat_matrix_file, 'wb') as file:
    np.save(file, feat_matrix)
    np.save(file, scores)

In [11]:
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension

Collecting ipywidgets
  Using cached ipywidgets-7.5.1-py2.py3-none-any.whl (121 kB)
Collecting widgetsnbextension~=3.5.0
  Using cached widgetsnbextension-3.5.1-py2.py3-none-any.whl (2.2 MB)
Installing collected packages: widgetsnbextension, ipywidgets
Successfully installed ipywidgets-7.5.1 widgetsnbextension-3.5.1
Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m
