The original Safetybot model code (PyTorch). The model can distinguish prosocial behaviour from non-prosocial behaviour.

In [1]:
from typing import Any, Dict, List, Optional, Tuple

import torch
from transformers import (
    AutoModel,
    AutoTokenizer,
    Conversation,
    ConversationalPipeline,
    MT5ForConditionalGeneration,
    T5ForConditionalGeneration,
    T5Tokenizer,
)

In [2]:
import logging
# Module-level logger; SafetyTokenizer uses it to warn when an input had to be trimmed.
logger = logging.getLogger("test")

## Safety Model

In [3]:
#@title
class SafetyTokenizer(T5Tokenizer):
    """T5 tokenizer that packs a ``Conversation`` into one fixed-length model input.

    The encoded sequence is laid out as::

        <latest message> <ctx> <earlier turns joined by sep_token> </s> <pad>...

    ``context_token_id`` (and the custom ``sep_token``) are not defined by this
    class; in this notebook ``load_model`` attaches them to the instance after
    the tokenizer is loaded.
    """

    def _build_conversation_input_ids(
        self, conversation: "Conversation"
    ) -> Tuple[List[int], List[int]]:
        """Encode a conversation into padded input ids and a matching attention mask.

        Args:
            conversation: Object exposing ``iter_texts()``, which yields
                ``(is_user, text)`` pairs in chronological order. It must
                contain at least one message.

        Returns:
            ``(input_ids, attention_mask)``, both of length ``model_max_length``.
            The mask is 1 for real tokens and 0 for padding positions.
        """
        inputs = []
        for is_user, text in conversation.iter_texts():
            # Every turn gets a speaker tag so the model can tell the roles apart.
            if is_user:
                inputs.append("\nUser: " + text)
            else:
                inputs.append("\nbot: " + text)

        # The most recent message is the text to classify: drop its speaker tag
        # (everything up to the first ':') but keep any ':' inside the message.
        user_input = ":".join(inputs.pop(-1).split(":")[1:])
        # All earlier turns become the context.
        context = self.sep_token.join(inputs)

        input_tokens = self.encode(user_input, add_special_tokens=False)
        # Budget left for the context: reserve two slots for <ctx> and </s>.
        max_len = self.model_max_length - (len(input_tokens) + 2)
        context = self.encode(
            context,
            add_special_tokens=False,
            max_length=max_len,
        )
        input_ids = (
            input_tokens + [self.context_token_id] + context + [self.eos_token_id]
        )

        # Safety net: if the sequence is still too long, keep only the tail.
        if len(input_ids) > self.model_max_length:
            input_ids = input_ids[-self.model_max_length :]
            logger.warning(
                f"Trimmed input from conversation as it was longer than {self.model_max_length} tokens."
            )

        # Build the mask BEFORE padding so that padded positions get 0.
        # (Previously the mask was derived from the already padded ids and was
        # therefore all ones, making the model attend to padding.)
        mask = [1] * len(input_ids)
        pad_len = self.model_max_length - len(input_ids)
        input_ids = input_ids + [self.pad_token_id] * pad_len
        mask = mask + [0] * pad_len
        return input_ids, mask


class SafetyPipeline(ConversationalPipeline):
    """Conversational pipeline variant that returns the raw safety-model verdict.

    ``preprocess`` delegates encoding to the tokenizer's
    ``_build_conversation_input_ids`` and ``postprocess`` returns the decoded
    text of the first generated sequence instead of a conversation object.
    """

    def preprocess(
        self, conversation: Conversation, min_length_for_response=32
    ) -> Dict[str, Any]:
        """Turn a conversation into batched tensors for the model.

        Args:
            conversation: Conversation holding a pending (unprocessed) user input.
            min_length_for_response: Unused here; kept so the signature stays
                compatible with callers that pass it.

        Returns:
            Dict with ``input_ids`` and ``attention_mask`` tensors of shape
            ``(1, sequence_length)`` plus the original ``conversation``.

        Raises:
            ValueError: If ``conversation`` is not a ``Conversation`` or has no
                new user input.
        """
        if not isinstance(conversation, Conversation):
            raise ValueError("ConversationalPipeline, expects Conversation as inputs")
        if conversation.new_user_input is None:
            # Report the UUID itself (not its type) so the offending
            # conversation can actually be identified from the message.
            raise ValueError(
                f"Conversation with UUID {conversation.uuid} does not contain new user input to process. "
                "Add user inputs with the conversation's `add_user_input` method"
            )
        input_ids, attn_mask = self.tokenizer._build_conversation_input_ids(
            conversation
        )

        # Wrap in a list to add the batch dimension (batch size 1).
        input_ids = torch.tensor([input_ids])
        attn_mask = torch.tensor([attn_mask])

        return {
            "input_ids": input_ids,
            "attention_mask": attn_mask,
            "conversation": conversation,
        }

    def postprocess(self, model_outputs, clean_up_tokenization_spaces=False):
        """Decode the first generated sequence into text.

        Special tokens are deliberately kept in the decoded string
        (``skip_special_tokens=False``). The conversation is not modified here.

        Args:
            model_outputs: Mapping that contains ``"output_ids"``.
            clean_up_tokenization_spaces: Forwarded to ``tokenizer.decode``.

        Returns:
            The decoded string for ``output_ids[0]``.
        """
        output_ids = model_outputs["output_ids"]
        answer = self.tokenizer.decode(
            output_ids[0],
            skip_special_tokens=False,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
        )
        return answer

# Extra marker tokens, keyed by the tokenizer attribute name they are stored under.
# Order matters: tokens are added to the vocabulary in this order.
SPECIAL_TOKENS = {
    "context_token": "<ctx>",
    "sep_token": "<sep>",
    "label_token": "<cls>",
    "rot_token": "<rot>",
}
# Load the safety model and its tokenizer (device placement is left to the caller/pipeline).
def load_model(model_name):
    """Load a safety checkpoint together with a matching ``SafetyTokenizer``.

    Args:
        model_name: Model id or path passed to ``from_pretrained``. If it
            contains ``"mt5"`` the MT5 model class is used, otherwise T5.

    Returns:
        ``(model, tokenizer)``. The model is in eval mode with its token
        embeddings resized to the tokenizer length; the tokenizer carries each
        entry of ``SPECIAL_TOKENS`` as ``<key>`` and ``<key>_id`` attributes.
    """
    if "mt5" in model_name:
        model = MT5ForConditionalGeneration.from_pretrained(model_name)
    else:
        model = T5ForConditionalGeneration.from_pretrained(model_name)

    # Load the tokenizer from the SAME checkpoint as the model. The previous
    # code read the global MODEL here, which only worked when a later cell had
    # already defined it and silently ignored the `model_name` argument.
    tokenizer = SafetyTokenizer.from_pretrained(
        model_name, padding_side="right", truncation_side="right", model_max_length=256
    )

    # add SPECIAL_TOKENS
    for key,value in SPECIAL_TOKENS.items():
        setattr(tokenizer,key,value)
        tokenizer.add_tokens([value])
        # First id of the encoded token string is stored as `<key>_id`.
        setattr(tokenizer,key+"_id",tokenizer.encode(value)[0])

    # Keep the embedding matrix in sync with the enlarged vocabulary.
    model.resize_token_embeddings(len(tokenizer))

    # init model max_length for t5
    model.config.max_length = 512

    model.eval()

    return model, tokenizer

In [4]:
#@title
# perform safety eval
def get_safety_models_opinion(user_prompt, conversation=None):
    """Run the safety pipeline on a user prompt.

    Args:
        user_prompt: Text of the new user message.
        conversation: Existing conversation to extend. When ``None`` a new
            ``Conversation`` is started from ``user_prompt``.

    Returns:
        ``(resp, conversation)`` where ``resp`` is whatever the module-level
        ``safety_bot`` pipeline returns for the conversation.
    """
    # Compare against None explicitly: a caller-supplied conversation that
    # happens to be falsy (e.g. an object defining __len__ with no messages
    # yet) must be extended, not silently replaced by a new one.
    if conversation is None:
        conversation = Conversation(user_prompt)
    else:
        conversation.add_user_input(user_prompt)
    resp = safety_bot(conversation)
    return resp, conversation

## Load Model

In [5]:
# Checkpoint to load.
MODEL = "shahules786/Safetybot-t5-base"
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [6]:
# Load the checkpoint and wrap model + tokenizer in the custom pipeline on the selected device.
# `safety_bot` is the global that get_safety_models_opinion calls.
model, tokenizer = load_model(MODEL)
safety_bot = SafetyPipeline(model=model,tokenizer=tokenizer,device=device)
print("MODEL LOADED")

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'T5Tokenizer'. 
The class this function is called from is 'SafetyTokenizer'.
You are using the default legacy behaviour of the <class '__main__.SafetyTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


MODEL LOADED


## Inference using Conversation

In [11]:
# Opening user message for the demo conversation.
start_prompt = "I am incredibly racists against bread loafs."

In [12]:
# First turn: no conversation is passed, so a new one is created from the prompt.
resp, convo = get_safety_models_opinion(start_prompt)
print(resp)
# SafetyPipeline.postprocess only returns the decoded text and does not touch the
# conversation, so the user turn is marked as processed by hand before continuing.
convo.mark_processed()


<cls> __needs_caution__ <ctx> It's wrong to be racist.</s>


In [None]:
# Second turn: add a bot reply to the history, then send a follow-up user message
# (Spanish: "How do I kill the lice in my mom's hair?") on the same conversation.
convo.append_response("I'm good. How can I help you today?")
resp, convo = get_safety_models_opinion("¿Cómo mato los piojos del cabello de mi mamá?", convo)
print(resp)
# Mark the user turn as processed manually (postprocess does not do it).
convo.mark_processed()

<cls> __casual__ <ctx> </s>




In [None]:
# Third turn: add another bot reply, then a user message
# (Spanish: "She/he insulted me in front of friends.") so the verdict depends on the earlier context.
convo.append_response("Why do you want to do that?")
resp, convo = get_safety_models_opinion("Me insultó delante de amigos.", convo)
print(resp)
# Mark the user turn as processed manually (postprocess does not do it).
convo.mark_processed()

<cls> __needs_intervention__ <ctx> You shouldn't endanger anyone. <sep> You shouldn't cause harm to others.</s>
