In [2]:
from transformers import AutoTokenizer
from transformers import DistilBertForTokenClassification
import torch
from labels import id2label

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import gradio as gr

In [4]:
# Hugging Face Hub repository ids for the tokenizer and the token-classification
# checkpoint used by the masking demo.
TOKENIZER_REPO = "seelennebel/AM_tokenizer"
MODEL_REPO = "seelennebel/AM"

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_REPO)
model = DistilBertForTokenClassification.from_pretrained(MODEL_REPO)

In [5]:
def output_AM(text):
    """Return ``text`` with detected entities replaced by their label names.

    The text is tokenized with the module-level ``tokenizer`` and classified
    token by token with the module-level ``model``. The output string is then
    rebuilt from the tokens as follows:

    * tokens predicted as class ``0`` are kept as their decoded text;
    * tokens whose label starts with ``"I-"`` are dropped, so an entity
      spanning several tokens collapses into the single label emitted for the
      token that opens it;
    * every other token is replaced by its label name from ``id2label``;
    * the tokenizer's CLS / SEP boundary tokens never reach the output.

    Args:
        text: String to mask. ``None`` and empty / whitespace-only strings
            are accepted and produce an empty string.

    Returns:
        The masked text as a single string. Input is truncated to 512 tokens,
        so anything beyond that limit is absent from the result.
    """
    # Nothing to mask; also avoids handing ``None`` to the tokenizer.
    if text is None or not text.strip():
        return ""

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Pure inference: do not build an autograd graph for the forward pass.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax is monotonic, so argmax over the raw logits selects the same class.
    label_ids = torch.argmax(logits, dim=-1)[0].tolist()
    token_ids = inputs["input_ids"][0].tolist()

    # Identify boundary tokens by id rather than by their decoded text, so a
    # boundary position that happens to be classified as an entity cannot leak
    # a label into the output.
    boundary_ids = {tokenizer.cls_token_id, tokenizer.sep_token_id}

    pieces = []
    for token_id, label_id in zip(token_ids, label_ids):
        if token_id in boundary_ids:
            continue
        # NOTE(review): class id 0 is treated as "not an entity" (presumably
        # the "O" tag) -- confirm against labels.id2label.
        if label_id == 0:
            pieces.append(tokenizer.decode(token_id))
            continue
        label = id2label[label_id]
        # Continuation tags are skipped; only the first tag of a span is emitted.
        if label[:2] == "I-":
            continue
        pieces.append(label)

    return tokenizer.convert_tokens_to_string(pieces)


In [None]:
# Multi-line input box in which the user pastes the text to be masked.
masking_input = gr.Textbox(lines=5, placeholder="Enter text to be masked")

# Gradio UI around output_AM: one text input, one plain-text output.
interface = gr.Interface(
    fn=output_AM,
    inputs=masking_input,
    outputs="text",
    title="AM",
    description=(
        "Enter text that contains personally identifiable information (we will not steal your data). The model will output the masked text."
    ),
)

In [None]:
# Launch the Gradio interface that wraps output_AM.
interface.launch()