In [1]:
import torch
torch.cuda.is_available()

True

In [2]:
import json

def get_data():
    with open(f"multihatespeech/german-oai-mhd.json") as f:
        oai = json.load(f)
    with open(f"multihatespeech/german-celadon-mhd.json") as f:
        celadon = json.load(f)
    with open(f"multihatespeech/german-detox-mhd.json") as f:
        detox = json.load(f)
    return {"oai": oai, "celadon": celadon, "detox": detox}

moderation_data = get_data()

In [3]:
with open(f"streamlit-data.json") as f:
    labeled = json.load(f)["german"]

In [4]:
import pandas as pd

data = pd.read_csv(
    "./MultiLanguageTrainDataset.csv", usecols=["text", "label", "language"]
)
data = data[data.language == 5]
data = data.drop(columns=["language"])
data

Unnamed: 0,text,label
75239,Apropos US-Kreigsverbrechen: Wo bleiben eigen...,0.0
75240,Logo - die Airportslots von Ex-Berlin kassiert...,0.0
75241,Das hat der Günstling von seiner Kanzlerin gel...,1.0
75242,"Die `jungen Leute`, die die F1 mit mehr Stadtr...",0.0
75243,Vater lässt sein Kleinkind alleine am Ufer zur...,1.0
...,...,...
102549,Der Typ sonnte sich doch nur noch in den Erfol...,1.0
102550,Bei verkehrsrechtlicher Nötigung anderer Verke...,0.0
102551,Vieleicht sollte der KFC mal langsam über eine...,0.0
102552,Dann aber auch automatische Waffen für Schüler...,1.0


In [5]:
from transformers import AutoTokenizer

MODEL = "distilbert/distilbert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(MODEL)

labels = [
    "outside",
    # Content that incites or glorifies physical harm or aggression, including threats.
    "violent",  # Example: "I'm going to hurt you, and you deserve it."
    # Content that is vulgar, explicit, or offensive in language or sexual nature.
    "obscene",  # Example: "What the **** is wrong with you, you piece of ****?"
    # Content that includes persistent unwanted behavior or personal attacks.
    "harassment",  # Example: "You're a failure, and everyone knows it."
    # Content that demeans, attacks, or excludes based on personal or group attributes.
    "hate_discrimination",  # Example: "People like you shouldn't exist."
    # Content that promotes self-harm, suicide, or glorifies injury.
    "self_harm",  # Example: "Cutting yourself is the only way to feel better."
    # Content that is contextually inappropriate or violates the norms of a specific audience.
    "inappropriate",  # Example: Sharing adult-themed jokes in a children's forum.
]

label_to_id = {k: i for i, k in enumerate(labels)}
id_to_label = {i: k for i, k in enumerate(labels)}
    

def is_not_flagged(data, i, source):
    if source.iloc[i]["label"] == 1.0:
        return False

    for key, v in data["oai"][i].items():
        if type(v) is not float:
            if key == "flagged" and v:
                return False
            continue
        if v > 0.4:
            return False

    for key, v in data["celadon"][i].items():
        if type(v) is not float:
            if key == "Flagged" and v != "No":
                return False
            continue
        if v > 0.7:
            return False

    for key, v in data["detox"][i].items():
        if type(v) is not float:
            continue
        if v > 0.25:
            return False
    return True

def add_to_labels(v, labels):
    ranges = []
    # collapse ranges!
    for elem in labels:
        _, start, end, label = elem
        label = label_to_id[label]
        last_found = len(ranges)
        for i, r in enumerate(ranges):
            if end < r[0]:
                last_found = i
                break
            if start > r[1] or r[2] != label:
                continue
            ranges[i] = [min(start, r[0]), max(end, r[1]), r[2]]
            last_found = -1
            break
        if last_found != -1:
            ranges.insert(last_found, [start, end, label])
    # apply to current tokens
    for i, k in enumerate(v['offset_mapping']):
        if k[0] == 0 and k[1] == 0:
            continue
        for r in ranges:
            if k[1] < r[0]:
                break
            if k[0] > r[1]:
                continue
            v['labels'][i] = r[2]
    del v['offset_mapping']

def make_complete_dataset():
    new_data = []
    new_test_data = []
    count_p = 0
    count_n = 0
    for i in range(len(data)):
        idx = data.index[i]
        text = data.iloc[i]["text"]
        if str(idx) in labeled:
            v = tokenizer(text, return_offsets_mapping=True)
            v['labels'] = [0] * len(v['input_ids'])
            add_to_labels(v, labeled[str(idx)])
            if count_p < 1:
                count_p += 1
                new_test_data.append((i, v))
            else:
                new_data.append((i, v))
        elif is_not_flagged(moderation_data, i, data):
            v = tokenizer(text)
            v['labels'] = [0] * len(v['input_ids'])
            if count_n < 15:
                count_n += 1
                new_test_data.append((i, v))
            else:
                new_data.append((i, v))
    return new_test_data, new_data

new_test_data, new_data = make_complete_dataset()

In [6]:
from torch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
    def __init__(self, data):
        self.data = data
        
    def __getitem__(self, index):
        x = self.data[index][1]
        return x
    
    def __len__(self):
        return len(self.data)
test = MyDataset(new_test_data)
train = MyDataset(new_data)

In [7]:
from transformers import DataCollatorForTokenClassification
import numpy as np
import evaluate

data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

seqeval = evaluate.load("seqeval")
label_list = labels

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

In [8]:
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer


model = AutoModelForTokenClassification.from_pretrained(
    MODEL, num_labels=len(labels), id2label=id_to_label, label2id=label_to_id
)

training_args = TrainingArguments(
    output_dir="distilbert-token-swearword",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=test,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert/distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
trainer.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.0281,0.075451,0.9375,0.75,0.833333,0.992521
2,0.0046,0.072872,0.9375,0.75,0.833333,0.992521


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


TrainOutput(global_step=1062, training_loss=0.015994164791035336, metrics={'train_runtime': 533.7606, 'train_samples_per_second': 31.834, 'train_steps_per_second': 1.99, 'total_flos': 501663429461568.0, 'train_loss': 0.015994164791035336, 'epoch': 2.0})

In [30]:
def test_with_str(text):
    inputs = tokenizer(text, return_tensors="pt")
    for key in inputs.keys():
        inputs[key] = inputs[key].to('cuda:0')
    with torch.no_grad():
        logits = model(**inputs).logits
    predictions = torch.argmax(logits, dim=2)
    return [model.config.id2label[t.item()] for t in predictions[0]]
