In [2]:
# %pip install torch==2.0.1 torchdata

# %pip install --disable-pip-version-check -q \
#     transformers==4.34.1 \
#     datasets==2.12.0 

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
!nvidia-smi

Sun May 12 14:10:57 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.86.10              Driver Version: 535.86.10    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          On  | 00000000:48:00.0 Off |                    0 |
| N/A   24C    P0              65W / 400W |   7839MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM4-80GB          On  | 00000000:8B:00.0 Off |  

In [23]:
from dataclasses import dataclass, field
from typing import Optional

import torch
from datasets import load_dataset
from torch.optim import Adam
from tqdm import tqdm
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    HfArgumentParser,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

In [7]:
from datasets import load_dataset, Dataset, DatasetDict
from typing import Callable
import pandas as pd

In [4]:
def helper_remove_columns(ds: Dataset, columns_to_drop: list):
    """Return ``ds`` without the columns listed in ``columns_to_drop``.

    Thin wrapper around ``ds.remove_columns`` so the operation can be passed
    around as a plain callable (it is used as the ``helper_fn`` argument of the
    dataset loader below).

    Args:
        ds: Dataset to strip columns from.
        columns_to_drop: Names of the columns to remove.

    Returns:
        Whatever ``ds.remove_columns(columns_to_drop)`` returns.
    """
    trimmed = ds.remove_columns(columns_to_drop)
    return trimmed


def build_dataset(
    dataset_name: str, filter_fn: Callable, helper_fn: Callable | None, *args
) -> Dataset:
    """Load the ``train`` split of a dataset, filter it, and optionally post-process it.

    Args:
        dataset_name: Identifier handed to ``load_dataset``.
        filter_fn: Per-example predicate applied through ``filter`` with
            ``batched=False``.
        helper_fn: Optional post-processing step called as
            ``helper_fn(ds, *args)``. Skipped when it is not callable
            (for example ``None``).
        *args: Extra positional arguments forwarded to ``helper_fn``.

    Returns:
        The filtered dataset, post-processed by ``helper_fn`` when one is given.
    """
    filtered = load_dataset(dataset_name, split="train").filter(
        filter_fn, batched=False
    )

    # Nothing to post-process with: hand back the filtered split as is.
    if not callable(helper_fn):
        return filtered

    return helper_fn(filtered, *args)


def filter_real_toxicity_prompts(sample):
    """Tell whether every score under ``sample["prompt"]`` exceeds its threshold.

    A sample passes only when all eight scores are present (not ``None``) and
    strictly greater than their minimum: 0.6 for ``toxicity`` and 0.3 for each
    of the other attributes.

    Args:
        sample: Mapping with a ``"prompt"`` entry that holds the scores.

    Returns:
        ``True`` if all scores are above their thresholds, ``False`` otherwise.

    Raises:
        KeyError: If ``"prompt"`` or any of the score keys is missing.
    """
    minimums = {
        "toxicity": 0.6,
        "threat": 0.3,
        "insult": 0.3,
        "severe_toxicity": 0.3,
        "profanity": 0.3,
        "sexually_explicit": 0.3,
        "identity_attack": 0.3,
        "flirtation": 0.3,
    }

    # Look every score up before comparing anything, so a missing key raises
    # regardless of how the other scores compare.
    observed = [
        (sample["prompt"][attribute], minimum)
        for attribute, minimum in minimums.items()
    ]

    return all(
        score is not None and score > minimum for score, minimum in observed
    )


def build_parallel_corpora(df: pd.DataFrame) -> DatasetDict:
    """Split a toxic / non-toxic parallel corpus into train and validation sets.

    This function used to be a second ``build_dataset`` definition. Because it
    was defined after the hub loader of the same name, it replaced that loader
    and made the four-argument ``build_dataset(...)`` call below fail with a
    ``TypeError``. It now has its own name so both builders are usable.

    Args:
        df: Frame with ``toxic`` and ``not_toxic`` columns.

    Returns:
        A ``DatasetDict`` with a ``train`` split (90% of the rows, sampled with
        seed 42) and a ``validation`` split (all remaining rows, shuffled with
        seed 42). Each split has a single ``corpora`` column whose items are
        ``{'Not_toxic': ..., 'Toxic': ...}`` dicts.
    """
    train_ratio = 0.9
    train_data = df.sample(frac=train_ratio, random_state=42)
    # Every row not drawn for training goes to validation; frac=1.0 only
    # shuffles them (the previous `val_ratio / val_ratio` was always 1.0).
    val_data = df.drop(train_data.index).sample(frac=1.0, random_state=42)

    def create_dataset(part: pd.DataFrame) -> Dataset:
        """Pack the rows of ``part`` into a one-column ``corpora`` dataset."""
        pairs = [
            {'Not_toxic': not_toxic, 'Toxic': toxic}
            for toxic, not_toxic in zip(part["toxic"], part["not_toxic"])
        ]
        return Dataset.from_dict({"corpora": pairs})

    return DatasetDict(
        train=create_dataset(train_data),
        validation=create_dataset(val_data),
    )


columns_to_drop = ["filename", "begin", "end", "challenging"]
real_toxicity_prompts = build_dataset(
    "allenai/real-toxicity-prompts",
    filter_real_toxicity_prompts,
    helper_remove_columns,
    columns_to_drop,
)  # 2012 items



parallel_corpora = build_parallel_corpora(pd.read_csv("generated.csv"))

In [61]:
from datasets import load_dataset
from transformers import pipeline
import json

# Load the ToxiGen data; a later cell reads its 'train' split.
dataset = load_dataset(
    'skg/toxigen-data',
    name="train",
)


Downloading data: 100%|██████████| 18.8M/18.8M [00:01<00:00, 11.0MB/s]
Generating train split: 100%|██████████| 250951/250951 [00:00<00:00, 337689.97 examples/s]


In [63]:
# Keep only the training rows whose `prompt_label` equals 1.
toxic_prompts = dataset['train'].filter(
    lambda row: row['prompt_label'] == 1
)

Filter: 100%|██████████| 250951/250951 [00:02<00:00, 110617.88 examples/s]


In [91]:
import os

# Take the Hugging Face access token from the environment rather than storing
# it in the notebook, where it would be saved and shared along with the file.
# Falls back to the previous empty string when HF_TOKEN is not set.
access_token = os.environ.get("HF_TOKEN", "")
detoxifier = pipeline("conversational", model="meta-llama/Meta-Llama-3-70B-Instruct", token=access_token)

Loading checkpoint shards: 100%|██████████| 4/4 [01:08<00:00, 17.00s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# System prompt for a chat-style request. The loop in this cell does not read
# `messages`; a later cell slices it when calling `detoxifier`.
messages = [
    {"role": "system", "content": "You are genius in detoxification prompts task."}
]
# Work on the first 3000 toxic prompts only.
tp = toxic_prompts['prompt'][:3000]
# Collects one {'toxic': ..., 'not_toxic': ...} record per prompt.
dict_output = []
# NOTE(review): `tokenizer` and `model` are not defined in any cell visible in
# this notebook -- confirm they exist in the kernel before this cell runs.
for prompt in tp:
    batch = tokenizer(prompt, return_tensors="pt")
    # Only the input ids are passed to generate(); no other field of `batch` is used.
    generated_ids = model.generate(batch["input_ids"])
    # NOTE(review): assuming a Hugging Face tokenizer, batch_decode returns a
    # list, so 'not_toxic' would hold a list of strings rather than one string
    # -- confirm that is what downstream code expects.
    output = {'toxic': prompt, 'not_toxic': tokenizer.batch_decode(generated_ids, skip_special_tokens=True)}
    dict_output.append(output)

In [103]:
# Serialize the collected 'toxic' / 'not_toxic' records to disk as JSON.
with open('detoxification_results.json', 'w') as results_file:
    results_file.write(json.dumps(dict_output))

In [105]:
# Wrap the generated records in a dataset with a single "corpora" column.
new_ds = Dataset.from_dict({"corpora": dict_output})

In [None]:
json_output = []
# Each request sends the conversation up to and including message `idx`; the
# last two entries of the returned conversation are stored as the toxic prompt
# and its detoxified rewrite.
# NOTE(review): index 0 is skipped, so `messages` must hold more than one entry
# for this loop to do anything -- confirm it is extended with the prompts
# before this cell runs.
for idx in range(1, len(messages)):
    conversation = detoxifier(messages[:idx+1])
    output = {'toxic': conversation[-2]['content'], 'not_toxic': conversation[-1]['content']}
    json_output.append(output)

# Write only when something was generated: an empty run would otherwise replace
# the results already stored in detoxification_results.json with an empty list.
if json_output:
    with open('detoxification_results.json', 'w') as f:
        json.dump(json_output, f)
else:
    print("No conversations were generated; detoxification_results.json left unchanged.")