In [1]:
import os
import glob
import json
import itertools
from typing import List 
import instructor
from vertexai.generative_models import GenerativeModel 
from pydantic import BaseModel
import pandas as pd
from tqdm import tqdm

os.environ['GOOGLE_CLOUD_PROJECT'] = "terraform-training-387507"
os.environ['GOOGLE_CLOUD_LOCATION'] = "global"
os.environ['GOOGLE_GENAI_USE_VERTEXAI'] = "true"

output_dir = "berkeley"
os.makedirs(output_dir, exist_ok=True)

In [2]:
data = pd.read_csv("../raw/berkeley_dataset_multi.csv")
data  = data[data['contains_hate'] == 1]
selected = ['age', 'disability']
filter = " or ".join(f"{col} == 1" for col in selected)
data = data.query(filter)
data = data.reset_index(drop=True)
ignored_cols = ['text', 'hatespeech', 'contains_hate']
data['labels'] = data.apply(lambda x: ','.join(k for k in x.keys() if k not in ignored_cols and x[k] == 1), axis=1)

In [3]:
prompt = """
You are an expert in hate speech detection and generation
Below there are instances of hate speech comments. Each of them can have one or more labels assigned to it, seperated by comma. This corresponds to multilabel classification task.

```
{inputs}
```

For each comment, generate at least 5 comments which have the same semantic meaning and the same labels assigned to them. Use rude, foul and explicit language where applicable.
Very long comments should be summarized or paraphrased into shorter ones. The generated comments should vary in tone, severity, or lexical variation. Return them in JSON format
"""

In [4]:
class GenerateExamples(BaseModel):
    comment: str
    index: int
    label: List[str]
    generated_comments: List[str]


client = instructor.from_vertexai(
    client=GenerativeModel("gemini-2.5-flash"),
    mode=instructor.Mode.VERTEXAI_TOOLS,
)

In [14]:
batch_size = 70
start = 900
# end = 630
end = data.shape[0]

with tqdm(total=end - start, initial=start) as pbar:
    for val in range(start, end, batch_size):
        from_ = val
        to_ = min(val + batch_size, end) 
        pbar.set_description(f"Processing {from_} - {to_}")
        inputs = "".join([f"""
    Comment: {x['text'].strip()}
    Index: {i} 
    Labels: {x["labels"]}
    """ for i, x in data[from_:to_].iterrows()])
        try:
            resp = client.create(
                messages=[{"role": "user", "content": prompt.format(inputs=inputs)}],
                response_model=List[GenerateExamples],
            )
            with open(f"{output_dir}/index-{from_}-{to_}.json", "w", encoding='utf-8') as f:
                json.dump([json.loads(res.model_dump_json()) for res in resp], f, ensure_ascii=False)
        except Exception as e:
            print(f"Error while processing range {from_} - {to_}:", e.__str__())
        pbar.update(batch_size)

Processing 970 - 1040: : 970it [00:01, 41.84it/s]

Error while processing range 900 - 970: list index out of range


Processing 1040 - 1110: : 1040it [06:19,  3.19s/it]

Error while processing range 970 - 1040: list index out of range


Processing 1180 - 1250: : 1180it [08:11,  1.49s/it]

Error while processing range 1110 - 1180: list index out of range


Processing 1390 - 1450: : 1460it [25:37,  2.74s/it]


In [15]:
files = glob.glob(f"{output_dir}/*.json")

def extract_instances_df(instance_data):
    labels = instance_data['label']
    gen_instances = list(itertools.product(instance_data['generated_comments'], labels or [None]))
    instance_df = pd.DataFrame(data=gen_instances, columns=['text', 'target'])
    instance_df['value'] = 1 if len(labels) > 0 else 0
    return instance_df

def process_file(filename):
    with open(filename, "r", encoding='utf-8') as f:
        retrieved = json.load(f)
    result_df = pd.concat(list(map(extract_instances_df, retrieved)))
    return result_df

In [16]:
retrieved_df = pd.concat(list(map(process_file, files)))
retrieved_df.to_csv(f"{output_dir}/generated.csv", index=None)