Skip to content

Commit

Permalink
Resolve duplication issue with filtered prosocial (#3202)
Browse files: browse the repository at this point in the history
  • Loading branch information
olliestanley committed May 20, 2023
1 parent d39976a commit d726e7f
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions model/model_training/custom_datasets/toxic_conversation.py
Expand Up @@ -20,7 +20,12 @@ class ProsocialDialogueExplaination(Dataset):

def __init__(self, split="train", cache_dir=".cache") -> None:
super().__init__()
dataset = load_dataset("Englishman2022/prosocial-dialog-filtered", cache_dir=cache_dir)[split]
dataset = load_dataset(
"Englishman2022/prosocial-dialog-filtered",
data_files="train.json",
cache_dir=cache_dir,
revision="e121e4fd886fadc030d633274c053b71839f9c20",
)[split]
self.pairs = []
for row in dataset:
for safety_annotation, safe_answer in zip(row["safety_annotations"], row["safety_annotation_reasons"]):
Expand Down Expand Up @@ -54,7 +59,12 @@ class ProsocialDialogue(Dataset):

def __init__(self, split="train", cache_dir=".cache") -> None:
super().__init__()
dataset = load_dataset("Englishman2022/prosocial-dialog-filtered", cache_dir=cache_dir)[split]
dataset = load_dataset(
"Englishman2022/prosocial-dialog-filtered",
data_files="train.json",
cache_dir=cache_dir,
revision="e121e4fd886fadc030d633274c053b71839f9c20",
)[split]
self.pairs = []
for row in dataset:
prompt = row["context"]
Expand Down

0 comments on commit d726e7f

Please sign in to comment.