In [46]:
import json
from collections import defaultdict
from rouge_score import rouge_scorer
# input --> test_prompt.json 
# output --> generated_questions.json 

with open("test_prompt.json", "r") as f:
  db_init = json.load(f)
f.close()
with open("generated_questions.json", "r") as f:
  generated_questions = json.load(f)
f.close()

scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=False) # same as self-instruct setting
# for each input compute rouge-l score with each output from generated questions
db_inputs = [x['inputs'] for x in db_init]
q_inputs = [x['inputs'] for x in generated_questions]


scores = defaultdict(list)
excluded_qs = set()
for i, existing_q in enumerate(db_inputs):
  for j, new_q in enumerate(q_inputs):
    score = scorer.score(new_q, existing_q)['rougeL'].fmeasure
    scores[j].append((i, score))
    if score > 0.7:
      excluded_qs.add(j)

# filter poor rouge score qs
added_questions = [q for i, q in enumerate(generated_questions) if i not in excluded_qs]

# append to init_db json

with open("test_prompt.json", "r") as f:
  db_init = json.load(f)
  db_init.extend(added_questions)
f.close()
with open("test_prompt.json", "w") as f:
  print(db_init)
  json.dump(db_init, f)
f.close()


[{'inputs': 'Determine whether the given sequence of parentheses is properly matched.\n\nSequence: } [ ] }\nValid/Invalid?', 'targets': ['Invalid'], 'multiple_choice_targets': ['Valid', 'Invalid'], 'multiple_choice_scores': [0, 1], 'idx': 158}, {'inputs': 'Determine whether the given sequence of parentheses is properly matched.\n\nSequence: [ ) ( ) } { ) } } (\nValid/Invalid?', 'targets': ['Invalid'], 'multiple_choice_targets': ['Valid', 'Invalid'], 'multiple_choice_scores': [0, 1], 'idx': 551}, {'inputs': 'Determine whether the given sequence of parentheses is properly matched.\n\nSequence: { { [ ) } } [ } ( )\nValid/Invalid?', 'targets': ['Invalid'], 'multiple_choice_targets': ['Valid', 'Invalid'], 'multiple_choice_scores': [0, 1], 'idx': 141}, {'inputs': 'Given a list of integers, find the maximum sum of any contiguous subarray.\n\nList: [1, -2, 3, 4, -5, 8]\nMaximum sum:', 'targets': ['10'], 'multiple_choice_targets': ['-2', '3', '10', '12', '13']}, {'inputs': 'Given a string, dete

In [None]:
train_dir = None 
train_dir_gen = None
def _rougel_check(task:str, questions):
  """
  task: task name
  questions: generated question list in dict format (e.g., 
  {inputs:<str>, targets:<str>, multiple_choice_targets:<str>})
  """
  # get current examples in db
  with open(f"{train_dir}/{task}.json", "r") as f:
    db_init = json.load(f)
  f.close()
  with open(f"{train_dir_gen}/{task}.json", "r") as f:
    existing_generated_questions = json.load(f)
  f.close()

  db_init.extend(existing_generated_questions)

  db_inputs = [question['inputs'] for question in db_init]
  generated_inputs = [question['inputs'] for question in questions]

  scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=False) # same as self-instruct setting
  scores, excluded_qs = defaultdict(list), set()
  for i, existing_q in enumerate(db_inputs):
    for j, new_q in enumerate(generated_inputs):
      score = scorer.score(new_q, existing_q)['rougeL'].fmeasure
      scores[j].append((i, score))
      if score > 0.7:
        excluded_qs.add(j)

  return [q for i, q in enumerate(questions) if i not in excluded_qs]

In [48]:
import csv
filter_summary = {"parse_filters" : 1,"field_check_filters" : 0, "rougel_filters" : 2}
task = "test"
with open(f"test.csv", "a+") as f:
  # task, parse filter, rl filter, fc filter <-- columns
  filter_summary["task"] = task
  dict_writer = csv.DictWriter(
      f,
      fieldnames=[
          "task",
          "parse_filters",
          "field_check_filters",
          "rougel_filters",
      ],
  )
  dict_writer.writeheader()
  dict_writer.writerow(filter_summary)
f.close()