In [None]:
#### CSV Processors for questions in some specific Benchmarks

In [None]:
import csv

In [None]:
class FarelBench_CSV_Processor:
    '''
    CSV Processor for questions in the Farel (Family Relations) Benchmark
    '''
    def __init__(self, question_path, input_files):
        '''
        Constructor
        @param question_path string: the path to the files in the input_files list
        @param List input_files: a list of files to process
        '''
        self.question_path = question_path
        self.input_files = input_files
        print("FarelBench_CSV_Processor.__init__:", "self.question_path:", self.question_path, "self.input_files:", self.input_files)

    def read_data(self):
        """
        Reads every file in self.input_files and converts each CSV row into a question dictionary.

        The file name is appended directly to self.question_path (no separator is inserted),
        so question_path is expected to end with a path separator or prefix.

        Expected row layout (columns 0 and 2 are not used here):
          0 1     2 3
          1,child,1,"Given the family relationships:\\n* Ralph is Anthony's parent.\\n...What is ...?\\nSelect the correct answer:\\n..."

        The rows are parsed with csv.reader rather than str.split(","), because the quoted
        prompt in column 3 contains commas ("... <ANSWER> tag, for example: ...") and a plain
        split would truncate it and leave the quote characters in the text.

        @return List: A list of dictionaries, one per non-empty row, with the keys
                      "topic", "input" and "target"
        """
        questions = []
        for input_file in self.input_files:
            csv_path = self.question_path + input_file
            print("CSV file path:", csv_path)
            # newline='' is what the csv module asks for so that it can do its own line handling
            with open(csv_path, 'r', encoding='utf-8', newline='') as csv_file:
                for row in csv.reader(csv_file):
                    if not row:
                        # csv.reader yields [] for blank lines; there is nothing to convert
                        continue
                    questions.append(self._row_to_question(row))
        return questions

    @staticmethod
    def _row_to_question(row):
        '''
        Maps one parsed CSV row into our dictionary format.
        @param List row: the fields of one CSV row; row[1] is the relation label and row[3] the prompt
        @return dict: a dictionary with the keys "topic", "input" and "target"
        '''
        # The prompt stores its line breaks as the two literal characters backslash + n
        segments = row[3].split(r"\n")
        # Everything before the first segment starting with "What is" is the premise ("input");
        # that segment and everything after it is the "target".
        # If no such segment exists, the whole prompt becomes the input and the target is empty.
        split_at = next(
            (index for index, segment in enumerate(segments) if segment.startswith("What is")),
            len(segments),
        )
        return {
            "topic": row[1],   # Label the question so we know which level it is (child, grandparent, etc.)
            "input": ' '.join(segments[:split_at]),
            "target": ' '.join(segments[split_at:]),
        }

    def process_data(self, data):
        '''
        Hook for post-processing the questions. Currently returns the data unchanged.
        @param List data: the list of question dictionaries returned by read_data
        @return List: the same list that was passed in
        '''
        processed_data = data
        return processed_data

    # NOTE: write_csv is not implemented. The earlier draft (kept in the file as a string
    # literal) called csv.dump, which does not exist in the csv module, and wrote to
    # self.output_file, which is never set by the constructor.

    def process_csv_files(self):
        '''
        Reads all input files and runs process_data over the result.
        If the instance provides a write_csv method (for example through a subclass),
        the processed data is passed to it as well.
        @return List: the processed list of question dictionaries
        '''
        data = self.read_data()
        processed_data = self.process_data(data)
        write_csv = getattr(self, "write_csv", None)
        if callable(write_csv):
            write_csv(processed_data)
        return processed_data

In [None]:
class TruthfulQA_CSV_Processor:
    '''
    CSV Processor for questions in the TruthfulQA Benchmark
    '''
    def __init__(self, question_path, input_files):
        '''
        Constructor
        @param question_path string: the path to the files in the input_files list
        @param List input_files: a list of files to process
        '''
        self.question_path = question_path
        self.input_files = input_files
        print("TruthfulQA_CSV_Processor.__init__:", "self.question_path:", self.question_path, "self.input_files:", self.input_files)

    def read_data(self):
        """
        Reads every file in self.input_files and converts each CSV row into a question dictionary.

        The file name is appended directly to self.question_path (no separator is inserted),
        so question_path is expected to end with a path separator or prefix.

        The files must have a header row containing the columns "Category", "Question",
        "Source", "Examples: True", "Examples: False", "Examples: Informative" and
        "Examples: Uninformative".

        @return List: A list of dictionaries, one per data row, with the keys
                      "input", "topic", "source" and "target"
        """
        questions = []
        for input_file in self.input_files:
            csv_path = self.question_path + input_file
            print("CSV file path:", csv_path)
            # The question file starts with a "funky character" in front of the Category column
            # label (it looks like a UTF-8 byte-order mark). Decoding with 'utf-8-sig' strips a
            # leading BOM when present and is a no-op otherwise, so the plain "Category" key
            # works for files with and without it.
            with open(csv_path, 'r', encoding='utf-8-sig', newline='') as csv_file:
                for row in csv.DictReader(csv_file):
                    questions.append(self._row_to_question(row))
        return questions

    @staticmethod
    def _row_to_question(row):
        '''
        Maps one CSV row into our dictionary format.
        @param dict row: one row as produced by csv.DictReader
        @return dict: a dictionary with the keys "input", "topic", "source" and "target"
        '''
        # The target is the four example columns, one per line, in this fixed order
        target = "\n".join([
            row["Examples: True"],
            row["Examples: False"],
            row["Examples: Informative"],
            row["Examples: Uninformative"],
        ])
        return {
            "input": row["Question"],
            "topic": row["Category"],   # Label the question so we know which topic it came from
            "source": row["Source"],    # Our boilerplate processing doesn't use this, but we might be interested in the future
            "target": target,
        }

    def process_data(self, data):
        '''
        Hook for post-processing the questions. Currently returns the data unchanged.
        @param List data: the list of question dictionaries returned by read_data
        @return List: the same list that was passed in
        '''
        processed_data = data
        return processed_data

    # NOTE: write_csv is not implemented. The earlier draft (kept in the file as a string
    # literal) called csv.dump, which does not exist in the csv module, and wrote to
    # self.output_file, which is never set by the constructor.

    def process_csv_files(self):
        '''
        Reads all input files and runs process_data over the result.
        If the instance provides a write_csv method (for example through a subclass),
        the processed data is passed to it as well.
        @return List: the processed list of question dictionaries
        '''
        data = self.read_data()
        processed_data = self.process_data(data)
        write_csv = getattr(self, "write_csv", None)
        if callable(write_csv):
            write_csv(processed_data)
        return processed_data