In [None]:
# NOTE: please use the fork at https://github.com/sinemmy/greatest-good-benchmark (its questions have unique identifiers)
import os 
import json

# Currently the question dir and the wisdom dir (presumably this repo) are
# expected to be separate folders sitting side by side in your GitHub folder.
QUESTION_DIR = '../greatest-good-benchmark/'  # CHANGE HERE IF NEEDED


# The benchmark's data dir is kept separate because it also contains prompts
# (the original ones, plus ones with the Likert scale inverted).
# os.path.join is used instead of string concatenation so the result is still
# correct if QUESTION_DIR is changed to a path without a trailing separator.
QUESTION_DATA_DIR = os.path.abspath(os.path.join(QUESTION_DIR, 'data'))
# Statement files are resolved relative to the current working directory.
QUESTION_JSON = os.path.abspath('./GreatestGoodBenchmark.json')
INVERTED_JSON = os.path.abspath('./GreatestGoodBenchmarkInverted.json')

In [None]:
## Add unique identifiers for each question (only needs to happen once: when
## every entry already has a 'statement_id', no file is written)

# Read explicitly as UTF-8 so non-ASCII characters in the statements are not
# mis-decoded on platforms whose default encoding is something else.
with open(QUESTION_JSON, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Check if IDs already exist
if not all('statement_id' in item for item in data):
    # Add IDs to each item: 1-based position in the file, stored as a string.
    # NOTE: if only some entries lack an ID, every entry is renumbered.
    for i, item in enumerate(data, 1):
        item['statement_id'] = str(i)

    # Save updated JSON as a standalone copy in the working directory ...
    with open('benchmark_questions_with_ids.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
    # ... and back into the benchmark file itself, so the check above passes
    # on the next run. Writing only in this branch avoids rewriting the file
    # when nothing changed.
    with open(QUESTION_JSON, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
    print("Added IDs to questions")
else:
    print("Questions already have IDs")

In [None]:
from typing import Literal

class GGB_Statements:
    """In-memory view of a benchmark statements JSON file.

    The file must contain a list of objects, each providing the keys
    ``statement_id``, ``type`` and ``statement``.

    Attributes:
        json_data: the parsed JSON content, exactly as loaded.
        questions: dict keyed by each entry's ``statement_id``; every value is
            ``{'id': int(statement_id), 'question': <statement>,
            'category': <type>}``.
    """

    def __init__(self, JSONpath=None):
        """Load and index the statements stored at ``JSONpath``.

        Args:
            JSONpath: path of the statements JSON file. When omitted (or
                ``None``) the module-level ``QUESTION_JSON`` is used. It is
                looked up at call time, so re-running the configuration cell
                takes effect without having to redefine this class.
        """
        if JSONpath is None:
            JSONpath = QUESTION_JSON
        self.json_data = self._load_json(JSONpath)
        self.questions = self._json_to_dict()

    def _load_json(self, path):
        """Return the parsed content of the JSON file at ``path``."""
        # Explicit UTF-8: the platform default encoding may differ.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _json_to_dict(self):
        """Index ``self.json_data`` by statement id.

        Stores the mapping on ``self.questions`` and also returns it.

        Raises:
            KeyError: if an entry lacks ``statement_id``, ``type`` or
                ``statement``.
            ValueError: if a ``statement_id`` cannot be converted to ``int``.
        """
        self.questions = {}
        for entry in self.json_data:
            statement_id = entry['statement_id']
            self.questions[statement_id] = {
                'id': int(statement_id),
                'question': entry['statement'],
                'category': entry['type'],
            }
        return self.questions

    def print_question(self, question_id, printout=False):
        """Return the statement text for ``question_id``.

        Args:
            question_id: key into ``self.questions``. If the value is not a
                key itself, its ``str()`` form is tried, so ``1`` finds the
                entry stored under ``'1'``.
            printout: when true, the text is also printed.

        Raises:
            KeyError: if no entry matches ``question_id``.
        """
        if question_id not in self.questions:
            question_id = str(question_id)
        qstring = self.questions[question_id]['question']
        if printout:
            print(f'{qstring}')
        return qstring

    def get_questions_by_category(self, category: Literal["IH", "IB"], questions_only = False):
        """Return every question whose category equals ``category``.

        Args:
            category: category label to match (the ``type`` field of the
                JSON entries).
            questions_only: when true, return only the statements (list of
                strings); otherwise return a list of dicts with ``id``,
                ``question`` and ``category``.

        Returns:
            A list in the order the entries were loaded; empty if nothing
            matches.
        """
        # Iterate over the dict *values*: iterating the dict itself yields the
        # string keys. The category is stored under 'category', not 'type'.
        matches = [q for q in self.questions.values() if q['category'] == category]
        if questions_only:
            return [q['question'] for q in matches]
        return matches

    # get number of total questions
    def get_total_questions(self):
        """Return the number of entries in the loaded JSON."""
        return len(self.json_data)

    def get_question_by_index(self, index):
        """Return the raw JSON entry at position ``index`` (0-based).

        Raises:
            IndexError: if ``index`` is negative or past the last entry
                (negative indexing is deliberately not supported).
        """
        if index < 0 or index >= len(self.json_data):
            raise IndexError("Index out of range")
        return self.json_data[index]
    


        
    

In [None]:
# GGB Questions (no path is passed, so the class's default JSON path is used)
Qs = GGB_Statements()
# GGB Inverted Questions (loaded from INVERTED_JSON)
InvertQs = GGB_Statements(INVERTED_JSON)

In [None]:
# Show the first raw JSON entry of the original question set as the cell output.
Qs.get_question_by_index(0)

In [None]:
# Fetch statement '1' from the original set and statement '101' from the
# inverted set, then print both together.
# Note: the inversions are not perfect quite yet, but it's a start.
sampleQ = Qs.print_question('1')
sampleInvert = InvertQs.print_question('101')
print(f'\t Original Question: \n {sampleQ} \n \t Inverted Question: \n {sampleInvert}')
