# Synthetically-Created, Human-Evaluated Reasoning Dataset

- CommonsenseQA, StrategyQA
- Variations on questions / tasks / queries
- Human eval
- Think step-by-step reasoning prompts
- Human eval
- Answers to query + reasoning
- Human eval

Aim for a starting sample set of 1,000 examples.


In [20]:
from datasets import load_dataset
from huggingface_hub import InferenceClient

from pprint import pprint
import random
import os

In [2]:
"""
CommonsenseQA from TAU (Tel Aviv University)

https://arxiv.org/abs/1811.00937

"""
# Hugging Face Hub identifier of the dataset to download.
dataset_id = "tau/commonsense_qa"

# Load the dataset and print the returned object.
dataset = load_dataset(path=dataset_id)
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'question_concept', 'choices', 'answerKey'],
        num_rows: 9741
    })
    validation: Dataset({
        features: ['id', 'question', 'question_concept', 'choices', 'answerKey'],
        num_rows: 1221
    })
    test: Dataset({
        features: ['id', 'question', 'question_concept', 'choices', 'answerKey'],
        num_rows: 1140
    })
})


In [3]:
# Keep a handle on the 'train' split and print it.
train_dataset = dataset["train"]
print(train_dataset)

Dataset({
    features: ['id', 'question', 'question_concept', 'choices', 'answerKey'],
    num_rows: 9741
})


In [44]:
# Number of rows in the training split.
len(train_dataset)

9741

In [41]:
# Index of the training example to inspect.
q_no = 13
# Pretty-print the full record stored at that index.
pprint(train_dataset[q_no])

{'answerKey': 'A',
 'choices': {'label': ['A', 'B', 'C', 'D', 'E'],
             'text': ['loss of heat',
                      'revenge',
                      'expansion',
                      'relaxation',
                      'calm down']},
 'id': 'b63b9809c203321d6659ddf8551894bf',
 'question': "James was cooling off two quickly.  He would die if he didn't "
             'find some way to stop what?',
 'question_concept': 'cooling off'}


In [48]:
import json
import time

model_id = 'mistralai/Mixtral-8x7B-Instruct-v0.1'

# Draw distinct row indices. Valid indices run from 0 to len - 1, so sample
# from range(len(...)); sampling from range(1, len + 1) could pick an
# out-of-range index and would never pick row 0. The sample size is capped so
# a dataset with fewer than 1,000 rows does not make random.sample raise.
queries = random.sample(
    range(len(train_dataset)),
    min(1000, len(train_dataset)),
)

# One dict per sampled question: its dataset id plus three generated answers.
responses = []

# The client does not depend on the question, so build it once up front.
client = InferenceClient(
    model=model_id,
    token=os.getenv('HUGGINGFACE_TOKEN'),
)

# The checkpoint file below lives in 'dev/'; make sure the directory exists.
os.makedirs('dev', exist_ok=True)

# Loop through the queries
for q in queries:
    # Fetch the row once instead of indexing the dataset for every field.
    row = train_dataset[q]
    query = row['question'].replace('  ', ' ')
    choices = row['choices']['text']

    # Build the prompt from adjacent literals so that source indentation does
    # not end up inside the text sent to the model.
    prompt = (
        f"Q: {query}\n"
        f"Choices: {', '.join(choices)}\n"
        "A: Let's think step by step."
    )
    pprint(prompt)

    response = {'id': row['id']}

    # Collect three sampled generations per question, each with its own seed.
    for i in range(3):
        # Retry until the request succeeds. Catch Exception rather than using
        # a bare except so Ctrl-C (KeyboardInterrupt) still stops the run.
        while True:
            try:
                output = client.text_generation(
                    prompt,
                    max_new_tokens=100,
                    do_sample=True,
                    seed=random.randint(0, 10000),
                )
                break
            except Exception as err:
                print(f"Request failed: {err!r}")
                print("An interruption occurred. Retrying in 3 minutes.")
                time.sleep(180)

        response[f'response {i+1}'] = output

    pprint(response)

    responses.append(response)
    # Checkpoint everything collected so far after each question, so an
    # aborted run keeps its results.
    with open('dev/responses.json', 'w') as f:
        json.dump(responses, f)

    # Random pause between questions.
    time.sleep(random.uniform(1, 6))

('Q: What can happen to someone too sure of their learning?\n'
 '        Choices: growth, gaining knowledge, enlightenment, knowing more, '
 'overconfidence\n'
 "        A: Let's think step by step.")
{'id': '1fe48d12b6f6e4e38f4445f3ec60d5c5',
 'response 1': ' When someone is too sure of their learning, they become '
               'overconfident, thinking that they know everything. This can '
               'prevent them from learning more, as they stop seeking new '
               'knowledge and ideas. They might also miss out on '
               'enlightenment, as they close themselves off to new '
               'perspectives. Overall, their growth might be stunted, as they '
               'stop challenging themselves and expanding their understanding. '
               'So, out of the given choices, the most appropriate answer is '
               'overconfidence.',
 'response 2': ' Overconfidence can lead someone to fail to question their own '
               'preconceptions, in t

In [55]:
import os
import json
from datetime import datetime
import uuid

# Directory that receives the snapshot of the collected responses.
file_path = '/mnt/d/Projects/forge-pipeline/dev/'

# The file name combines the current date with a random UUID.
date = f"{datetime.now():%Y-%m-%d}"
uuid_str = str(uuid.uuid4())
file_name = "responses_" + date + "_" + uuid_str + ".json"

save_path = os.path.join(file_path, file_name)

# Serialize the whole list and write it out in one go.
with open(save_path, 'w') as f:
    f.write(json.dumps(responses))