In [1]:
from dotenv import load_dotenv
import os

# Let python-dotenv populate the process environment, then look up the data
# directory setting; the lookup yields None when DATA_DIR_PATH is not defined.
# NOTE(review): DATA_DIR is not referenced by any cell visible in this
# notebook export -- confirm whether it is still needed.
load_dotenv()
DATA_DIR = os.environ.get("DATA_DIR_PATH")

# Generate the set of questions for the multi-dimension Qualtrics survey

In [27]:
import json
import random

# Locations of the source prompts and of the sampled-query file that a later
# cell writes. Both are relative paths, so they resolve against the kernel's
# current working directory -- adjust as necessary.
input_file_path = "../../data/datasets/main/test-prompts.json"  # Path where the query data is stored
output_file_path = "../../evals/human/multi-dimension-queries.json"

# Load the full prompt set. The encoding is passed explicitly because open()
# otherwise falls back to the platform's locale encoding, which is not
# guaranteed to be UTF-8 and can garble or reject non-ASCII text in the JSON.
with open(input_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

In [28]:

# How many exercises to draw per exercise format, and the seed that makes the
# draw repeatable after a kernel restart.
NUM_SAMPLES = 4
SAMPLE_SEED = 42

# A dedicated generator keeps this draw reproducible without reseeding the
# module-level `random` state that other code may also use.
sample_rng = random.Random(SAMPLE_SEED)

# One bucket per exercise format included in the survey; prompts requesting
# any other format are ignored.
filtered = {
    'short-answer': [],
    'true-false': [],
    'long-answer': []
}

# Sort each prompt into the bucket matching its requested format
for item in data:
    exercise_type = item['requested_exercise_format']
    if exercise_type in filtered:
        filtered[exercise_type].append(item)

# Draw NUM_SAMPLES distinct prompts per format; a format with fewer prompts
# than that contributes everything it has, in its original order.
selected_exercises = {}
for key, items in filtered.items():
    if len(items) >= NUM_SAMPLES:
        selected_exercises[key] = sample_rng.sample(items, NUM_SAMPLES)
    else:
        selected_exercises[key] = items

In [29]:
# Save the per-format sample as indented JSON. The same path is handed to
# find_responses in a later cell as its first input.
with open(output_file_path, mode='w') as out_fh:
    json.dump(selected_exercises, out_fh, indent=4)

# Connect the results of the runs to the selected queries

In [32]:
# Attach the responses of one completed run to the sampled multi-dimension
# queries. find_responses lives in the project's utils module and is not
# visible here; judging by the recorded output it writes its result to the
# third path -- verify the exact matching logic in utils.
# NOTE(review): only the pipeline/gpt-3.5 run is wired up in this cell, while
# the merge cell further down also expects "baseline" and "assistant" files --
# presumably produced by re-running this cell with other paths; confirm.
from utils import find_responses

first_file_path = '../../evals/human/multi-dimension-queries.json'  # sampled queries written by the cell above
second_file_path = '../../data/complete_runs/5-pipeline-gpt-3-5.json'  # completed run to pull responses from
output_file_path = '../../evals/human/multi-dimension-queries-pipeline-gpt-3-5.json'  # Path for the output JSON file (rebinds output_file_path)

# Execute the function
find_responses(first_file_path, second_file_path, output_file_path)

Results written to ../../evals/human/multi-dimension-queries-pipeline-gpt-3-5.json


# Merge and shuffle the files

In [1]:
# Combine the per-system response files into a single survey file.
# merge_and_shuffle_json_files comes from the project's utils module and is
# not visible here; per its name and the recorded output it merges the inputs,
# shuffles them and writes the result to the output path -- verify in utils.
from utils import merge_and_shuffle_json_files


# Response files to merge, one per system variant (baseline, assistant,
# pipeline), all for gpt-3.5.
json_file_paths = [
    '../../evals/human/multi-dimension-queries-baseline-gpt-3-5.json', 
    '../../evals/human/multi-dimension-queries-assistant-gpt-3-5.json',
    '../../evals/human/multi-dimension-queries-pipeline-gpt-3-5.json'  
]

# Output file path
output_file_path = '../../evals/human/multi-dimension-survey.json'  # merged survey file (rebinds output_file_path)

# Execute the merging and shuffling
merge_and_shuffle_json_files(json_file_paths, output_file_path)

Merged and shuffled data written to ../../evals/human/multi-dimension-survey.json


# Generate set of questions to run pairwise comparison on

In [10]:
import json
import random

# Adjust paths as necessary
input_file_path = "../../data/datasets/main/test-prompts.json"  # Path where the query data is stored
output_file_path_pairwise = "../../evals/human/pairwise-comparison/pairwise-comparison-queries.json"
# Queries already sampled for the multi-dimension survey; they are passed to
# filter_excluded in the next cell.
# NOTE(review): this points into a `multi-dimension/` subdirectory, whereas the
# cell that writes the multi-dimension sample earlier in this notebook targets
# `../../evals/human/multi-dimension-queries.json` directly -- confirm which
# location is current, otherwise a fresh run reads a missing or stale file.
exclusion_data_file = "../../evals/human/multi-dimension/multi-dimension-queries.json"

# Read both JSON files. The encoding is passed explicitly because open()
# otherwise falls back to the platform's locale encoding, which is not
# guaranteed to be UTF-8 and can garble or reject non-ASCII text in the JSON.
with open(input_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

with open(exclusion_data_file, 'r', encoding='utf-8') as file:
    ex_data = json.load(file)

In [11]:
from utils import filter_excluded

# How many queries to draw per group, and the seed that makes the draw
# repeatable after a kernel restart.
PAIRWISE_NUM_SAMPLES = 4
PAIRWISE_SAMPLE_SEED = 43

# A dedicated generator keeps this draw reproducible without reseeding the
# module-level `random` state that other code may also use.
pairwise_rng = random.Random(PAIRWISE_SAMPLE_SEED)

# filter_excluded is a project helper not visible here; presumably it removes
# from `data` the queries present in `ex_data` and groups the remainder by
# exercise format -- verify in utils. Its result is iterated with .items().
filtered_data = filter_excluded(data, ex_data)

# Draw PAIRWISE_NUM_SAMPLES distinct queries per group; a group with fewer
# queries than that contributes everything it has, in its original order.
selected_exercises = {}
for key, items in filtered_data.items():
    if len(items) >= PAIRWISE_NUM_SAMPLES:
        selected_exercises[key] = pairwise_rng.sample(items, PAIRWISE_NUM_SAMPLES)
    else:
        selected_exercises[key] = items

In [12]:
# Collapse the per-key lists into one flat list, preserving key order and the
# order of the queries within each key.
# NOTE(review): no cell visible in this export reads final_selected_queries;
# the write cell that follows serializes `selected_exercises` instead.
final_selected_queries = [
    query
    for group in selected_exercises.values()
    for query in group
]

In [13]:
# Save the pairwise sample as indented JSON.
# NOTE(review): this serializes the keyed `selected_exercises` mapping, not the
# flat `final_selected_queries` list built in the previous cell -- confirm
# which shape the downstream reader expects.
with open(output_file_path_pairwise, mode='w') as out_fh:
    json.dump(selected_exercises, out_fh, indent=4)

# Connect the runs to the selected pairwise comparison queries

In [4]:
# Attach the responses of one completed run to the sampled pairwise-comparison
# queries. find_responses lives in the project's utils module and is not
# visible here; the third argument is labelled as its output path -- verify
# the exact matching logic in utils.
# NOTE(review): only the pipeline/gpt-3.5 run is wired up in this cell, while
# the merge cell further down also expects gpt-4 and llama3 files --
# presumably produced by re-running this cell with other paths; confirm.
from utils import find_responses

first_file_path = '../../evals/human/pairwise-comparison/pairwise-comparison-queries.json'  # sampled queries written by the cell above
second_file_path = '../../data/complete_runs/5-pipeline-gpt-3-5.json'  # completed run to pull responses from
output_file_path = '../../evals/human/pairwise-comparison/pairwise-comparison-queries-pipeline-gpt-3-5.json'  # Path for the output JSON file

# Execute the function
find_responses(first_file_path, second_file_path, output_file_path)

{'Can you make a short defining question about the significance of tourism in Tanzania?': {'query': 'Can you make a short defining question about the significance of tourism in Tanzania?', 'human_response': 'Define employment and provide an example of how tourism increases the employment rate in Tanzania.', 'requested_exercise_format': 'short-answer', 'topic': "Tourism's significance in Tanzania", 'exercise_type': 'short-answer'}, 'Hello, can u generate a short-answer question regarding the different types of human activities for my form two students?': {'query': 'Hello, can u generate a short-answer question regarding the different types of human activities for my form two students?', 'human_response': 'List the three major categories of human activities.', 'requested_exercise_format': 'short-answer', 'topic': 'Varieties of human activities', 'exercise_type': 'short-answer'}, 'Hello, please provide a short definition exercise about issues in livestock farming for my class.': {'query':

# Merge and shuffle the files

In [5]:
# Combine the per-model response files into a single pairwise survey file.
# merge_and_shuffle_json_files comes from the project's utils module and is
# not visible here; per its name and the recorded output it merges the inputs,
# shuffles them and writes the result to the output path -- verify in utils.
from utils import merge_and_shuffle_json_files


# Response files to merge, one per model (gpt-3.5, gpt-4, llama3), all from
# the pipeline variant.
json_file_paths = [
    '../../evals/human/pairwise-comparison/pairwise-comparison-queries-pipeline-gpt-3-5.json', 
    '../../evals/human/pairwise-comparison/pairwise-comparison-queries-pipeline-gpt-4.json',
    '../../evals/human/pairwise-comparison/pairwise-comparison-queries-pipeline-llama3.json'  
]

# Output file path
output_file_path = '../../evals/human/pairwise-comparison/pairwise-comparison-survey.json'  # merged survey file (rebinds output_file_path)

# Execute the merging and shuffling
merge_and_shuffle_json_files(json_file_paths, output_file_path)

Merged and shuffled data written to ../../evals/human/pairwise-comparison/pairwise-comparison-survey.json


# Split into 3 separate surveys

In [6]:
# Break the merged pairwise survey into separate files. read_json and
# split_and_save_json are project helpers from utils and are not visible here.
from utils import read_json, split_and_save_json
# Define file paths
input_file_path = '../../evals/human/pairwise-comparison/pairwise-comparison-survey.json'  # merged survey written by the merge cell
base_output_path = '../../evals/human/pairwise-comparison/pairwise-comparison-survey'  # Base path for output files; the recorded run wrote `<base>-part-N.json`

# Read data from the input JSON file
# NOTE: this rebinds `data`, which earlier held the prompts loaded from
# test-prompts.json.
data = read_json(input_file_path)

# Split the data into 3 parts and save to separate files (the recorded output
# shows three part files; the part count is decided inside the helper).
split_and_save_json(data, base_output_path)

Data written to ../../evals/human/pairwise-comparison/pairwise-comparison-survey-part-1.json
Data written to ../../evals/human/pairwise-comparison/pairwise-comparison-survey-part-2.json
Data written to ../../evals/human/pairwise-comparison/pairwise-comparison-survey-part-3.json
