# In [None]:
import openai
import pandas as pd
import json
import time

# Placeholder credential. generate_qa_dataset() reassigns openai.api_key from
# its api_key argument on every call, so this value only applies until then.
# NOTE(review): "APT_key" looks like a typo for the "API_key" placeholder used
# in the __main__ block — confirm. Never commit a real key here.
openai.api_key = "APT_key"

def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if present."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line, which may carry a language tag such as "json".
    _, _, remainder = stripped.partition("\n")
    remainder = remainder.rstrip()
    if remainder.endswith("```"):
        remainder = remainder[:-3]
    return remainder.strip()


def generate_qa_dataset(context, api_key, max_tokens=4096, temperature=0.7):
    """
    Generates a question-and-answer dataset for construction specifications using GPT-4o.

    Parameters:
        context (str): The input construction specification context.
        api_key (str): OpenAI API key; assigned to the module-wide ``openai.api_key``.
        max_tokens (int): Upper bound on the number of tokens in the model's reply.
            The reply repeats the context, so keep this generous to avoid
            truncated (and therefore unparseable) JSON.
        temperature (float): Sampling temperature forwarded to the API.

    Returns:
        str: On success, the model's JSON reply re-serialized with 4-space
        indentation. If the reply cannot be parsed as JSON, a human-readable
        error message followed by the raw reply (no exception is raised).
    """
    openai.api_key = api_key

    # Define the prompt based on your instructions
    prompt = (
        "You are an expert in generating a question-and-answer (Q&A) dataset for construction specifications. "
        "Your task is to create Q&A pairs using the provided context, adhering to task-specific instructions with a Chain of Thought (CoT) reasoning process.\n\n"
        "---\n"
        "**Task-Specific Instructions with Chain of Thought (CoT):**\n"
        "1. Carefully read the provided construction specification **context**.\n"
        "2. Identify **key words** within the context, focusing on the following aspects:\n"
        "   - **Organization**: Entities or individuals responsible for tasks.\n"
        "   - **Implementation Strategies**: Technical methods or processes for accomplishing tasks.\n"
        "   - **Time Standard**: Deadlines or schedules.\n"
        "   - **Reference**: Standards, codes, or guidelines cited.\n\n"
        "3. Generate **diverse and syntactically/lexically varied questions** related to these key words by considering:\n"
        "   - **Who**: Who is responsible for the work?\n"
        "   - **What**: What installation or work elements are required?\n"
        "   - **When**: When should the work be completed?\n"
        "   - **How**: How can this work be implemented (technical strategies)?\n"
        "   - **Which reference**: Which standard or reference should be followed?\n\n"
        "4. For each generated question:\n"
        "   - Answer the question using an **exact span of text** from the context.\n"
        "   - Provide the **start and end character indices** for the answer in the context.\n\n"
        "5. Ensure the answer can be derived from the context and follows the requirements below.\n\n"
        "---\n"
        "**Output Format:**\n"
        "Provide the output as a JSON object where the context is included once, followed by an array of individual question-answer pairs, structured as:\n\n"
        "{\n"
        "  \"context\": \"<Input provision>\",\n"
        "  \"questions\": [\n"
        "    {\n"
        "      \"question\": \"<Generated question>\",\n"
        "      \"answer\": {\n"
        "        \"text\": \"<Answer span from context>\",\n"
        "        \"answer_start\": <Start index of answer in context>,\n"
        "        \"answer_end\": <End index of answer in context>\n"
        "      }\n"
        "    }\n"
        "    // Additional Q&A pairs\n"
        "  ]\n"
        "}\n\n"
        "---\n"
        "**Input Context:**\n"
        f"{context}\n\n"
        "Now generate the JSON-formatted Q&A dataset based on the above context and instructions."
    )

    # Legacy (openai<1.0) chat-completions call, matching the rest of this file.
    response = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # Parse the response
    output = response["choices"][0]["message"]["content"]

    try:
        # Chat models frequently wrap JSON in a ```json fence; strip it so an
        # otherwise valid payload is not rejected.
        output_json = json.loads(_strip_code_fence(output))
    except json.JSONDecodeError:
        # Best-effort contract: report the problem as text, with the untouched
        # raw reply, instead of raising.
        return "The model's output could not be parsed as valid JSON. Please review the raw output: \n" + output
    return json.dumps(output_json, indent=4)

if __name__ == "__main__":
    # Script entry point: build a Q&A dataset for one sample specification
    # clause and save the result to disk.

    # Local import: only the script entry point needs the environment lookup.
    import os

    input_context = (
        "For each project, at least biannually, or as required, the Plant Manager will examine "
        "mixers and agitators to check for any buildup of hardened concrete or worn blades. If this "
        "examination reveals a problem, or if the Engineer wishes to test the quality of the concrete, "
        "slump tests may be performed with samples taken at approximately the ¼ and ¾ points as "
        "the batch is discharged. The maximum allowable slump difference shall be as follows: "
        "If the average of the two slump tests is less than 4 inches, the difference shall be less than 1 inch; "
        "if the average of the two slump tests is greater than 4 inches, the difference shall be less than 1½ inches. "
        "If the slump difference exceeds these limits, the equipment shall not be used until the "
        "faulty condition is corrected. However, the equipment may continue in use if longer mixing "
        "times or smaller loads produce batches that pass the slump uniformity tests."
    )

    # Prefer a key supplied via the environment so it never has to be committed;
    # fall back to the original placeholder for backward compatibility.
    api_key = os.environ.get("OPENAI_API_KEY", "API_key")

    qa_dataset = generate_qa_dataset(input_context, api_key)

    # Explicit UTF-8: the text may contain non-ASCII characters (e.g. ¼, ¾, ½
    # echoed from the context), which the platform default encoding may reject.
    with open("qa_dataset_individual.json", "w", encoding="utf-8") as file:
        file.write(qa_dataset)

    # generate_qa_dataset() returns an error message (not JSON) when the model's
    # reply cannot be parsed, so only claim success if the saved text is JSON.
    try:
        json.loads(qa_dataset)
    except json.JSONDecodeError:
        print(
            "Warning: the model's output was not valid JSON; the raw output was "
            "saved to 'qa_dataset_individual.json' for review."
        )
    else:
        print("Q&A dataset has been generated and saved as 'qa_dataset_individual.json'.")