# Data Generation

In [None]:
# configure api
from dotenv import load_dotenv
import os

# Let python-dotenv populate the process environment, then look up the Gemini
# key; the lookup yields None when the variable is not set.
load_dotenv()
gemini_api_key = os.environ.get("GEMINI_API_KEY")

In [None]:
from google import genai
from google.genai import types

# Gemini API client, authenticated with the key held in `gemini_api_key`.
client = genai.Client(api_key=gemini_api_key)

# Model identifiers are kept in a list; only a single entry is defined here.
model = ["gemini-2.5-flash-preview-04-17"]

# Request configuration that asks for the response as application/json.
generate_content_config = types.GenerateContentConfig(response_mime_type="application/json")

In [None]:
# Load the prompt template from prompts/sample.txt.
# The encoding is pinned to UTF-8 so the text that is read does not depend on
# the platform's default locale encoding.
with open("prompts/sample.txt", "r", encoding="utf-8") as file:
    original_prompt = file.read()

# Show the loaded prompt for a quick visual check.
print(original_prompt)

In [None]:
# Send the prompt to the LLM NUM_ITERATIONS times and store each raw JSON
# response as data/raw/run_<iteration>.json (iteration is zero-based).
import time  # used to pause between requests
import uuid
from pathlib import Path

NUM_ITERATIONS = 40  # total number of generation runs
WAIT_SECONDS = 60  # pause between consecutive requests
OUTPUT_DIR = Path("data/raw")

# Create the output directory up front so the first write cannot fail with
# FileNotFoundError when the directory does not exist yet.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Every response object is kept in memory, in request order.
history = []

for iteration in range(NUM_ITERATIONS):
    print(f"Iteration {iteration + 1} of {NUM_ITERATIONS}")

    # Each request is built from the unmodified original prompt with a fresh
    # UUID prepended; nothing from earlier responses is fed back in.
    prompt = f"Unique session ID: {uuid.uuid4()} \n\n{original_prompt}"

    # Get the response from the LLM.
    response = client.models.generate_content(
        model=model[0],
        contents=prompt,
        config=generate_content_config,
    )

    # Record the response before touching the file system.
    history.append(response)

    # NOTE(review): the google-genai SDK types `response.text` as optional;
    # writing None would raise TypeError and leave an empty file behind, so
    # such runs are reported and skipped instead.
    output_path = OUTPUT_DIR / f"run_{iteration}.json"
    if response.text is None:
        print(f"Iteration {iteration + 1}: response has no text; {output_path} not written.")
    else:
        # JSON is written as UTF-8 regardless of the platform default.
        output_path.write_text(response.text, encoding="utf-8")

    # Pause between requests, but never after the final iteration. As in the
    # original schedule, no pause follows iterations 0, 5, 10, ...
    is_last_iteration = iteration == NUM_ITERATIONS - 1
    if not is_last_iteration and iteration % 5 != 0:
        print("Waiting for 1 minute before the next iteration...")
        time.sleep(WAIT_SECONDS)

# print() instead of the `%echo` alias, which IPython only defines by default
# on Windows.
print("All iterations completed. Check the data/raw directory for the output files.")