In [1]:
# Standard library imports
import os
import sys
import json
import uuid

# Third-party imports
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import skimage.draw

# Custom module import
sys.path.append("../")  # Add parent directory to Python path
import LLMP as L

# ------------------- Configuration -------------------

# Output layout: everything for this run lives under main_output_dir, with the
# rendered images and the JSON dataset description in separate subfolders.
main_output_dir = "finetuning-EXP4-5000-10epochs"
image_output_dir, json_output_dir = (
    os.path.join(main_output_dir, subfolder) for subfolder in ("images", "json")
)

# makedirs creates main_output_dir on the way; exist_ok=True makes it safe to
# re-run this cell when the folders are already there.
for output_dir in (image_output_dir, json_output_dir):
    os.makedirs(output_dir, exist_ok=True)

# Task name -> question text stored with every image generated for that task.
# The two prompts differ only in "with framing" vs "without framing".
tasks = dict(
    framed=(
        "Estimate the lengths of the two bars with framing. "
        "Both lengths should fall between 49 and 60 pixels. "
        "No explanation. Format of the answer [xx, xx]"
    ),
    unframed=(
        "Estimate the lengths of the two bars without framing. "
        "Both lengths should fall between 49 and 60 pixels. "
        "No explanation. Format of the answer [xx, xx]"
    ),
)

# How many images are generated per task (the total is this times len(tasks)).
num_images_per_task = 5_000

# Accumulates one metadata entry per generated image, across all tasks.
combined_dataset = list()

# For every task, generate num_images_per_task stimuli, save each one as a JPEG
# in image_output_dir under a fresh UUID, and append one metadata entry per
# image to combined_dataset.
for task, question in tasks.items():
    print(f"Generating images and dataset for task: {task}")

    for i in range(num_images_per_task):
        # figure12 returns the rendered stimulus together with its label values
        # (presumably the two bar lengths the question asks about - confirm
        # against the LLMP implementation).
        image_array, label = L.GPImage.figure12(task)

        # Convert labels to Python-native floats (rounded to two decimals) so
        # that json can serialise them.
        label = [round(float(val), 2) for val in label]

        # Scale to 0-255 for saving. This assumes intensities are nominally in
        # [0, 1]; the clip keeps any value outside that range from wrapping
        # around in the uint8 cast (e.g. 1.05 * 255 = 267 would become 11).
        image_array_uint8 = (np.clip(image_array, 0.0, 1.0) * 255).astype(np.uint8)

        # Convert the NumPy array to a PIL image
        pil_image = Image.fromarray(image_array_uint8)

        # The UUID is both the dataset id and the file stem, so each JSON entry
        # can be matched to its image file.
        unique_id = str(uuid.uuid4())

        # Save the image with the unique ID
        image_filename = os.path.join(image_output_dir, f"{unique_id}.jpg")
        pil_image.save(image_filename)

        # Create a JSON entry for the dataset; 'image' is stored relative to
        # image_output_dir (file name only).
        json_entry = {
            'id': unique_id,
            'image': f"{unique_id}.jpg",
            'question': question,
            'value': label  # Label values as native Python floats, rounded to 2 decimals
        }

        # Append the JSON entry to the combined dataset list
        combined_dataset.append(json_entry)

# Persist all collected entries as one pretty-printed JSON document inside the
# JSON output folder, then report where everything was written.
combined_json_filename = "combined_dataset.json"
combined_json_filepath = os.path.join(json_output_dir, combined_json_filename)

with open(combined_json_filepath, 'w') as json_file:
    # Serialise to a string first and write it in one call.
    json_file.write(json.dumps(combined_dataset, indent=4))

print(f"Images saved in '{image_output_dir}' and combined dataset saved as '{combined_json_filename}' in '{json_output_dir}'")


Generating images and dataset for task: framed
Generating images and dataset for task: unframed
Images saved in 'finetuning-EXP4-5000-10epochs/images' and combined dataset saved as 'combined_dataset.json' in 'finetuning-EXP4-5000-10epochs/json'
