**Stage 1 - Data Generation**

In [None]:
# @title 1. Setup & Dependencies
import sys
import os
import subprocess
import importlib.util

# Import moderngl; when that fails, install the libgl1-mesa-glx apt package
# and the moderngl pip package (in that order), then import again.
print("Installing dependencies...")
try:
    import moderngl
except ImportError:
    install_commands = (
        ['apt-get', 'install', '-y', 'libgl1-mesa-glx'],
        [sys.executable, "-m", "pip", "install", "moderngl"],
    )
    for install_cmd in install_commands:
        # check_call raises CalledProcessError on a non-zero exit status,
        # so a failed apt install stops before pip is attempted.
        subprocess.check_call(install_cmd)
    import moderngl

import random
import json
from tqdm import tqdm
import numpy as np
from PIL import Image

from google.colab import drive
# Mount Google Drive unless /content/drive already exists (e.g. cell re-run).
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

PROJECT_ROOT = '/content/drive/MyDrive/projects/EarthShader'
LIB_DIR = os.path.join(PROJECT_ROOT, 'lib')

# Import paths:
#   PROJECT_ROOT -> allows `from lib.<module> import ...`
#   LIB_DIR      -> allows importing the modules inside lib/ directly
# Both additions are guarded so re-running this cell does not keep adding
# duplicate entries to sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
if LIB_DIR not in sys.path:
    sys.path.insert(0, LIB_DIR)

# CONFIG
DATASET_ROOT = os.path.join(PROJECT_ROOT, 'dataset/stage1')
IMAGES_DIR = os.path.join(DATASET_ROOT, 'images')      # rendered PNGs
JSONL_PATH = os.path.join(DATASET_ROOT, 'dataset.jsonl')  # one JSON entry per line
NUM_SAMPLES = 8000
IMG_SIZE = 512

# Creating IMAGES_DIR also creates DATASET_ROOT, the parent of JSONL_PATH.
os.makedirs(IMAGES_DIR, exist_ok=True)

In [None]:
# @title 2. Import Generator Logic
try:
    # Preferred route: ordinary package imports resolved through sys.path.
    from lib.generators.primitives import generate_primitive
    from lib.gl_renderer import ShaderRenderer
    print("Library loaded successfully.")
except ImportError as e:
    print(f"Standard import failed ({e}). Trying direct file load...")

    # Fallback: execute the source files under LIB_DIR directly.
    import importlib.util

    def _exec_source_file(module_name, relative_path, preset=None):
        """Create a module from LIB_DIR/relative_path, set the `preset`
        attributes on it, execute its source, and return the module."""
        source_path = os.path.join(LIB_DIR, relative_path)
        spec = importlib.util.spec_from_file_location(module_name, source_path)
        module = importlib.util.module_from_spec(spec)
        # Attributes are attached BEFORE execution so the module body can
        # see them as globals while it runs.
        for attr_name, attr_value in (preset or {}).items():
            setattr(module, attr_name, attr_value)
        spec.loader.exec_module(module)
        return module

    # Renderer first, so ShaderRenderer is bound even if a later load fails.
    renderer_module = _exec_source_file("gl_renderer", "gl_renderer.py")
    ShaderRenderer = renderer_module.ShaderRenderer

    base_module = _exec_source_file("base", "generators/base.py")

    # The primitives module gets a `base` global pointing at the loaded base
    # module. NOTE(review): this is intended to stand in for primitives.py's
    # own import of base -- confirm it matches how primitives.py imports it.
    primitives_module = _exec_source_file(
        "primitives",
        "generators/primitives.py",
        preset={"base": base_module},
    )

    generate_primitive = primitives_module.generate_primitive
    print("Direct file load successful.")

In [None]:
# @title 3. Execution Loop (With Force Regen)

# CONFIG
# True  -> truncate the JSONL registry and regenerate every sample from index 0.
# False -> resume after the highest sample index already recorded in the registry.
FORCE_REGEN = True

_FLUSH_EVERY = 50  # number of buffered entries that triggers an append to JSONL_PATH


def _scan_registry(jsonl_path):
    """Inspect an existing JSONL registry.

    Returns:
        (line_count, next_index) where ``line_count`` is the number of lines
        in the file and ``next_index`` is one past the highest sample index
        found in the recorded ``image_path`` file names
        (``stage1_<index>.png``).

    Failed renders write no registry line, so the line count can be lower
    than the last index that was attempted. Resuming from the line count
    would then regenerate indices that are already registered and append
    duplicate entries; resuming from the highest recorded index does not.
    Lines that cannot be parsed are counted but otherwise ignored. If no
    line yields an index, ``next_index`` falls back to ``line_count``.
    """
    line_count = 0
    highest_index = -1
    with open(jsonl_path, 'r') as f:
        for line in f:
            line_count += 1
            try:
                image_name = os.path.basename(json.loads(line)["image_path"])
                stem = os.path.splitext(image_name)[0]
                highest_index = max(highest_index, int(stem.rsplit('_', 1)[-1]))
            except (ValueError, KeyError, TypeError):
                # Malformed JSON, missing key, or unexpected file name.
                continue
    next_index = highest_index + 1 if highest_index >= 0 else line_count
    return line_count, next_index


def _append_entries(jsonl_path, entries):
    """Append every entry to the registry, one JSON document per line."""
    with open(jsonl_path, 'a') as f:
        f.writelines(json.dumps(entry) + '\n' for entry in entries)


# Reuse the renderer created by an earlier run of this cell, if there is one.
if 'renderer' not in locals():
    renderer = ShaderRenderer(width=IMG_SIZE, height=IMG_SIZE)

# Entries rendered successfully but not yet written to JSONL_PATH.
dataset_entries = []

# Resume Logic
if FORCE_REGEN:
    print(" [IMPORTANT] FORCE REGEN: Wiping existing registry...")
    with open(JSONL_PATH, 'w') as f:
        pass  # Create empty file
    existing_count = 0
    start_index = 0
elif os.path.exists(JSONL_PATH):
    existing_count, start_index = _scan_registry(JSONL_PATH)
    print(f"Found {existing_count} existing samples.")
else:
    with open(JSONL_PATH, 'w') as f:
        pass
    existing_count = 0
    start_index = 0

if start_index < NUM_SAMPLES:
    print(f"Generating samples {start_index} to {NUM_SAMPLES}...")

    try:
        for i in tqdm(range(start_index, NUM_SAMPLES)):
            # CALL THE LIBRARY
            code, analysis = generate_primitive(i)

            filename = f"stage1_{i:05d}.png"
            filepath = os.path.join(IMAGES_DIR, filename)

            if not renderer.render(code, filepath):
                # Failed render: nothing is registered for this index.
                continue

            dataset_entries.append({
                "image_path": filepath,
                "analysis": analysis,
                "code": code,
            })

            # Incremental Save
            if len(dataset_entries) >= _FLUSH_EVERY:
                _append_entries(JSONL_PATH, dataset_entries)
                dataset_entries = []
    finally:
        # Final Flush -- also runs when the loop is interrupted or raises, so
        # entries whose images were already rendered are not lost.
        if dataset_entries:
            _append_entries(JSONL_PATH, dataset_entries)
            dataset_entries = []
    print("Done.")
else:
    print("Dataset already complete.")

In [None]:
# @title 5. Inspector (Random Sample)
import matplotlib.pyplot as plt
import random

def inspect_random_sample():
    """Plot one randomly chosen registry entry: GLSL code beside its image.

    Reads every line of JSONL_PATH, picks one at random, and shows the
    entry's ``code`` (cut off after 800 characters) on the left and the image
    stored at ``image_path`` on the right.

    Prints a message and returns without plotting when the registry file does
    not exist, when it has no lines, or when the image file is missing.
    """
    if not os.path.exists(JSONL_PATH):
        print(f"Waiting for data... {JSONL_PATH} not found yet.")
        return

    with open(JSONL_PATH, 'r') as registry:
        records = registry.readlines()

    if not records:
        print("Dataset is empty.")
        return

    sample = json.loads(random.choice(records))
    img_path = sample['image_path']
    code = sample['code']

    # Keep the text panel readable: long shaders are cut at 800 characters.
    display_code = code if len(code) <= 800 else code[:800] + "\n... [Truncated]"

    if not os.path.exists(img_path):
        print(f"Image missing at path: {img_path}")
        return

    # Compact two-panel layout (12x6 inches).
    fig = plt.figure(figsize=(12, 6))

    # Left panel: the shader source as monospace text anchored at the top-left.
    code_ax = fig.add_subplot(1, 2, 1)
    code_ax.set_facecolor('#f5f5f5')
    code_ax.text(
        0.02, 0.98, display_code,
        fontsize=10,
        family='monospace',
        verticalalignment='top',
        transform=code_ax.transAxes,
    )
    code_ax.axis('off')
    code_ax.set_title("Generated GLSL", fontsize=12, pad=10)

    # Right panel: the rendered image.
    rendered = Image.open(img_path)
    image_ax = fig.add_subplot(1, 2, 2)
    image_ax.imshow(rendered)
    image_ax.axis('off')
    image_ax.set_title("Rendered Output", fontsize=12, pad=10)

    plt.tight_layout()
    plt.show()

inspect_random_sample()