**Stage 1 - Test Suite**

In [None]:
# @title 1. Setup Environment
import os
import sys
import subprocess

# 1.1 Install Dependencies
# NOTE: the lines starting with "!" are IPython/Colab shell escapes, so this cell
# only runs inside a notebook kernel, not as a plain Python script.
print("Installing dependencies...")
# bitsandbytes is needed for the 4-bit quantized model load done in a later cell.
!pip install -q -U bitsandbytes
# transformers is installed from the GitHub default branch (unpinned), so the exact
# version can differ from one run to the next.
!pip install -q -U git+https://github.com/huggingface/transformers peft accelerate scikit-learn pandas matplotlib
# System OpenGL packages; apt output is discarded.
# NOTE(review): presumably required for moderngl's headless context - confirm.
!apt-get install -y libgl1-mesa-glx xvfb > /dev/null
!pip install -q moderngl

# 1.2 Mount Drive
from google.colab import drive
# Mount only when the mount point does not exist yet, so re-running the cell is harmless.
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# 1.3 Configure Paths
# Everything lives under one project folder on the mounted Drive.
PROJECT_ROOT = '/content/drive/MyDrive/projects/EarthShader'
LIB_DIR = os.path.join(PROJECT_ROOT, 'lib')
GEN_DIR = os.path.join(LIB_DIR, 'generators')
ADAPTER_PATH = os.path.join(PROJECT_ROOT, 'checkpoints/stage1_final')

# 1.4 Fix Imports (The "Package" Fix)
# Expose LIB_DIR on sys.path (once) so modules below it can be imported by name.
if LIB_DIR not in sys.path:
    sys.path.append(LIB_DIR)

# Make sure both folders carry an (empty) __init__.py; existing files are left untouched.
for package_dir in (LIB_DIR, GEN_DIR):
    marker = os.path.join(package_dir, '__init__.py')
    if os.path.exists(marker):
        continue
    with open(marker, 'w') as handle:
        handle.write("")

print("Environment Ready.")

In [None]:
# @title 2. Load model and robust renderer
import torch
import moderngl
import numpy as np
import os
from PIL import Image
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
from peft import PeftModel

# --- SHADER RENDERER ENGINE ---
class ShaderRenderer:
    """Offscreen renderer that draws a Shadertoy-style fragment shader to an image file.

    A standalone moderngl context draws a full-screen quad into a
    ``width`` x ``height`` RGB framebuffer.

    Attributes:
        width, height: Framebuffer size in pixels.
        last_error: Exception raised by the most recent failed ``render`` call,
            or ``None`` if the last call succeeded. Diagnostic only.
    """

    def __init__(self, width=256, height=256):
        """Create the GL context, the quad vertex buffer and the framebuffer.

        Args:
            width: Framebuffer width in pixels.
            height: Framebuffer height in pixels.
        """
        self.width, self.height = width, height
        self.last_error = None
        try:
            self.ctx = moderngl.create_context(standalone=True, backend='egl')
        except Exception:
            # The EGL backend could not be created; retry with moderngl's default
            # backend. `Exception` (not a bare except) keeps Ctrl-C working.
            self.ctx = moderngl.create_context(standalone=True)

        # Four corners of clip space, drawn as a triangle strip in render().
        self.vbo = self.ctx.buffer(np.array([-1,-1, 1,-1, -1,1, 1,1], dtype='f4'))
        self.fbo = self.ctx.simple_framebuffer((width, height), components=3)
        self.vert = "#version 330\nin vec2 in_vert; out vec2 uv; void main(){ uv=in_vert; gl_Position=vec4(in_vert,0,1); }"

    def render(self, frag_code, path):
        """Compile ``frag_code``, draw one frame and save it to ``path``.

        ``frag_code`` must define ``mainImage(out vec4, in vec2)``; it is wrapped
        with a ``#version 330`` header, an ``iResolution`` uniform and a ``main()``
        that forwards ``gl_FragCoord.xy``. No other uniforms are provided.

        Args:
            frag_code: GLSL source containing a ``mainImage`` function.
            path: Destination image path, passed to ``PIL.Image.save``.

        Returns:
            ``True`` if the shader compiled, rendered and was saved; ``False`` on
            any failure. On failure the exception is kept in ``self.last_error``
            and nothing is written to ``path`` by this call.
        """
        full_shader = f"#version 330\nuniform vec2 iResolution; out vec4 f; {frag_code}\nvoid main(){{ vec4 c; mainImage(c,gl_FragCoord.xy); f=c; }}"
        self.last_error = None
        prog = vao = None
        try:
            prog = self.ctx.program(vertex_shader=self.vert, fragment_shader=full_shader)
            # The uniform is absent when the shader never reads iResolution.
            if 'iResolution' in prog:
                prog['iResolution'].value = (self.width, self.height)
            vao = self.ctx.simple_vertex_array(prog, self.vbo, 'in_vert')
            self.fbo.use()
            self.fbo.clear()
            vao.render(moderngl.TRIANGLE_STRIP)
            frame = Image.frombytes('RGB', (self.width, self.height), self.fbo.read())
            # Flip vertically so the saved image has its first row at the top.
            frame.transpose(Image.FLIP_TOP_BOTTOM).save(path)
            return True
        except Exception as exc:
            # Compile/link/render/save failures are an expected outcome for
            # model-generated shaders; report them through the return value.
            self.last_error = exc
            return False
        finally:
            # A program and a vertex array are created per call; release them so
            # repeated renders do not accumulate GL objects in the context.
            for gl_obj in (vao, prog):
                if gl_obj is not None:
                    gl_obj.release()

# One shared 256x256 renderer instance for the cells below.
renderer = ShaderRenderer(256, 256)

# --- MODEL LOADING ---
print(f"Loading stage 1 adapter from: {ADAPTER_PATH}")
BASE_MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"
IMAGE_PIXELS = 256*256

# 4-bit NF4 quantization with float16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantized base model, then attach the LoRA adapter on top of it.
# The adapter is not merged into the base weights (no merge_and_unload() call).
base = Qwen2VLForConditionalGeneration.from_pretrained(
    BASE_MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(base, ADAPTER_PATH)
model.eval()

# min_pixels == max_pixels pins the image budget to one fixed value for every sample;
# consistent resolution is key for valid MSE metrics.
processor = AutoProcessor.from_pretrained(
    BASE_MODEL_ID,
    min_pixels=IMAGE_PIXELS,
    max_pixels=IMAGE_PIXELS,
)
print("Model and Renderer successfully initialized.")

In [None]:
# @title 3. Run test suite and classification audit
import re
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import classification_report
from generators.primitives import generate_primitive

def extract_shapes(text):
    """Return the alphabetically sorted primitive names mentioned in ``text``.

    Matching is a case-insensitive substring test against the fixed vocabulary
    ``circle``, ``square`` and ``annulus``; each name appears at most once in the
    result regardless of how often it occurs in the text.
    """
    lowered = text.lower()
    return sorted(name for name in ('circle', 'square', 'annulus') if name in lowered)

# Set test sample size for baseline verification.
TEST_SAMPLES = 100
results = []


def _extract_glsl(text):
    """Pull the shader source out of a model response.

    Preference order: the last fenced block tagged ``glsl``; otherwise the first
    fenced block with any (or no) language tag; otherwise the raw text unchanged.
    """
    if "```glsl" in text:
        return text.split("```glsl")[-1].split("```")[0].strip()
    # Untagged or differently tagged fence: take what is INSIDE the fence. Splitting on
    # "```" and keeping part [0] would return the prose that precedes the code instead.
    fenced = re.search(r"```(?:[A-Za-z0-9_+-]*\n)?(.*?)(?:```|\Z)", text, re.DOTALL)
    if fenced:
        return fenced.group(1).strip()
    return text


print(f"Starting performance audit on {TEST_SAMPLES} unseen samples...")

for i in tqdm(range(TEST_SAMPLES)):
    # Use seeds outside the training range to evaluate generalization.
    gt_code, gt_analysis = generate_primitive(5000 + i)
    # If the reference shader fails to render, gt.png is either missing or left over
    # from an earlier sample, and scoring against it would corrupt the metrics.
    if not renderer.render(gt_code, "gt.png"):
        tqdm.write(f"Sample {i}: ground-truth shader failed to render; skipping.")
        continue
    gt_img = Image.open("gt.png").convert("RGB")

    # Identify labels from the Ground Truth generator.
    gt_shapes = extract_shapes(gt_analysis)

    # Prepare prompt using the standard Stage 1 template.
    conv = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Reverse engineer the GLSL shader code for this texture. Include analysis."}]}]
    prompt = processor.apply_chat_template(conv, add_generation_prompt=True, tokenize=False)
    inputs = processor(text=[prompt], images=[gt_img], return_tensors="pt").to(model.device)

    # Greedy decoding (do_sample=False).
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=512, do_sample=False)

    # Drop the prompt tokens so only the newly generated continuation is decoded.
    pred_text = processor.batch_decode(out[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0]

    # Identify labels from the model's reasoning block.
    pred_shapes = extract_shapes(pred_text)

    # Extract code and attempt a validation render.
    pred_code = _extract_glsl(pred_text)
    success = renderer.render(pred_code, "pred.png")

    # Calculate visual accuracy using Mean Squared Error on [0, 1]-scaled pixels.
    # Shaders that fail to render keep the worst-case value of 1.0.
    mse = 1.0
    if success:
        p_arr = np.array(Image.open("pred.png").convert("RGB")).astype(float) / 255.0
        g_arr = np.array(gt_img).astype(float) / 255.0
        mse = float(np.mean((g_arr - p_arr) ** 2))

    results.append({
        "id": i,
        "gt_count": len(gt_shapes),
        "pred_count": len(pred_shapes),
        "gt_type": gt_shapes[0] if gt_shapes else "unknown",
        "pred_type": pred_shapes[0] if pred_shapes else "unknown",
        "compiled": success,
        "mse": mse
    })

# Compile results into a final audit report.
df = pd.DataFrame(results)
rule = "="*50

compile_rate = df['compiled'].mean()
# MSE is averaged over the samples whose predicted shader rendered successfully.
compiled_mse = df[df['compiled']]['mse'].mean()

print("\n" + rule + "\nSTAGE 1 PERFORMANCE AUDIT\n" + rule)
print(f"1. Compilation Success:  {compile_rate:.1%}")
print(f"2. Visual Precision ($MSE$): {compiled_mse:.4f}")

# Compares the number of distinct shape names found in the ground-truth analysis
# with the number found in the model's response.
print("\n3. Complexity Accuracy (Single vs Double):")
count_report = classification_report(df['gt_count'], df['pred_count'], zero_division=0)
print(count_report)

# Restrict to samples whose ground truth names exactly one shape for a clean identity metric.
print("\n4. Shape Identity Accuracy (Circle vs Square vs Annulus):")
singles = df[df['gt_count'] == 1]
identity_report = classification_report(singles['gt_type'], singles['pred_type'], zero_division=0)
print(identity_report)
print(rule)

In [None]:
# @title 4. Auto-Shutdown
# When the notebook is run top to bottom, this cell only executes after the test-suite cell above finishes.
import time
from google.colab import runtime

# This notebook runs the Stage 1 test suite, not training, so the status message
# says so.
SHUTDOWN_DELAY_S = 60

print("Test suite finished. Audit report is complete.")
print(f"Shutting down runtime to save Compute Units in {SHUTDOWN_DELAY_S} seconds...")

# Grace period before the VM is released, intended to let final logs sync to Drive.
# NOTE(review): a plain sleep does not guarantee that pending Drive writes are flushed.
time.sleep(SHUTDOWN_DELAY_S)

print("Goodnight.")
# Disconnects and releases the Colab runtime; all in-memory state is lost.
runtime.unassign()