In [None]:
# @title 1. Setup environment
import os
import sys
import subprocess

# 1.1 Install Dependencies
# We include the exact Stage 1 stack but ensure bitsandbytes is the latest version.
print("Installing test suite dependencies...")
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers peft accelerate scikit-learn pandas matplotlib
!apt-get install -y libgl1-mesa-glx xvfb > /dev/null
!pip install -q moderngl

# 1.2 Mount Drive
from google.colab import drive
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# 1.3 Configure Paths
PROJECT_ROOT = '/content/drive/My Drive/projects/EarthShader'
# We point to the new Stage 2 finalized adapters.
ADAPTER_PATH = os.path.join(PROJECT_ROOT, 'checkpoints/stage2_final')
LIB_DIR = os.path.join(PROJECT_ROOT, 'lib')
GEN_DIR = os.path.join(LIB_DIR, 'generators')

# 1.4 Fix Imports and Module Structure
if LIB_DIR not in sys.path:
    sys.path.append(LIB_DIR)

# Ensure the library folder is recognized as a package.
for folder in [LIB_DIR, GEN_DIR]:
    init_file = os.path.join(folder, '__init__.py')
    if not os.path.exists(init_file):
        with open(init_file, 'w') as f:
            f.write("")

print("Environment Ready.")

In [None]:
# @title 2. Load model and boolean-aware renderer
import torch
import moderngl
import numpy as np
import os
from PIL import Image
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
from peft import PeftModel

# --- SHADER RENDERER ENGINE ---
class ShaderRenderer:
    """Off-screen renderer that draws a Shadertoy-style fragment shader to an image file.

    A standalone moderngl context renders a full-screen quad into an RGB
    framebuffer of ``width`` x ``height`` pixels. The context, vertex buffer and
    framebuffer are created once and reused by every ``render`` call.
    """

    def __init__(self, width=256, height=256):
        """Create the GL context, the full-screen quad and the RGB framebuffer.

        Args:
            width: Framebuffer width in pixels.
            height: Framebuffer height in pixels.
        """
        self.width, self.height = width, height
        try:
            self.ctx = moderngl.create_context(standalone=True, backend='egl')
        except Exception:
            # EGL backend unavailable: fall back to moderngl's default standalone
            # backend. Catching Exception instead of using a bare `except` lets
            # KeyboardInterrupt / SystemExit propagate.
            self.ctx = moderngl.create_context(standalone=True)

        # Four clip-space corners, drawn as a triangle strip that covers the viewport.
        self.vbo = self.ctx.buffer(np.array([-1,-1, 1,-1, -1,1, 1,1], dtype='f4'))
        self.fbo = self.ctx.simple_framebuffer((width, height), components=3)
        self.vert = "#version 330\nin vec2 in_vert; out vec2 uv; void main(){ uv=in_vert; gl_Position=vec4(in_vert,0,1); }"

    def render(self, frag_code, path):
        """Compile ``frag_code``, draw one frame and save it to ``path``.

        Args:
            frag_code: GLSL source that defines ``mainImage``. It is wrapped in a
                "#version 330" fragment shader that declares ``iResolution``.
            path: Destination image file path.

        Returns:
            ``(True, None)`` on success, or ``(False, message)`` when compiling,
            drawing or saving raised an exception.
        """
        # We wrap the model's mainImage function into a valid GLSL 330 program.
        full_shader = f"#version 330\nuniform vec2 iResolution; out vec4 f; {frag_code}\nvoid main(){{ vec4 c; mainImage(c,gl_FragCoord.xy); f=c; }}"
        prog = vao = None
        try:
            prog = self.ctx.program(vertex_shader=self.vert, fragment_shader=full_shader)
            # The uniform is only present when the shader actually uses it.
            if 'iResolution' in prog:
                prog['iResolution'].value = (self.width, self.height)
            vao = self.ctx.simple_vertex_array(prog, self.vbo, 'in_vert')
            self.fbo.use()
            self.fbo.clear()
            vao.render(moderngl.TRIANGLE_STRIP)
            frame = Image.frombytes('RGB', (self.width, self.height), self.fbo.read())
            # Flip vertically before saving: framebuffer rows are read bottom-up.
            frame.transpose(Image.FLIP_TOP_BOTTOM).save(path)
            return True, None
        except Exception as e:
            return False, str(e)
        finally:
            # A new program and vertex array are built on every call; release them
            # so repeated renders do not accumulate GPU objects in the context.
            for resource in (vao, prog):
                if resource is not None:
                    resource.release()

# Shared 256x256 off-screen renderer used by the cells below.
renderer = ShaderRenderer(width=256, height=256)

# --- MODEL LOADING ---
print(f"Loading Stage 2 adapters from: {ADAPTER_PATH}")

# 4-bit NF4 weights with fp16 compute, chosen to mirror the training environment
# and keep the adapters compatible with the quantized base model.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Quantized base model first, then the Stage 2 adapters layered on top of it.
base = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct",
    device_map="auto",
    quantization_config=bnb_config,
)
model = PeftModel.from_pretrained(base, ADAPTER_PATH)
model.eval()  # inference only

# min_pixels == max_pixels pins the processor's image pixel budget to 256*256.
processor = AutoProcessor.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct",
    max_pixels=256*256,
    min_pixels=256*256,
)
print("Model and Renderer successfully initialized.")

In [None]:
# @title 3. Run test suite and classification audit
import re
import pandas as pd
import json
import torch
import numpy as np
import os
from tqdm import tqdm
from PIL import Image
from sklearn.metrics import classification_report
from generators.csg import generate_csg_scenario

def extract_logic_ops(text):
    """Label the Boolean operations hinted at by min/max calls in ``text``.

    The check is case-insensitive and purely textual:
      * ``min(`` present                      -> 'union'
      * ``max(`` present and any ``-`` exists -> 'subtraction'
      * ``max(`` present and no ``-`` exists  -> 'intersection'

    Returns:
        The detected labels as an alphabetically sorted list; empty when neither
        call appears in the text.
    """
    lowered = text.lower()
    found = set()
    if 'min(' in lowered:
        found.add('union')
    if 'max(' in lowered:
        # A minus sign anywhere in the text is treated as the subtraction marker.
        found.add('subtraction' if '-' in lowered else 'intersection')
    return sorted(found)

def _extract_shader_code(reply):
    """Pull the GLSL source out of a model reply.

    * If a ```glsl fence is present, the body of the last such block is returned
      (up to its closing fence, or to the end of the text if it was truncated).
    * Otherwise, if any ``` fence is present, the body of the first fenced block
      is returned, with an optional language tag on the fence line removed.
    * With no fence at all, the reply is returned unchanged.
    """
    if "```glsl" in reply:
        return reply.split("```glsl")[-1].split("```")[0].strip()
    if "```" in reply:
        # split(..., 2) -> [text before, block body, rest]; the body is index 1
        # even when the closing fence is missing.
        inner = reply.split("```", 2)[1]
        # Drop the remainder of the opening fence line (e.g. a bare language tag).
        inner = re.sub(r"\A[ \t]*[\w+-]*[ \t]*\n", "", inner, count=1)
        return inner.strip()
    return reply


# Audit settings.
TEST_SAMPLES = 100      # number of seeded scenarios to evaluate
NUM_AUDIT_LOGS = 15     # how many of the first samples get a full qualitative log

results = []            # one metrics row per evaluated sample (feeds the report cell)
audit_samples = []      # detailed records for the first NUM_AUDIT_LOGS samples

print(f"Starting Stage 2 Audit on {TEST_SAMPLES} complex logic samples...")

for i in tqdm(range(TEST_SAMPLES)):
    # Seeds 8000+ were reserved for final Stage 2 validation.
    seed = 8000 + i
    gt_code, gt_analysis, gt_meta = generate_csg_scenario(seed)

    # Render ground truth. If this fails, gt.png is missing or still holds the
    # previous sample's image, so the sample is skipped instead of being scored
    # against the wrong reference.
    gt_ok, gt_error = renderer.render(gt_code, "gt.png")
    if not gt_ok:
        tqdm.write(f"[SKIP] seed {seed}: ground-truth shader failed to render: {gt_error}")
        continue
    gt_img = Image.open("gt.png").convert("RGB")
    gt_logic = extract_logic_ops(gt_analysis)

    # Prepare multimodal inference prompt.
    conv = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Reverse engineer the GLSL shader code for this texture. Include analysis."}]}]
    prompt = processor.apply_chat_template(conv, add_generation_prompt=True, tokenize=False)
    inputs = processor(text=[prompt], images=[gt_img], return_tensors="pt").to(model.device)

    # Use deterministic greedy decoding for metric reliability.
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=768, do_sample=False)

    # Decode only the newly generated tokens (everything after the prompt).
    pred_text = processor.batch_decode(out[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0]
    pred_logic = extract_logic_ops(pred_text)

    # Extract code and attempt validation render.
    pred_code = _extract_shader_code(pred_text)
    success, error_log = renderer.render(pred_code, "pred.png")

    # Calculate Mean Squared Error (Regression Check).
    # Predictions that fail to compile keep the worst-case value of 1.0.
    mse = 1.0
    if success:
        p_arr = np.array(Image.open("pred.png").convert("RGB")).astype(float) / 255.0
        g_arr = np.array(gt_img).astype(float) / 255.0
        # Plain float keeps the value JSON-serializable regardless of numpy version.
        mse = float(np.mean((g_arr - p_arr) ** 2))

    # Capture diagnostic logs.
    if i < NUM_AUDIT_LOGS:
        audit_samples.append({
            "sample_index": i,
            "seed": seed,
            "compilation_success": success,
            "compiler_error": error_log,
            "gt_logic": gt_logic,
            "pred_logic": pred_logic,
            "mse": mse,
            "prediction_raw": pred_text
        })

    # The classification report needs a single label per sample, so only the
    # first (alphabetically smallest) detected operation is kept.
    results.append({
        "id": i,
        "gt_logic": gt_logic[0] if gt_logic else "none",
        "pred_logic": pred_logic[0] if pred_logic else "none",
        "compiled": success,
        "mse": mse
    })

# Save qualitative results to Drive.
audit_dir = os.path.join(PROJECT_ROOT, 'audits')
os.makedirs(audit_dir, exist_ok=True)
audit_path = os.path.join(audit_dir, 'stage2_audit_results.json')
with open(audit_path, 'w') as f:
    json.dump(audit_samples, f, indent=2)

df = pd.DataFrame(results)

In [None]:
# @title 4. Final performance report
import os
import pandas as pd
from sklearn.metrics import classification_report

# 4.1 Compile the Stage 2 logic metrics.
# We interpret the dataframe built in Cell 3 to assess boolean proficiency.
# MSE is averaged over compiled predictions only; the mean is NaN when none compiled.
compiled_mse = df.loc[df['compiled'], 'mse']
current_mse = compiled_mse.mean()

print("\n" + "="*60 + "\nSTAGE 2 PERFORMANCE AUDIT\n" + "="*60)
print(f"1. Compilation Success:  {df['compiled'].mean():.1%}")
print(f"2. Visual Precision (MSE): {current_mse:.4f}")

# 4.2 Logic Classification (Union vs Subtraction vs Intersection).
# This specifically measures how well the model learned the boolean 'verbs'.
print("\n3. Logic Classification Accuracy:")
print(classification_report(df['gt_logic'], df['pred_logic'], zero_division=0))

# 4.3 Regression Check (Baseline Comparison).
# We compare the current MSE against the Stage 1 baseline to ensure no 'catastrophic forgetting'.
# This uses the 0.0085 baseline established during the Stage 1 audit; a drift of
# more than 50% above that baseline is reported as a potential regression.
stage1_mse_baseline = 0.0085

print("\n4. Regression Check (Spatial Integrity):")
if pd.isna(current_mse):
    # NaN compares False against any threshold, which would otherwise fall
    # through to the PASS branch even though nothing was measured.
    print("[FAIL] No prediction compiled, so spatial precision could not be measured.")
elif current_mse > (stage1_mse_baseline * 1.5):
    print(f"[WARNING] Potential Regression: Drift detected in spatial precision (Current MSE: {current_mse:.4f})")
else:
    print("[PASS] Spatial integrity maintained relative to Stage 1 baseline.")

# 4.4 Final file reference.
audit_results_path = os.path.join(PROJECT_ROOT, 'audits/stage2_audit_results.json')
print(f"\nDetailed qualitative log saved to: {audit_results_path}")
print("="*60)

In [None]:
# @title 5. Auto-shutdown
import time
from google.colab import runtime

# `drive` is imported here so this cell does not depend on the setup cell's import.
from google.colab import drive

print("Performance audit complete. Synchronization finishing...")
try:
    # Block until pending Drive writes (including the JSON audit log) are flushed.
    # A fixed sleep gives no guarantee that synchronization has actually finished.
    drive.flush_and_unmount()
finally:
    # Release the runtime even if the flush raised, so compute units are not
    # consumed by an idle session.
    print("Runtime disconnecting to preserve compute units.")
    runtime.unassign()