# Complete Code for Fine-tuning GPT-4.1-mini on Shadertoy Top1000

## 1. Setup and Dataset Download

In [1]:
# Robust environment setup: forcibly remove the env directory, then create fresh

env_name = "iterative-shader-lab"
env_path = f"$HOME/miniforge3/envs/{env_name}"

# Remove the directory if it exists (even if not a valid conda env)
!rm -rf {env_path}

# Now create the environment from scratch
!conda create -y -n {env_name} python=3.12

# Install dependencies
!conda install -y -n {env_name} pip
!conda run -n {env_name} pip install openai pandas tqdm numpy matplotlib

# Install jupyter in the env and register the kernel
!conda run -n {env_name} pip install ipykernel
!conda run -n {env_name} python -m ipykernel install --user --name {env_name} --display-name "Python ({env_name})"

print("Restart the notebook and select the 'Python (iterative-shader-lab)' kernel for all further steps. Restart your IDE if the 'Python (iterative-shader-lab)' kernel does not show up at first.")

Channels:
 - defaults
 - conda-forge
Platform: osx-arm64
Collecting package metadata (repodata.json): done
Solving environment: done


    current version: 24.11.3
    latest version: 25.5.0

Please update conda by running

    $ conda update -n base -c conda-forge conda



## Package Plan ##

  environment location: /Users/soham/miniforge3/envs/iterative-shader-lab

  added / updated specs:
    - python=3.12


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    expat-2.7.1                |       h313beb8_0         156 KB
    python-3.12.9              |       h99e199e_0        13.9 MB
    setuptools-78.1.1          |  py312hca03da5_0         2.2 MB
    wheel-0.45.1               |  py312hca03da5_0         148 KB
    ------------------------------------------------------------
                                           Total:        16.4 MB

The following NEW packages will be INSTALLED:

  b

In [1]:
# Import all necessary libraries
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from pathlib import Path
from datetime import datetime
import subprocess
import re
import csv

# Set up warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# 1. Clone the repository’s master branch if it doesn't already exist
repo_url = "https://github.com/pandahop/shaderCrawl.git"
clone_dir = "shaderCrawl"

if not os.path.isdir(clone_dir):
    print(f"Cloning master branch from {repo_url}...")
    result = subprocess.run(
        ["git", "clone", "--single-branch", "--branch", "master", repo_url, clone_dir],
        capture_output=True,
        text=True
    )
    if result.returncode != 0:
        print("Error cloning repository:")
        print(result.stderr)
        raise RuntimeError("Failed to clone repository")
    else:
        print("Repository cloned (master branch).")
else:
    print("Repository already exists. Ensuring we’re on master branch...")
    # If it already exists, switch to master
    subprocess.run(["git", "-C", clone_dir, "fetch"], check=True)
    subprocess.run(["git", "-C", clone_dir, "checkout", "master"], check=True)
    subprocess.run(["git", "-C", clone_dir, "pull", "origin", "master"], check=True)
    print("Checked out master and pulled latest.")

# 2. Define paths
input_dir = os.path.join(clone_dir, "raw_pages")
output_csv = "shaders_extracted.csv"

# 3. Regular expression to locate the gShaders array in each HTML file
gshaders_pattern = re.compile(
    r"gShaders\s*=\s*(\[\s*\{.*?\}\s*\]);",
    flags=re.DOTALL
)

# 4. Container for rows to write to CSV
rows = []

# 5. Walk through the directory and process each HTML file
for root, dirs, files in os.walk(input_dir):
    for filename in files:
        if filename.endswith(".html"):
            filepath = os.path.join(root, filename)
            with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()

                # Search for the gShaders JSON array
                match = gshaders_pattern.search(content)
                if not match:
                    continue

                json_text = match.group(1)
                try:
                    shaders_list = json.loads(json_text)
                except json.JSONDecodeError:
                    continue

                # Extract description and code from each render pass
                for shader in shaders_list:
                    renderpasses = shader.get("renderpass", [])
                    top_description = shader.get("info", {}).get("description", "")

                    for rp in renderpasses:
                        desc = rp.get("description", "").strip() or top_description.strip()
                        code = rp.get("code", "").strip()

                        if code:
                            rows.append([desc, code])

# 6. Write all collected rows into the CSV
with open(output_csv, "w", encoding="utf-8", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["description", "code"])
    writer.writerows(rows)

print(f"Extraction complete! CSV file saved to: {output_csv}")

Repository already exists. Ensuring we’re on master branch...


Already on 'master'


Your branch is up to date with 'origin/master'.


From https://github.com/pandahop/shaderCrawl
 * branch            master     -> FETCH_HEAD


Already up to date.
Checked out master and pulled latest.
Extraction complete! CSV file saved to: shaders_extracted.csv


## 2. Explore the Dataset

In [3]:
import pandas as pd

# Path to your new CSV
csv_path = './shaders_extracted.csv'

# Load the CSV (change nrows=1 to load more if you want)
df = pd.read_csv(csv_path)

# Pick a row to explore (e.g., the first row)
row_idx = 0
row = df.iloc[row_idx]

# Display the row as a DataFrame for readability
display(pd.DataFrame([row]))

# If you want to print the code field
code_col = next((col for col in df.columns if 'code' in col.lower()), None)
if code_col:
    print(f"\nShader code preview (first 500 chars):\n{row[code_col][:500]}")
else:
    print("No code column found in this CSV.")

Unnamed: 0,description,code
0,goofing some more...,#define FARCLIP 35.0\n\n#define MARCHSTEPS ...



Shader code preview (first 500 chars):
#define FARCLIP    35.0

#define MARCHSTEPS 60
#define AOSTEPS    8
#define SHSTEPS    10
#define SHPOWER    3.0

#define PI         3.14
#define PI2        PI*0.5    

#define AMBCOL     vec3(1.0,1.0,1.0)
#define BACCOL     vec3(1.0,1.0,1.0)
#define DIFCOL     vec3(1.0,1.0,1.0)

#define MAT1       1.0

#define FOV 1.0


/***********************************************/
float rbox(vec3 p, vec3 s, float r) {	
    return length(max(abs(p)-s+vec3(r),0.0))-r;
}
float torus(vec3 p, vec2 t) {
    vec2


## 3. Preprocess and Convert to JSONL Format

In [14]:
import json
import random
from tqdm import tqdm
import os
import pandas as pd

# --- DEBUG: Inspect your DataFrame ---
print("Columns in DataFrame:", df.columns.tolist())
print("First row of DataFrame:\n", df.iloc[0])

# --- Conversion Functions ---
def create_jsonl_dataset(df, output_path, system_prompt, validation_split=0.1, max_samples=None):
    """
    Convert DataFrame to JSONL format for OpenAI fine-tuning
    
    Args:
        df: Pandas DataFrame with shader data
        output_path: Path to save the JSONL file
        system_prompt: System prompt to use for all examples
        validation_split: Fraction of data to use for validation
        max_samples: Maximum number of samples to include (None for all)
    
    Returns:
        Tuple of (train_path, val_path)
    """
    # Make a copy and shuffle
    df_copy = df.copy()
    df_copy = df_copy.sample(frac=1, random_state=42).reset_index(drop=True)
    
    # Limit samples if specified
    if max_samples and max_samples < len(df_copy):
        df_copy = df_copy.iloc[:max_samples]
    
    # Calculate split indices
    val_size = int(len(df_copy) * validation_split)
    train_df = df_copy.iloc[val_size:]
    val_df = df_copy.iloc[:val_size]
    
    train_path = f"{output_path}_train.jsonl"
    val_path = f"{output_path}_val.jsonl"
    
    # Process training data
    print(f"Creating training set with {len(train_df)} examples...")
    _create_jsonl_file(train_df, train_path, system_prompt)
    
    # Process validation data
    print(f"Creating validation set with {len(val_df)} examples...")
    _create_jsonl_file(val_df, val_path, system_prompt)
    
    return train_path, val_path

def _safe_strip(val):
    """Convert to string and strip, or return empty string if missing/NaN."""
    if pd.isna(val):
        return ""
    return str(val).strip()

def _create_jsonl_file(df, output_path, system_prompt):
    """Helper function to create a single JSONL file"""
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    n_written = 0
    n_skipped = 0
    for idx, row in tqdm(df.iterrows(), total=len(df)):
        # Try the most likely column names, fallback to empty string
        desc = _safe_strip(row.get('description', ''))
        code = _safe_strip(row.get('code', ''))
        
        # Skip if code is empty
        if not code:
            n_skipped += 1
            if n_skipped <= 5:
                print(f"Skipping row {idx} (empty code): desc='{desc}'")
            continue

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Description: {desc}"},
            {"role": "assistant", "content": code}
        ]
        with open(output_path, 'a', encoding='utf-8') as f:
            f.write(json.dumps({"messages": messages}) + "\n")
        n_written += 1
    print(f"Written: {n_written}, Skipped: {n_skipped}")

# --- MAIN EXECUTION ---
system_prompt = "You are a GLSL expert. Given a shader description, produce complete GLSL (vertex+fragment) shader code."

train_path, val_path = create_jsonl_dataset(
    df, 
    './data/shadertoy-1000',
    system_prompt,
    validation_split=0.1,
    max_samples=None  # Set a number like 100 for testing, or None for all
)

print(f"Training data saved to: {train_path}")
print(f"Validation data saved to: {val_path}")

# --- Show a sample of the output ---
def print_jsonl_sample(path, n=5):
    print(f"\nSample from {path}:")
    if not os.path.exists(path):
        print("File does not exist.")
        return
    with open(path) as f:
        for i, line in enumerate(f):
            print(line.strip())
            if i + 1 >= n:
                break

print_jsonl_sample(train_path)
print_jsonl_sample(val_path)

Columns in DataFrame: ['description', 'code']
First row of DataFrame:
 description                                 goofing some more...
code           #define FARCLIP    35.0\n\n#define MARCHSTEPS ...
Name: 0, dtype: object
Creating training set with 1370 examples...


100%|██████████| 1370/1370 [00:00<00:00, 8917.65it/s]


Written: 1370, Skipped: 0
Creating validation set with 152 examples...


100%|██████████| 152/152 [00:00<00:00, 4946.80it/s]

Written: 152, Skipped: 0
Training data saved to: ./data/shadertoy-1000_train.jsonl
Validation data saved to: ./data/shadertoy-1000_val.jsonl

Sample from ./data/shadertoy-1000_train.jsonl:
{"messages": [{"role": "system", "content": "You are a GLSL expert. Given a shader description, produce complete GLSL (vertex+fragment) shader code."}, {"role": "user", "content": "Description: This is supposed to be a clone of the Windows \"Mystify\" screensaver. I guess I got close but you can definitely tell that it's not as good as the original.\n\nMusic: https://soundcloud.com/3kliksphilip/under-the-pizza"}, {"role": "assistant", "content": "//Make random values more random\nfloat randSeed = 0.;\n\n#define FLOORI(x) float(int(floor(x)))\n\n/* old function\nfloat smoothRand(float interval, float seed) {\n    float next = rand(1.+floor(iTime/interval)+seed);\n    float curr = rand(floor(iTime/interval)+seed);\n    randSeed++;\n    return mix(curr, next, fract(iTime/interval));\n}\n*/\n\nfloat smoo




## 4. Validate the JSONL Files

In [25]:
# Check basic stats of prepared files
import json

def count_tokens_in_jsonl(file_path):
    """Count tokens in a JSONL file and calculate expected costs"""
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    
    input_tokens = 0
    output_tokens = 0
    examples = 0
    
    for line in lines:
        example = json.loads(line)
        messages = example['messages']
        
        # Rough token estimation (4 chars ≈ 1 token)
        for msg in messages:
            if msg['role'] in ['system', 'user']:
                input_tokens += len(msg['content']) // 4
            elif msg['role'] == 'assistant':
                output_tokens += len(msg['content']) // 4
        examples += 1
    
    # Estimate costs ($5.00 per 1M tokens)
    input_cost = (input_tokens / 1000000) * 5.00
    output_cost = (output_tokens / 1000000) * 5.00
    total_cost = input_cost + output_cost
    
    return {
        'examples': examples,
        'input_tokens': input_tokens,
        'output_tokens': output_tokens,
        'total_tokens': input_tokens + output_tokens,
        'estimated_cost': total_cost
    }

# Check training data stats
train_stats = count_tokens_in_jsonl(train_path)
print(f"Training data statistics:")
for k, v in train_stats.items():
    print(f" - {k}: {v}")

# Check validation data stats
val_stats = count_tokens_in_jsonl(val_path)
print(f"\nValidation data statistics:")
for k, v in val_stats.items():
    print(f" - {k}: {v}")

# Calculate total cost for 3 epochs
n_epochs = 2
total_cost_estimate = train_stats['estimated_cost'] * n_epochs + val_stats['estimated_cost']
print(f"\nEstimated total cost for {n_epochs} epochs: ${total_cost_estimate:.2f}")

Training data statistics:
 - examples: 1370
 - input_tokens: 86125
 - output_tokens: 2630566
 - total_tokens: 2716691
 - estimated_cost: 13.583454999999999

Validation data statistics:
 - examples: 152
 - input_tokens: 9382
 - output_tokens: 285599
 - total_tokens: 294981
 - estimated_cost: 1.474905

Estimated total cost for 2 epochs: $28.64


## 5. Upload Files and Start Fine-tuning

In [26]:
import os
import time
import json
from datetime import datetime

# Import and instantiate the OpenAI v1 client
from openai import OpenAI

# 1. Read your API key from the environment (or set directly)
api_key = os.getenv("OPENAI_API_KEY")
if api_key is None:
    raise RuntimeError("Please set the OPENAI_API_KEY environment variable")
client = OpenAI(api_key=api_key)

# 2. Check for available GPT-4.1-mini models
print("Checking available models for fine-tuning...")
all_models = client.models.list().data
available_models = [m.id for m in all_models if "gpt-4.1-mini" in m.id]
print(f"Available GPT-4.1-mini models: {available_models}")

# 3. Choose a base model (adjust if needed)
base_model = "gpt-4.1-mini-2025-04-14"
if base_model not in available_models:
    raise RuntimeError(f"Base model '{base_model}' not found in available models.")

# 5. Upload the training file
print(f"Uploading training file: {train_path} …")
with open(train_path, "rb") as train_file:
    training_upload = client.files.create(
        file=train_file,
        purpose="fine-tune"
    )
training_file_id = training_upload.id
print(f"Training file uploaded with ID: {training_file_id}")

# 6. Upload the validation file
print(f"Uploading validation file: {val_path} …")
with open(val_path, "rb") as val_file:
    validation_upload = client.files.create(
        file=val_file,
        purpose="fine-tune"
    )
validation_file_id = validation_upload.id
print(f"Validation file uploaded with ID: {validation_file_id}")

# 7. Wait for both files to be processed
print("Waiting for file processing to complete…")
for file_id in (training_file_id, validation_file_id):
    while True:
        file_info = client.files.retrieve(file_id)
        status = file_info.status
        print(f"File {file_id} status: {status}")
        if status == "processed":
            break
        time.sleep(10)

# 8. Construct a unique suffix using current timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
suffix = f"shadertoy_{timestamp}"

# 9. Create the fine-tuning job
print(f"Starting fine-tuning job with base model '{base_model}' …")
job = client.fine_tuning.jobs.create(
    training_file=training_file_id,
    validation_file=validation_file_id,
    model=base_model,
    suffix=suffix,
    hyperparameters={
        "n_epochs": 2,
        "learning_rate_multiplier": 0.1
    }
)
job_id = job.id
print(f"Fine-tuning job created successfully. Job ID: {job_id}")

# 10. Save job metadata locally for later reference
job_record = {
    "job_id": job_id,
    "training_file": training_file_id,
    "validation_file": validation_file_id,
    "base_model": base_model,
    "suffix": suffix,
    "timestamp": timestamp
}
record_filename = f"finetune_job_{timestamp}.json"
with open(record_filename, "w", encoding="utf-8") as f:
    json.dump(job_record, f, indent=2)
print(f"Job details saved to '{record_filename}'")


Checking available models for fine-tuning...
Available GPT-4.1-mini models: ['gpt-4.1-mini', 'gpt-4.1-mini-2025-04-14']
Uploading training file: ./data/shadertoy-1000_train.jsonl …
Training file uploaded with ID: file-SXnJEcYEWwJBDPtoxP2Kzs
Uploading validation file: ./data/shadertoy-1000_val.jsonl …
Validation file uploaded with ID: file-4mdEuedwKYpwbVqXTXeZE9
Waiting for file processing to complete…
File file-SXnJEcYEWwJBDPtoxP2Kzs status: processed
File file-4mdEuedwKYpwbVqXTXeZE9 status: processed
Starting fine-tuning job with base model 'gpt-4.1-mini-2025-04-14' …
Fine-tuning job created successfully. Job ID: ftjob-XsIS8espyrRQysFVIpASDVW5
Job details saved to 'finetune_job_20250601_121335.json'


## 6. Monitor Fine-tuning Progress

In [33]:
import openai
import time
import json

def monitor_job_v1(job_id, poll_interval=60):
    """Monitor fine-tuning job progress using OpenAI Python SDK v1.x+"""
    print(f"Monitoring fine-tuning job: {job_id}")
    
    while True:
        job_info = openai.fine_tuning.jobs.retrieve(job_id)
        status = job_info.status
        print(f"Status: {status}")
        
        if hasattr(job_info, "trained_tokens"):
            print(f"Trained tokens: {job_info.trained_tokens}")
        
        if status in ["succeeded", "failed", "cancelled"]:
            print(f"Job {status}!")
            if status == "succeeded":
                print(f"Fine-tuned model: {job_info.fine_tuned_model}")
                # Save the model ID
                with open(f"finetune_complete_{job_id}.json", 'w') as f:
                    json.dump({
                        "job_id": job_id,
                        "fine_tuned_model": job_info.fine_tuned_model,
                        "status": status
                    }, f, indent=2)
            return job_info
        
        # Get events to see training metrics
        events = openai.fine_tuning.jobs.list_events(job_id, limit=10)
        for event in events.data:
            print(f"Event: {event.message}")
        
        print(f"Waiting {poll_interval} seconds for next update...")
        time.sleep(poll_interval)

job_info = monitor_job_v1(job_id)

Monitoring fine-tuning job: ftjob-XsIS8espyrRQysFVIpASDVW5
Status: running
Trained tokens: None
Event: Fine-tuning job started
Event: Files validated, moving job to queued state
Event: Validating training file: file-SXnJEcYEWwJBDPtoxP2Kzs and validation file: file-4mdEuedwKYpwbVqXTXeZE9
Event: Created fine-tuning job: ftjob-XsIS8espyrRQysFVIpASDVW5
Waiting 60 seconds for next update...
Status: running
Trained tokens: None
Event: Fine-tuning job started
Event: Files validated, moving job to queued state
Event: Validating training file: file-SXnJEcYEWwJBDPtoxP2Kzs and validation file: file-4mdEuedwKYpwbVqXTXeZE9
Event: Created fine-tuning job: ftjob-XsIS8espyrRQysFVIpASDVW5
Waiting 60 seconds for next update...
Status: running
Trained tokens: None
Event: Step 11/2740: training loss=1.97
Event: Step 10/2740: training loss=1.19
Event: Step 9/2740: training loss=1.32
Event: Step 8/2740: training loss=0.64
Event: Step 7/2740: training loss=0.65
Event: Step 6/2740: training loss=0.79
Event: S

## 7. Test the Fine-tuned Model

In [None]:
# Function to test the fine-tuned model
def test_fine_tuned_model(model_id, test_prompts):
    """Test the fine-tuned model with example prompts"""
    print(f"Testing fine-tuned model: {model_id}")
    
    results = []
    
    for i, prompt in enumerate(test_prompts):
        print(f"\n--- Test Example {i+1} ---")
        print(f"Prompt: {prompt}")
        
        try:
            response = openai.ChatCompletion.create(
                model=model_id,
                messages=[
                    {"role": "system", "content": "You are a GLSL expert. Generate"},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.2,
                max_tokens=1500
            )
            
            generated_code = response.choices[0].message.content
            print(f"Generated code (first 200 chars):\n{generated_code[:200]}...")
            
            results.append({
                "prompt": prompt,
                "response": generated_code
            })
        except Exception as e:
            print(f"Error: {e}")
    
    # Save results
    with open(f"test_results_{model_id.split(':')[-1]}.json", 'w') as f:
        json.dump(results, f, indent=2)
    
    return results

# Example test prompts
test_prompts = [
    "Title: Neon Tunnel\nDescription: A rotating neon tunnel effect with RGB glow.",
    "Title: Ocean Waves\nDescription: Realistic ocean wave simulation with foam and reflections.",
    "Title: Fractal Explorer\nDescription: Interactive mandelbrot fractal with zoom and color controls."
]

# To test the model:
# Uncomment and run when your model is ready
# Replace with your actual model ID
model_id = "gpt-4.1-mini"
test_results = test_fine_tuned_model(model_id, test_prompts)

## 8. Analyze Results and Fine-tuning Metrics

In [None]:
# Function to analyze fine-tuning metrics
def analyze_training_metrics(job_id):
    """Retrieve and visualize training metrics for a fine-tuning job"""
    events = openai.FineTuningJob.list_events(id=job_id)
    metrics = []
    
    for event in events.data:
        if "train/train_loss" in event.message or "valid/loss" in event.message:
            # Parse the metrics (this is a bit hacky, might need adjustments)
            parts = event.message.split()
            for part in parts:
                if "=" in part:
                    key, value = part.split("=")
                    try:
                        metrics.append({
                            "metric": key.strip(),
                            "value": float(value.strip()),
                            "step": len(metrics) + 1
                        })
                    except ValueError:
                        pass
    
    # Group by metric type
    metric_types = {}
    for m in metrics:
        if m["metric"] not in metric_types:
            metric_types[m["metric"]] = []
        metric_types[m["metric"]].append((m["step"], m["value"]))
    
    # Plot metrics
    plt.figure(figsize=(12, 8))
    for metric, values in metric_types.items():
        steps, vals = zip(*values)
        plt.plot(steps, vals, marker='o', label=metric)
    
    plt.title('Fine-tuning Metrics')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(alpha=0.3)
    plt.show()
    
    return metrics

# To analyze metrics:
# Uncomment and run when your job is complete
# metrics = analyze_training_metrics(job_id)

## 9. Integration with Shader Application

In [None]:
# Example function to integrate the fine-tuned model with your shader application
def generate_shader(model_id, title, description):
    """Generate a shader using the fine-tuned model"""
    try:
        response = openai.ChatCompletion.create(
            model=model_id,
            messages=[
                {"role": "system", "content": "You are a GLSL expert."},
                {"role": "user", "content": f"Title: {title}\nDescription: {description}"}
            ],
            temperature=0.3,
            max_tokens=2000
        )
        
        generated_code = response.choices[0].message.content
        return {
            "success": True,
            "code": generated_code
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }

# Example usage in your application:
"""
# Replace with your actual model ID
model_id = "ft:gpt-4.1-mini:shadertoy:12345"

# This would be connected to your UI
title = "Sunset Ocean"
description = "A peaceful ocean scene at sunset with reflective water and soft waves"

result = generate_shader(model_id, title, description)
if result["success"]:
    # Send to your shader renderer
    shader_code = result["code"]
    # ... render the shader ...
else:
    print(f"Error generating shader: {result['error']}")
"""