# GPT-2-XL Voice Inference — Gradio

Load your fine-tuned GPT-2-XL and generate from it via a Gradio web UI.

## Setup

1. **Set the runtime to T4 GPU** (in Colab: Runtime → Change runtime type)
2. **Run all cells** — Gradio launches in Cell 4 with a shareable link
3. Open the link, type a prompt, get a draft in your voice

In [None]:
# === Cell 1: Install + mount ===
!pip install -q transformers gradio

import torch
from google.colab import drive
# Mount Google Drive at /content/drive; the DRIVE_BASE path defined below
# lives inside this mount, so it has to succeed before the model is loaded.
drive.mount("/content/drive")

# Project folder on the mounted Drive; the fine-tuned checkpoint directory is
# resolved relative to it.
DRIVE_BASE = "/content/drive/MyDrive/voice-ft"
MODEL_PATH = DRIVE_BASE + "/models/gpt2-voice-v1"

# Sampling settings forwarded as keyword arguments to model.generate().
GEN_KWARGS = {
    "temperature": 0.8,
    "top_p": 0.9,
    "top_k": 50,
    "repetition_penalty": 1.1,
    "do_sample": True,
}

In [None]:
# === Cell 2: Load model ===
from transformers import AutoModelForCausalLM, AutoTokenizer

# Choose the device first so the weight dtype can follow it: half precision is
# only used on GPU. On a CPU-only runtime float16 inference is slow and, on
# some PyTorch builds, missing kernels outright, so fall back to float32.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_dtype = torch.float16 if device.type == "cuda" else torch.float32

print("Loading fine-tuned GPT-2-XL...")
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, torch_dtype=model_dtype)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Reuse EOS as the padding token (presumably the saved tokenizer defines no
# pad token of its own — confirm against the checkpoint).
tokenizer.pad_token = tokenizer.eos_token

model = model.to(device)

print(f"Ready on {device}")

In [None]:
# === Cell 3: Generate function ===

def generate(prompt, max_tokens):
    """Sample a continuation of *prompt* from the loaded model.

    The prompt is followed by a blank line, a ``---`` rule and another blank
    line before tokenization. Only the newly generated tokens are decoded,
    so the returned text never repeats the prompt.

    If the model config exposes ``max_position_embeddings``, the request is
    fitted into that window: the new-token cap is clamped below the window
    size, and an over-long prompt loses its *oldest* tokens so the trailing
    separator is kept. Without this, prompt length plus new tokens could
    index past the position table and crash generation.

    Args:
        prompt: Instruction text supplied by the user.
        max_tokens: Upper bound on newly sampled tokens; coerced with
            ``int()`` because the UI value may not arrive as an int.

    Returns:
        The decoded continuation with special tokens stripped.
    """
    input_text = f"{prompt}\n\n---\n\n"
    new_tokens = int(max_tokens)
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    context_len = getattr(model.config, "max_position_embeddings", None)
    if context_len is not None:
        # Always leave room for at least one prompt token.
        new_tokens = min(new_tokens, context_len - 1)
        prompt_budget = context_len - new_tokens
        if inputs["input_ids"].shape[1] > prompt_budget:
            # Left-truncate every encoded tensor (ids and mask alike) so the
            # end of the prompt, including the separator, is preserved.
            inputs = {
                key: value[:, -prompt_budget:] for key, value in inputs.items()
            }

    input_len = inputs["input_ids"].shape[1]
    with torch.no_grad():
        output = model.generate(
            **inputs, **GEN_KWARGS,
            max_new_tokens=new_tokens,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Slice off the prompt tokens; generate() returns prompt + continuation.
    return tokenizer.decode(output[0][input_len:], skip_special_tokens=True)

In [None]:
# === Cell 4: Launch Gradio ===
import gradio as gr

# Input widgets, in the order generate() receives them: prompt text first,
# then the new-token cap.
prompt_box = gr.Textbox(label="Prompt", lines=4, placeholder="Write an essay about...")
length_slider = gr.Slider(64, 512, value=512, step=64, label="Max tokens")
output_box = gr.Textbox(label="Output", lines=30)

# Clickable presets shown under the form; every preset uses the 512-token cap.
example_prompts = [
    "Write a personal Substack Note about class in America, from the perspective of a Chinese first-gen immigrant whose family is lower-middle class.",
    "Write an essay about why dark humor saved my life.",
    "Write a Substack Note about the difference between Eileen Gu and Alyssa Liu.",
]
example_rows = [[text, 512] for text in example_prompts]

demo = gr.Interface(
    fn=generate,
    inputs=[prompt_box, length_slider],
    outputs=output_box,
    title="Lily's Voice — GPT-2-XL (Pre-RLHF)",
    description="Full fine-tuned on Lily's Substack essays. Pre-RLHF model — no alignment training, raw creativity.",
    examples=example_rows,
)
# share=True requests the shareable link mentioned in the setup notes.
demo.launch(share=True)