In [4]:
# 1
# Install Dependencies

!pip install -q "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git"
!pip install -q transformers datasets accelerate peft trl bitsandbytes


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.8/310.8 kB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m506.8/506.8 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.1/423.1 kB[0m [31m37.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.9/224.9 kB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.2/181.2 kB[0m [31m22.0 MB/s[0m eta [36m0

In [5]:
# 2
# Check GPU: print the driver/CUDA versions and the attached device so the
# runtime type can be confirmed before any model is loaded.
!nvidia-smi


Sat Jan 24 10:14:36 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   38C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [6]:
# 3
# Mount Google Drive at /content/drive.
# You will be asked for authentication here.

from google.colab import drive
drive.mount("/content/drive")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# 4
# Define paths.
# The texts to fine-tune on were uploaded to Drive as a single zip archive.

from pathlib import Path

# Input archive on Drive
ZIP_PATH = Path("/content/drive/MyDrive/precog/doyle-for-finetuning.zip")

# Fast local workspace on the Colab VM, and the extraction target inside it
WORK_DIR = Path("/content/work_doyle")
EXTRACT_DIR = WORK_DIR / "extracted"

# Persistent output on Drive
OUT_DIR = Path("/content/drive/MyDrive/unsloth_data")

# Create the folders in the same order as they are declared; already-existing
# folders are left alone.
for _directory in (WORK_DIR, EXTRACT_DIR, OUT_DIR):
    _directory.mkdir(parents=True, exist_ok=True)

for _label, _location in (
    ("ZIP_PATH:", ZIP_PATH),
    ("EXTRACT_DIR:", EXTRACT_DIR),
    ("OUT_DIR:", OUT_DIR),
):
    print(_label, _location)


ZIP_PATH: /content/drive/MyDrive/precog/doyle-for-finetuning.zip
EXTRACT_DIR: /content/work_doyle/extracted
OUT_DIR: /content/drive/MyDrive/unsloth_data


In [None]:
# 5
# Unzip them

!unzip -q "{ZIP_PATH}" -d "{EXTRACT_DIR}"
print("Unzipped. Showing a few extracted files:")
!find "{EXTRACT_DIR}" -type f | head -n 30


replace /content/work_doyle/extracted/doyle-for-finetuning/024_MSH_10_Greek_Interpreter.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: Unzipped. Showing a few extracted files:
/content/work_doyle/extracted/doyle-for-finetuning/002_Sign_of_Four.txt
/content/work_doyle/extracted/doyle-for-finetuning/050_CBSH_1_Mazarin_Stone.txt
/content/work_doyle/extracted/doyle-for-finetuning/007_ASH_05_Five_Orange_Pips.txt
/content/work_doyle/extracted/doyle-for-finetuning/049_HLB_7_His_Last_Bow.txt
/content/work_doyle/extracted/doyle-for-finetuning/018_MSH_04_Stockbrokers_Clerk.txt
/content/work_doyle/extracted/doyle-for-finetuning/024_MSH_10_Greek_Interpreter.txt
/content/work_doyle/extracted/doyle-for-finetuning/015_MSH_01_Silver_Blaze.txt
/content/work_doyle/extracted/doyle-for-finetuning/032_RSH_04_Solitary_Cyclist.txt
/content/work_doyle/extracted/doyle-for-finetuning/026_MSH_12_Final_Problem.txt
/content/work_doyle/extracted/doyle-for-finetuning/045_HLB_4_Red_Circle.txt
/content/work_doyle/extracte

In [None]:
# Cell 6: Create train/holdout JSONL from extracted .txt

# This cell:
# - finds all .txt files under the extracted directory (even nested)
# - splits into train/holdout by book file
# - chunks each book into segments of MIN_WORDS_PER_CHUNK to
#   MAX_WORDS_PER_CHUNK words (150–350 with the values set here)
# - writes:
#     train_DOYLE.jsonl
#     holdout_DOYLE.jsonl
#     split_DOYLE.txt (for documentation)

import json
import random

AUTHOR_TAG = "DOYLE"  # appears in the names of the three output files
HOLDOUT_FRACTION = 0.2  # share of books (not chunks) reserved for holdout

# Word-count bounds passed to chunk_paragraphs() in this cell; they override
# that function's own defaults.
MIN_WORDS_PER_CHUNK = 150
MAX_WORDS_PER_CHUNK = 350

random.seed(42)  # makes the book-level shuffle in this cell repeatable

def split_into_paragraphs(text: str):
    """Split raw book text into whitespace-normalized paragraphs.

    Paragraphs are separated by one or more blank lines. A blank line may
    still contain spaces, tabs or a stray carriage return, so the split uses
    a regular expression instead of a literal double newline; with the
    literal split such paragraphs would silently be glued together.

    Args:
        text: Full text of one book.

    Returns:
        A list of non-empty paragraphs. Inside each paragraph every run of
        whitespace (including the single newlines of hard-wrapped lines) is
        collapsed to one space.
    """
    import re  # local import: this cell's import block does not provide `re`

    paragraphs = []
    for block in re.split(r"\n\s*\n", text):
        collapsed = " ".join(block.split())
        if collapsed:  # drop blocks that were whitespace only
            paragraphs.append(collapsed)
    return paragraphs

def chunk_paragraphs(paras, min_words=250, max_words=700):
    """Greedily pack consecutive paragraphs into word-bounded chunks.

    Paragraphs shorter than 20 words are ignored. The remaining paragraphs
    are appended to a buffer for as long as the buffer stays within
    ``max_words``. When the next paragraph would overflow it, the buffer is
    emitted as a chunk if it holds at least ``min_words`` words (otherwise
    it is discarded) and a new buffer is started with that paragraph.

    NOTE(review): a single paragraph longer than ``max_words`` starts a
    buffer on its own, so an emitted chunk can exceed ``max_words``.

    Args:
        paras: Iterable of paragraph strings.
        min_words: Minimum number of words a chunk needs to be kept.
        max_words: Word budget used when deciding whether a paragraph still
            fits into the current buffer.

    Returns:
        List of chunk strings; paragraphs inside a chunk are joined by a
        single space.
    """
    finished = []
    buffer = []
    buffered_words = 0

    for para in paras:
        n_words = len(para.split())
        if n_words < 20:
            continue

        if buffered_words + n_words <= max_words:
            # Still within budget: keep accumulating.
            buffer.append(para)
            buffered_words += n_words
            continue

        # Overflow: close the current buffer (only kept when long enough)
        # and start over with the paragraph that did not fit.
        if buffered_words >= min_words:
            finished.append(" ".join(buffer))
        buffer, buffered_words = [para], n_words

    # The trailing buffer is subject to the same minimum-length rule.
    if buffered_words >= min_words:
        finished.append(" ".join(buffer))

    return finished

# Discover every book under the extraction root (recursively)
book_files = sorted(EXTRACT_DIR.rglob("*.txt"))
if len(book_files) == 0:
    raise RuntimeError(f"No .txt files found under: {EXTRACT_DIR}")

print("Found total .txt books:", len(book_files))

# Book-level split: shuffle once, the first `holdout_n` books become holdout
# (at least one), everything after them is used for training.
random.shuffle(book_files)
holdout_n = max(1, int(len(book_files) * HOLDOUT_FRACTION))

holdout_books = set(book_files[:holdout_n])
train_books = book_files[holdout_n:]

train_jsonl_path = OUT_DIR / f"train_{AUTHOR_TAG}.jsonl"
holdout_jsonl_path = OUT_DIR / f"holdout_{AUTHOR_TAG}.jsonl"
split_info_path = OUT_DIR / f"split_{AUTHOR_TAG}.txt"

train_count = 0
holdout_count = 0

with open(train_jsonl_path, "w", encoding="utf-8") as f_train, \
     open(holdout_jsonl_path, "w", encoding="utf-8") as f_holdout:

    for book_path in book_files:
        # Undecodable bytes are dropped rather than aborting the whole run.
        text = book_path.read_text(encoding="utf-8", errors="ignore")
        chunks = chunk_paragraphs(
            split_into_paragraphs(text),
            MIN_WORDS_PER_CHUNK,
            MAX_WORDS_PER_CHUNK,
        )

        # One JSON object per line: {"text": <chunk>}
        is_holdout = book_path in holdout_books
        target = f_holdout if is_holdout else f_train
        target.writelines(
            json.dumps({"text": chunk}, ensure_ascii=False) + "\n"
            for chunk in chunks
        )

        if is_holdout:
            holdout_count += len(chunks)
        else:
            train_count += len(chunks)

# Record which books went where (holdout sorted by path, train in shuffled order)
split_lines = ["HOLDOUT BOOKS (not used for fine-tuning):"]
split_lines.extend(book.name for book in sorted(holdout_books))
split_lines.append("")
split_lines.append("TRAIN BOOKS (used for fine-tuning):")
split_lines.extend(book.name for book in train_books)

with open(split_info_path, "w", encoding="utf-8") as f:
    f.write("\n".join(split_lines) + "\n")

print("Created JSONL files:")
print("Train chunks:", train_count, "->", train_jsonl_path)
print("Holdout chunks:", holdout_count, "->", holdout_jsonl_path)
print("Split info:", split_info_path)


Found total .txt books: 40
Created JSONL files:
Train chunks: 1002 -> /content/drive/MyDrive/unsloth_data/train_DOYLE.jsonl
Holdout chunks: 203 -> /content/drive/MyDrive/unsloth_data/holdout_DOYLE.jsonl
Split info: /content/drive/MyDrive/unsloth_data/split_DOYLE.txt


In [3]:
# 7
# Sanity check of the JSONL output: list the output folder on Drive and show
# the first two training records.

!ls -lh /content/drive/MyDrive/unsloth_data/
!head -n 2 /content/drive/MyDrive/unsloth_data/train_DOYLE.jsonl


total 1.9M
-rw------- 1 root root 320K Jan 23 19:49 holdout_DOYLE.jsonl
-rw------- 1 root root 1.3K Jan 23 19:49 split_DOYLE.txt
-rw------- 1 root root 1.6M Jan 23 19:49 train_DOYLE.jsonl
{"text": "We were seated at breakfast one morning, my wife and I, when the maid brought in a telegram. It was from Sherlock Holmes and ran in this way: \"Have you a couple of days to spare? Have just been wired for from the west of England in connection with Boscombe Valley tragedy. Shall be glad if you will come with me. Air and scenery perfect. Leave Paddington by the 11:15.\" \"Oh, Anstruther would do your work for you. You have been looking a little pale lately. I think that the change would do you good, and you are always so interested in Mr. Sherlock Holmes' cases.\" \"I should be ungrateful if I were not, seeing what I gained through one of them,\" I answered. \"But if I am to go, I must pack at once, for I have only half an hour.\" My experience of camp life in Afghanistan had at least had the

In [None]:
# 8
# Run the Python garbage collector, then ask PyTorch to release its cached
# CUDA memory, before the model is loaded.

import gc
import torch

gc.collect()
torch.cuda.empty_cache()


In [None]:
# 9
# Fine-tune a LoRA adapter on the DOYLE training chunks (Unsloth + TRL).

import os
# Configure the CUDA caching allocator before the rest of this cell touches the GPU.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

from unsloth import FastLanguageModel
import torch
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

torch.cuda.set_device(0)

MODEL_NAME = "unsloth/llama-3.1-8b-bnb-4bit"
max_seq_length = 768

DATA_PATH = "/content/drive/MyDrive/unsloth_data/train_DOYLE.jsonl"
# Named LORA_DIR rather than OUT_DIR: OUT_DIR is the pathlib.Path of the JSONL
# folder defined in the path-setup cell, and rebinding it to a str for a
# different folder would break a later re-run of the JSONL cell
# (`OUT_DIR / ...` is not defined for str).
LORA_DIR  = "/content/drive/MyDrive/unsloth_out/DOYLE"

# Load the pre-quantized 4-bit base model; everything is placed on GPU 0.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_NAME,
    max_seq_length = max_seq_length,
    dtype = None,
    load_in_4bit = True,
    device_map = {"": 0},
)

# Attach LoRA adapters (rank 8) to the attention and MLP projection layers.
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 8,
    lora_dropout = 0.0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 42,
)

# Each JSONL record is {"text": <chunk>}; a single local file loads as the "train" split.
dataset = load_dataset("json", data_files=DATA_PATH)["train"]

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    args = TrainingArguments(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 4,   # effective batch size 1 x 4 = 4
        warmup_steps = 20,
        num_train_epochs = 1,
        learning_rate = 2e-4,
        fp16 = True,                       # the T4 used here reports Bfloat16 = FALSE
        bf16 = False,
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "cosine",
        seed = 42,
        output_dir = LORA_DIR,
        save_strategy = "epoch",
        report_to = "none",
    ),
)

trainer.train()

# Persist the adapter weights and the tokenizer next to each other on Drive.
model.save_pretrained(LORA_DIR)
tokenizer.save_pretrained(LORA_DIR)

print("Saved LoRA to:", LORA_DIR)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2026.1.4: Fast Llama patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2026.1.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


Generating train split: 0 examples [00:00, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/1002 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,002 | Num Epochs = 1 | Total steps = 251
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4
 "-____-"     Trainable parameters = 20,971,520 of 8,051,232,768 (0.26% trained)


Step,Training Loss
10,1.9028
20,1.6687
30,1.6333
40,1.5498
50,1.5381
60,1.4941
70,1.4902
80,1.5458
90,1.42
100,1.5765


Saved LoRA to: /content/drive/MyDrive/unsloth_out/DOYLE


In [1]:
# 10
# Quick single-prompt generation check.
# NOTE: this cell does not create `model` or `tokenizer`; both must already
# exist in the kernel (from the training cell or the adapter-loading cell).

import torch
import re
from unsloth import FastLanguageModel

# Put the model into Unsloth's inference mode before calling generate().
FastLanguageModel.for_inference(model)

def extract_last_paragraph(text: str) -> str:
    """Return the last substantial paragraph of a model output.

    Lines that start an echoed "Write ONE standalone paragraph" instruction
    are removed first (case-insensitive). The remainder is split on blank
    lines and the last piece longer than 50 characters is returned. When no
    piece qualifies, the whole cleaned text is returned instead.
    """
    echo_pattern = re.compile(r"(Write ONE standalone paragraph.*?\n)+", re.IGNORECASE)
    body = echo_pattern.sub("", text.strip()).strip()

    # Search from the end so the first hit is the last qualifying paragraph.
    for candidate in reversed(re.split(r"\n\s*\n", body)):
        candidate = candidate.strip()
        if len(candidate) > 50:
            return candidate
    return body

prompt = """
You are writing a SINGLE paragraph of 120–180 words.

Theme (do not mention this label explicitly): Deception and Disguise.

Hard constraints:
- Do NOT copy, quote, paraphrase, or reproduce any existing Conan Doyle text.
- Do NOT use Sherlock Holmes, Watson, or any recognizable Doyle plot event.
- Do NOT mention any names, places, time period, or nationalities.
- Do NOT include dialogue quotes.
- Do NOT give a climax or full plot resolution.
- Standalone paragraph only.

Style target:
- Write in the narrative rhythm and vocabulary of Arthur Conan Doyle (subtle Victorian-style prose).
- Keep it natural, not essay-like.

Output format:
Return ONLY the paragraph text.
""".strip()

inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
# Number of prompt tokens; generate() returns them in front of the new tokens.
prompt_token_count = inputs["input_ids"].shape[1]

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=220,
        temperature=0.9,
        top_p=0.9,
        do_sample=True,
        repetition_penalty=1.15,
        no_repeat_ngram_size=4,
    )

# Decode only the newly generated tokens. Stripping the prompt by string
# comparison fails whenever the decoded prompt does not round-trip to exactly
# the original text (whitespace or special-token differences), which would
# leave the whole prompt inside the "generated" paragraph.
raw = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True).strip()

final_para = extract_last_paragraph(raw)

print("\n===== GENERATED PARAGRAPH =====\n")
print(final_para)
print("\n===============================")
print("Word count:", len(final_para.split()))


ModuleNotFoundError: No module named 'unsloth'

In [7]:
# 11
# Reload the 4-bit base model and attach the saved DOYLE LoRA adapter so that
# generation also works in a fresh kernel, without re-running training.

from unsloth import FastLanguageModel
import torch

MODEL_NAME = "unsloth/llama-3.1-8b-bnb-4bit"
LORA_DIR = "/content/drive/MyDrive/unsloth_out/DOYLE"
max_seq_length = 768

# Same loading options as in the training cell; everything goes on GPU 0.
base_load_options = dict(
    model_name=MODEL_NAME,
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
    device_map={"": 0},
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_load_options)

# Attach the fine-tuned adapter, then switch to inference mode.
model.load_adapter(LORA_DIR)
FastLanguageModel.for_inference(model)

print("Loaded base model + DOYLE adapter for generation.")


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2026.1.4: Fast Llama patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

Loaded base model + DOYLE adapter for generation.


In [12]:
# Locate the topic-list CSV on Drive (case-insensitive match on "*topic*.csv", first 50 hits).
!find /content/drive/MyDrive -iname "*topic*.csv" | head -n 50


/content/drive/MyDrive/precog/topic-list.csv


In [17]:
import csv, random, time, re, hashlib
from pathlib import Path
import torch
from unsloth import FastLanguageModel

# =========================
# CONFIG
# =========================
TOPICS_CSV = Path("/content/drive/MyDrive/precog/topic-list.csv")   # <-- change if needed
# NOTE(review): this rebinds OUT_DIR, a name that earlier cells use for other
# folders; from here on it points at the generated-paragraph folder.
OUT_DIR    = Path("/content/drive/MyDrive/class4-finetuned-mimicry/DOYLE")

TOTAL_PARAS = 250   # length of the generation schedule
MIN_WORDS = 120     # a paragraph is accepted only if MIN_WORDS <= word count <= MAX_WORDS
MAX_WORDS = 180

MAX_ATTEMPTS_PER_FILE = 15          # more retries = fewer junk saves
SLEEP_BETWEEN_CALLS = 0.15          # seconds to pause after each scheduled file
SEED = 42

# Seeds Python's `random`, which drives the schedule shuffle and the
# mode/setting choices in this cell.
random.seed(SEED)
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Requires `model` to already exist in the kernel (it is not created in this cell).
FastLanguageModel.for_inference(model)

print("Saving to:", OUT_DIR)

# =========================
# HELPERS
# =========================
def word_count(text: str) -> int:
    """Count the whitespace-separated tokens in ``text``."""
    return sum(1 for _ in text.strip().split())

def normalize_text(text: str) -> str:
    """Canonical form used for duplicate detection.

    Lower-cases the text, trims both ends and collapses every run of
    whitespace into a single space.
    """
    trimmed = text.lower().strip()
    return re.sub(r"\s+", " ", trimmed)

def clean_output(text: str) -> str:
    """Reduce a raw model output to a single cleaned paragraph.

    Steps, in order:
      1. Remove echoed instruction fragments, each from its marker
         ("write one standalone paragraph", "theme:", "hard constraints:",
         "output format:") to the end of that line.
      2. Join all non-empty lines with single spaces.
      3. Drop one pair of surrounding straight or curly double quotes.
      4. Cut a trailing, unfinished "from their own previous".
    """
    cleaned = text.strip()

    # Each pattern is case-insensitive and stops at the first newline (or the
    # end of the string) after its marker.
    echo_patterns = (
        r"(?is)write one standalone paragraph.*?(?=\n|$)",
        r"(?is)theme\s*:.*?(?=\n|$)",
        r"(?is)hard constraints\s*:.*?(?=\n|$)",
        r"(?is)output format\s*:.*?(?=\n|$)",
    )
    for pattern in echo_patterns:
        cleaned = re.sub(pattern, "", cleaned).strip()

    # Collapse everything into one paragraph.
    pieces = (piece.strip() for piece in cleaned.splitlines())
    cleaned = " ".join(piece for piece in pieces if piece).strip()

    # Unwrap a paragraph that is quoted as a whole.
    straight_quoted = cleaned.startswith('"') and cleaned.endswith('"')
    curly_quoted = cleaned.startswith("“") and cleaned.endswith("”")
    if straight_quoted or curly_quoted:
        cleaned = cleaned[1:-1].strip()

    # Known cut-off artefact at the very end of the text.
    return re.sub(r"\bfrom their own previous\s*$", "", cleaned, flags=re.IGNORECASE).strip()

def looks_like_prompt_leak(text: str) -> bool:
    """Report whether ``text`` contains meta-instruction residue.

    The check is a case-insensitive substring search, so a marker also
    matches inside longer words or ordinary sentences.
    """
    lowered = text.lower()
    leak_markers = (
        "if you return more than one paragraph",
        "i will reject your answer",
        "example answer",
        "no headers",
        "no footers",
        "submit both together",
        "word doc",
        "docx",
        "pdf",
        "rules",
        "hard constraints",
        "output format",
        "theme:",
        "write one standalone paragraph",
    )
    for marker in leak_markers:
        if marker in lowered:
            return True
    return False

def looks_like_direct_book_copy(text: str) -> bool:
    """Heuristic check for phrasing lifted from the source books.

    Returns True when any entry of a fixed phrase list occurs in ``text``
    (case-insensitive substring match). The list is a hand-picked sample,
    not an exhaustive detector.
    """
    known_phrases = (
        "my most memorable adventures",
        "brown-shaded lamps",
        "smoky mirrors",
        "red-tiled firesides",
        "minor tragedies which shock the soul",
        "i confess that",
        "leaving my readers to infer",
        "second lieutenant of marines",
        "music-hall artiste",
    )
    haystack = text.lower()
    return any(haystack.find(phrase) != -1 for phrase in known_phrases)

def too_similar_to_seen(text: str, seen_hashes: set) -> bool:
    """Report whether the normalized form of ``text`` was already recorded.

    Despite the name this is an exact-duplicate check: the MD5 hex digest of
    ``normalize_text(text)`` is looked up in ``seen_hashes``.
    """
    canonical = normalize_text(text)
    digest = hashlib.md5(canonical.encode("utf-8")).hexdigest()
    return digest in seen_hashes

def load_topics(csv_path: Path):
    """Load the topic list from a CSV file.

    The file needs a header row with the columns ``topic_id``,
    ``topic_name`` and ``topic_description``.

    Args:
        csv_path: Location of the topics CSV.

    Returns:
        A list of dicts with the keys ``topic_id``, ``topic_name`` and
        ``topic_description``; all values have surrounding whitespace removed.

    Raises:
        KeyError: If one of the required columns is missing from the header.
        RuntimeError: If the file contains no data rows.
    """
    required_columns = ("topic_id", "topic_name", "topic_description")
    topics = []
    # utf-8-sig reads plain UTF-8 as well, but also swallows a leading BOM
    # (written by some spreadsheet exports); with plain utf-8 the BOM would
    # become part of the first column name and raise KeyError("topic_id").
    # newline="" is what the csv module expects of files handed to it.
    with open(csv_path, "r", encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            # DictReader fills cells missing from a short row with None.
            topics.append({column: (row[column] or "").strip() for column in required_columns})
    if not topics:
        raise RuntimeError("No topics loaded. Fix your CSV path or file.")
    return topics

# =========================
# LOAD TOPICS + BUILD SCHEDULE
# =========================
topics = load_topics(TOPICS_CSV)

# Every topic gets the same base share; the remainder is handed out randomly.
# With 20 topics and TOTAL_PARAS = 250 this is 12 each plus 10 extras.
base, extra = divmod(TOTAL_PARAS, len(topics))

# `base` consecutive copies of each topic, in CSV order for now
schedule = [topic_row for topic_row in topics for _ in range(base)]

# The leftover slots go to `extra` distinct, randomly chosen topics
schedule.extend(random.sample(topics, extra))
random.shuffle(schedule)

print("Topics loaded:", len(topics))
print("Total scheduled paragraphs:", len(schedule))

# =========================
# SMALL DIVERSITY BOOSTERS
# =========================
# One entry is picked at random per scheduled paragraph and inserted into the
# prompt's "Write it as ..." line.
DIVERSITY_MODES = [
    "a reflective inner monologue",
    "a descriptive narrative moment",
    "a moral dilemma unfolding in real time",
    "a tense confrontation (no dialogue quotes)",
    "a calm analytical tone",
    "a melancholic quiet tone",
]
# One entry is picked at random per scheduled paragraph and inserted into the
# prompt's "Setting: ..." line.
SETTINGS = [
    "indoors late at night",
    "in a quiet public place",
    "during a formal gathering",
    "while traveling",
    "in a tense private moment",
]

# =========================
# GENERATION LOOP
# =========================
seen_hashes = set()
generated = 0

# Resume-safe: count already existing .txt files
existing = list(OUT_DIR.glob("*.txt"))
if existing:
    print(f"Found {len(existing)} existing files. Will skip them.")
    generated = len(existing)

for idx, topic in enumerate(schedule, start=1):
    tid = topic["topic_id"]
    tname = topic["topic_name"]
    tdesc = topic["topic_description"]

    outfile = OUT_DIR / f"{tid}_{idx:03d}.txt"
    if outfile.exists():
        continue

    mode = random.choice(DIVERSITY_MODES)
    setting = random.choice(SETTINGS)

    prompt = f"""
Write ONE NEW, ORIGINAL paragraph of {MIN_WORDS}-{MAX_WORDS} words.

Theme (do NOT mention this label explicitly): {tname}
Theme description: {tdesc}

ABSOLUTE RULES (must follow):
- The paragraph must be completely NEW and ORIGINAL.
- Do NOT copy, quote, paraphrase, or closely imitate any real Conan Doyle sentence.
- Do NOT reproduce any scene from a real book.
- Do NOT use Sherlock Holmes or Watson.
- Do NOT mention any names, places, time period, dates, or nationalities.
- Do NOT include dialogue quotes ("...").
- Do NOT include headings, notes, disclaimers, or meta-instructions.
- Output EXACTLY ONE paragraph of prose.
- Output ONLY the paragraph text and nothing else.

Style target:
- Victorian narrative rhythm similar to Conan Doyle, but with entirely new wording and events.

Diversity constraints:
- Write it as {mode}.
- Setting: {setting}.
- Vary sentence length and rhythm.
""".strip()

    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    success = False
    last_wc = None

    for attempt in range(1, MAX_ATTEMPTS_PER_FILE + 1):
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=260,
                min_new_tokens=140,      # prevents 2-word quits
                temperature=1.05,        # more variety
                top_p=0.92,
                do_sample=True,
                repetition_penalty=1.22,
                no_repeat_ngram_size=5,
                use_cache=True,
            )

        raw = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Remove prompt echo
        if raw.startswith(prompt):
            raw = raw[len(prompt):].strip()
        raw = raw.replace(prompt, "").strip()

        text = clean_output(raw)
        wc = word_count(text)
        last_wc = wc

        # Hard rejections
        if wc < MIN_WORDS or wc > MAX_WORDS:
            continue
        if len(text) < 250:
            continue
        if looks_like_prompt_leak(text):
            continue
        if looks_like_direct_book_copy(text):
            continue
        if too_similar_to_seen(text, seen_hashes):
            continue

        # Save
        h = hashlib.md5(normalize_text(text).encode("utf-8")).hexdigest()
        seen_hashes.add(h)

        outfile.write_text(text + "\n", encoding="utf-8")
        generated += 1
        print(f"[{generated}/{TOTAL_PARAS}] wrote {outfile.name} ({wc} words) (attempt {attempt})")
        success = True
        break

    if not success:
        print(f"[SKIP] {outfile.name} failed after {MAX_ATTEMPTS_PER_FILE} attempts (last wc={last_wc})")

    time.sleep(SLEEP_BETWEEN_CALLS)

print("\nDone.")
print("Final output folder:", OUT_DIR)
print("Example listing:")
!ls -lh "{OUT_DIR}" | head -n 25


Saving to: /content/drive/MyDrive/class4-finetuned-mimicry/DOYLE
Topics loaded: 20
Total scheduled paragraphs: 250
Found 3 existing files. Will skip them.
[4/250] wrote T20_001.txt (174 words) (attempt 9)
[5/250] wrote T01_002.txt (165 words) (attempt 3)
[6/250] wrote T12_003.txt (165 words) (attempt 4)
[7/250] wrote T16_004.txt (176 words) (attempt 6)
[8/250] wrote T01_005.txt (145 words) (attempt 2)
[9/250] wrote T06_006.txt (171 words) (attempt 11)
[10/250] wrote T20_007.txt (171 words) (attempt 1)
[11/250] wrote T16_008.txt (171 words) (attempt 6)
[SKIP] T04_009.txt failed after 15 attempts (last wc=208)
[12/250] wrote T05_010.txt (153 words) (attempt 10)
[13/250] wrote T05_011.txt (140 words) (attempt 7)
[14/250] wrote T01_012.txt (165 words) (attempt 4)
[15/250] wrote T11_013.txt (135 words) (attempt 3)
[16/250] wrote T03_014.txt (157 words) (attempt 5)
[17/250] wrote T20_015.txt (147 words) (attempt 5)
[18/250] wrote T19_016.txt (133 words) (attempt 2)


KeyboardInterrupt: 