In [3]:
# Mount Google Drive at /content/drive so the dataset paths under
# /content/drive/MyDrive used by this notebook are reachable (Colab runtime only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


setup

In [1]:

import os, cv2, random, math, shutil, zipfile
from glob import glob
from pathlib import Path
from tqdm import tqdm
from collections import defaultdict


Paths and params

In [None]:

# Input mode, chosen according to how the source images are laid out on disk:
## 'flat' is a single folder of frames
## 'subfolders' is a parent folder containing one subfolder per texture class
srcMode = "subfolders"       # "flat" or "subfolders"

# If srcMode=="flat":
srcDir = "/content/drive/MyDrive/training_data/"
# If srcMode=="subfolders":
parentFolder = "/content/drive/MyDrive/training_data/textures"

# OUTPUT DATASET setup: tiles go to <outputFolder>/images, captions to <outputFolder>/captions
outputFolder = "/content/drive/MyDrive/training_data/LoRA_Textures"
imgFolder = f"{outputFolder}/images"
captionsFolder = f"{outputFolder}/captions"
os.makedirs(imgFolder, exist_ok=True)
os.makedirs(captionsFolder, exist_ok=True)

# === CROPPING ===
tileSize   = 512   # tile edge length in pixels; 512 or 768 options for LoRA
overlap      = 256   # overlap between neighbouring tiles, in pixels
maxPerImage = 40    # maximum number of tiles kept per source image

# === QUALITY FILTER ===
varThresh = 40.0   # minimum variance-of-Laplacian for a tile to be kept; lower keeps more tiles

# === CAPTIONS ===
trig = "uavtex" # trigger token for LoRA activation; placed first in every caption
defaultPrompt = "nadir uav photo, fine microtexture"

# Caption phrase per (lower-cased) class folder name; only used in srcMode='subfolders'.
# Folders missing from this map fall back to their own name with "_" replaced by " ".
FOLDER_CAPTION_MAP = {
    "roofs": "roof tiles",
    "roof": "roof tiles",
    "red_roofs": "terracotta roof tiles",
    "metal_roofs": "metal roof texture",
    "asphalt": "asphalt road, realistic grain",
    "road": "asphalt road, realistic grain",
    "grass": "grass lawn, fine blades",
    "vegetation": "vegetation canopy, microtexture",
    "concrete": "concrete surface, subtle grain",
    "rubble": "debris, rocks and dirt ground texture",
    "dirt": "dirt ground texture",
    "water": "bodies of water, reflective, blue, green"
}

# === EXPORT ===
zipPath = "/content/uavtex_dataset.zip"

# Reset output: delete tiles and captions left over from a previous run so the
# dataset only contains files produced by this run.
for p in [imgFolder, captionsFolder]:
    for f in glob(f"{p}/*"):
        # Only plain files are removed. os.remove() raises on a directory, so a
        # stray subfolder is left untouched instead of aborting the cell.
        if os.path.isfile(f):
            os.remove(f)
print("Output ready at:", outputFolder)


Output ready at: /content/drive/MyDrive/training_data/LoRA_Textures


utils

In [None]:

# File suffixes (lower-case, with leading dot) accepted as source images.
imageExtensions = {
    ".jpg",
    ".jpeg",
    ".png",
    ".bmp",
    ".tif",
    ".tiff",
}

def is_image(path):
    """Return True when the suffix of `path`, lower-cased, is in `imageExtensions`."""
    suffix = Path(path).suffix
    return suffix.lower() in imageExtensions

def var_of_laplacian(bgr):
    """Return the variance of the Laplacian of `bgr` after BGR-to-grayscale conversion.

    The Laplacian is computed with a 64-bit float output depth (cv2.CV_64F).
    """
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    return laplacian.var()

def crop_image(img, tile, stride):
    """Yield `(y, x, crop)` for each full `tile` x `tile` window of `img`.

    Window origins advance by `stride` pixels along both axes, scanning row by
    row from the top-left corner. Windows that would be smaller than
    `tile` x `tile` (image smaller than one tile) are not yielded.
    """
    height, width = img.shape[:2]
    row_stop = max(1, height - tile + 1)
    col_stop = max(1, width - tile + 1)
    for top in range(0, row_stop, stride):
        for left in range(0, col_stop, stride):
            window = img[top:top + tile, left:left + tile]
            if window.shape[0] != tile or window.shape[1] != tile:
                continue
            yield top, left, window

def safe_basename(p):
    """Return the stem of `p` with every character outside [a-zA-Z0-9_-] replaced by "_"."""
    import re
    stem = Path(p).stem
    unsafe = re.compile(r"[^a-zA-Z0-9_\-]")
    return unsafe.sub("_", stem)

def class_from_parent(path):
    """Return the lower-cased name of the folder that directly contains `path`."""
    parent_dir = Path(path).parent
    return parent_dir.name.lower()

def caption_for_class(cls):
    """Build the caption for class name `cls`.

    The caption is the trigger token, the default prompt and a class phrase,
    comma-separated. The phrase comes from FOLDER_CAPTION_MAP; classes missing
    from the map use their own name with underscores turned into spaces.
    """
    fallback = cls.replace("_"," ")
    if cls in FOLDER_CAPTION_MAP:
        phrase = FOLDER_CAPTION_MAP[cls]
    else:
        phrase = fallback
    return f"{trig}, {defaultPrompt}, {phrase}"

def write_caption(base, text):
    """Write `text`, stripped and newline-terminated, to `<captionsFolder>/<base>.txt` as UTF-8."""
    caption_path = f"{captionsFolder}/{base}.txt"
    with open(caption_path,"w",encoding="utf-8") as handle:
        line = text.strip()+"\n"
        handle.write(line)


Data gathering

In [None]:

# Collect the source image paths for the configured input layout.
# glob() returns entries in arbitrary (filesystem-dependent) order, so each
# folder listing is sorted to make the source order, and everything derived
# from it, reproducible across runs.
sources = []
if srcMode == "flat":
    sources = sorted(p for p in glob(f"{srcDir}/*") if is_image(p))
elif srcMode == "subfolders":
    # One subfolder per texture class; non-directory entries in the parent are ignored.
    for cls_dir in sorted(glob(f"{parentFolder}/*")):
        if os.path.isdir(cls_dir):
            sources += sorted(p for p in glob(f"{cls_dir}/*") if is_image(p))
else:
    raise ValueError("srcMode must be 'flat' or 'subfolders'")

print(f"Found {len(sources)} source images")
print("Sample:", sources[:5])


Found 114 source images
Sample: ['/content/drive/MyDrive/training_data/textures/asphalt/asphalt.png', '/content/drive/MyDrive/training_data/textures/asphalt/asphalt_2.png', '/content/drive/MyDrive/training_data/textures/asphalt/asphalt_3.png', '/content/drive/MyDrive/training_data/textures/asphalt/asphalt_4.png', '/content/drive/MyDrive/training_data/textures/asphalt/asphalt_5.png']


Cropping & captions

In [None]:

# Step between neighbouring tile origins. crop_image() expects a stride, not an
# overlap: tiles of `tileSize` pixels that share `overlap` pixels start
# `tileSize - overlap` pixels apart.
stride = tileSize - overlap
if stride <= 0:
    raise ValueError("overlap must be smaller than tileSize")

count = 0
unreadable = []     # source paths cv2 could not decode
usedStems = set()   # output name stems already claimed by earlier sources
for src in tqdm(sources):
    bgr = cv2.imread(src)
    if bgr is None:
        # cv2.imread signals failure by returning None; remember it for the summary.
        unreadable.append(src)
        continue
    used = 0
    cls = class_from_parent(src) if srcMode=="subfolders" else None
    # The caption only depends on the source image, so build it once per image.
    cap = caption_for_class(cls) if cls else f"{trig}, {defaultPrompt}"

    # Sources with the same stem (same file name in different class folders, or
    # the same name with different extensions) would overwrite each other's
    # tiles; give later ones a numeric suffix so every source keeps its tiles.
    stem = safe_basename(src)
    uniqueStem, n = stem, 1
    while uniqueStem in usedStems:
        n += 1
        uniqueStem = f"{stem}_{n}"
    usedStems.add(uniqueStem)

    for (y, x, crop) in crop_image(bgr, tileSize, stride):
        if used >= maxPerImage:
            break
        # Drop low-detail tiles (low variance of the Laplacian).
        if var_of_laplacian(crop) < varThresh:
            continue
        base = f"{uniqueStem}_{y}_{x}"
        # cv2.imwrite returns False on failure; skip the caption and the
        # counters so the dataset never holds a caption without its image.
        if not cv2.imwrite(f"{imgFolder}/{base}.png", crop):
            tqdm.write(f"Failed to write tile: {imgFolder}/{base}.png")
            continue
        write_caption(base, cap)
        used += 1
        count += 1

print("Total tiles written:", count)
if unreadable:
    print(f"Skipped {len(unreadable)} unreadable source images. Sample:", unreadable[:5])


100%|██████████| 114/114 [01:53<00:00,  1.00it/s]

Total tiles written: 130





Zip Export

In [None]:

def zipdir(path, ziph):
    """Add every file under `path` (recursively) to the open ZipFile `ziph`.

    Archive names are relative to the parent directory of `path`, so entries
    keep the name of the zipped folder as their first path component.
    """
    archive_root = os.path.dirname(path)
    for folder, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            full_path = os.path.join(folder, filename)
            arcname = os.path.relpath(full_path, start=archive_root)
            ziph.write(full_path, arcname)

# Pack the whole output folder into a deflate-compressed archive at zipPath.
with zipfile.ZipFile(zipPath, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
    zipdir(outputFolder, zf)
print("Wrote ZIP:", zipPath)