# Bharat AI-SoC | YOLOv8n-obb Pothole Detection → TFLite INT8
**FIRST:** Runtime → Change runtime type → **T4 GPU** → Save

## Cell 1 — Check GPU

In [None]:
import torch, sys
# Report interpreter / framework versions and whether a CUDA device is usable.
cuda_ok = torch.cuda.is_available()
python_version = sys.version.split()[0]

print("Python :", python_version)
print("PyTorch:", torch.__version__)
print("CUDA   :", cuda_ok)
if not cuda_ok:
    # Training below targets a GPU, so tell the user how to switch runtime.
    print("\nNO GPU — Runtime > Change runtime type > T4 GPU > Save")
else:
    print("GPU    :", torch.cuda.get_device_name(0))
    print("\nGPU OK")


## Cell 2 — Install packages

In [None]:
import subprocess, sys
def pip(pkg):
    """Install ``pkg`` quietly with pip, using the running interpreter.

    Raises ``subprocess.CalledProcessError`` when pip exits with a non-zero
    status (``check=True``).
    """
    command = [sys.executable, "-m", "pip", "install", "-q", pkg]
    subprocess.run(command, check=True)

# Install each dependency in turn, printing progress on a single line.
for package in ("ultralytics", "pyyaml"):
    print(f"Installing {package}...", end=" ")
    pip(package)
    print("OK")

# Imported only after installation so the version can be reported.
import ultralytics
print(f"\nultralytics: {ultralytics.__version__}")
print("All ready!")


## Cell 3 — Upload and extract dataset
1. Left sidebar → Files icon → Upload
2. Upload your `data_obb.zip`
3. Run this cell

In [None]:
import os, subprocess, glob

# Auto-detect the uploaded dataset archive.
# Well-known names win; otherwise fall back to any other zip in the working
# directory. The deployment archive written by Cell 9 is skipped so that a
# re-run never mistakes the exported model package for the dataset.
KNOWN_ZIPS = ["data_obb.zip", "data.zip", "pothole_obb.zip", "dataset.zip"]
OUTPUT_ZIP = "pothole_obb_rpi5.zip"

ZIP_NAME = next((name for name in KNOWN_ZIPS if os.path.exists(name)), None)
if ZIP_NAME is None:
    # glob order is filesystem-dependent; sort so the choice is reproducible.
    zips = sorted(z for z in glob.glob("*.zip") if z != OUTPUT_ZIP)
    if not zips:
        raise FileNotFoundError(
            "No zip found. Upload your data_obb.zip via the Files panel (left sidebar)")
    ZIP_NAME = zips[0]
print(f"Found zip: {ZIP_NAME}")

print(f"Extracting {ZIP_NAME}...")
# -q: quiet, -o: overwrite existing files without prompting, -d: target folder
unzip_cmd = ["unzip", "-q", "-o", ZIP_NAME, "-d", "obb_dataset"]
subprocess.run(unzip_cmd, check=True)
DATASET_DIR = "obb_dataset"

# Show structure: one line per folder, indented by depth, top 4 levels only
print("\nDataset structure:")
for folder, _subdirs, filenames in os.walk(DATASET_DIR):
    depth = folder.replace(DATASET_DIR, "").count(os.sep)
    if depth >= 4:
        continue
    indent = "  " * depth
    print(f"{indent}{os.path.basename(folder)}/  ({len(filenames)} files)")

# Count images per split (only splits that have a non-empty images/ folder
# are reported) and keep a running total.
IMAGE_PATTERNS = ("/*.jpg", "/*.png", "/*.jpeg")
total = 0
for split in ("train", "valid", "val", "test"):
    img_dir = os.path.join(DATASET_DIR, split, "images")
    if not os.path.isdir(img_dir):
        continue
    n_imgs = sum(len(glob.glob(img_dir + pattern)) for pattern in IMAGE_PATTERNS)
    if n_imgs:
        print(f"  {split}: {n_imgs} images")
        total += n_imgs
print(f"  TOTAL: {total} images")


## Cell 4 — Verify OBB labels and find folders

In [None]:
import os, glob

def find_dir(base, splits, subdirs):
    """Return the absolute path of the first non-empty ``base/<split>/<subdir>``.

    Candidates are tried split by split and, within a split, in the order
    given by ``subdirs``; an empty-string subdir means the split folder
    itself. Returns ``None`` when no candidate is a directory with at least
    one entry.
    """
    candidates = (
        os.path.join(base, split, sub) for split in splits for sub in subdirs
    )
    for candidate in candidates:
        if os.path.isdir(candidate) and os.listdir(candidate):
            return os.path.abspath(candidate)
    return None

# Locate the image/label folders. The trailing "" lets the split folder
# itself match when it has no non-empty images/ or labels/ subfolder.
TRAIN_SPLITS = ["train"]
VALID_SPLITS = ["valid", "validation", "val"]

TRAIN_IMGS = find_dir(DATASET_DIR, TRAIN_SPLITS, ["images", ""])
VALID_IMGS = find_dir(DATASET_DIR, VALID_SPLITS, ["images", ""])
TRAIN_LBLS = find_dir(DATASET_DIR, TRAIN_SPLITS, ["labels", ""])
VALID_LBLS = find_dir(DATASET_DIR, VALID_SPLITS, ["labels", ""])

for caption, folder in (
    ("Train images:", TRAIN_IMGS),
    ("Train labels:", TRAIN_LBLS),
    ("Valid images:", VALID_IMGS),
    ("Valid labels:", VALID_LBLS),
):
    print(caption, folder)

# Check OBB label format (should have 9 values: class + 4 xy pairs).
# Only the first non-blank line of up to three label files is inspected.
if TRAIN_LBLS:
    known_formats = {
        9: "OBB format (class + 4 xy corners)",
        5: "Regular detection format (class + xywh)",
    }
    print("\nSample label contents:")
    for label_path in glob.glob(TRAIN_LBLS + "/*.txt")[:3]:
        with open(label_path) as fh:
            first_line = next((ln.strip() for ln in fh if ln.strip()), None)
        if first_line is None:
            # Empty label file: nothing to report.
            continue
        n_values = len(first_line.split())
        print(f"  {os.path.basename(label_path)}: {n_values} values — ", end="")
        print(known_formats.get(n_values, f"values: {first_line[:80]}"))

# Auto-create val split if missing: the last 20% of the sorted training
# images (and their label files) are moved into <dataset>/val/.
if TRAIN_IMGS and not VALID_IMGS:
    import shutil
    print("\nNo validation set — auto-splitting 80/20 from train...")
    # Same extensions as the image count in Cell 3, so a .jpeg-only dataset
    # does not end up with an empty validation set.
    img_root = glob.escape(TRAIN_IMGS)
    all_imgs = sorted(
        path
        for ext in ("jpg", "jpeg", "png")
        for path in glob.glob(os.path.join(img_root, f"*.{ext}"))
    )
    val_imgs = all_imgs[int(len(all_imgs)*0.8):]
    if val_imgs:
        val_img_dir = os.path.join(DATASET_DIR, "val", "images")
        val_lbl_dir = os.path.join(DATASET_DIR, "val", "labels")
        os.makedirs(val_img_dir, exist_ok=True)
        os.makedirs(val_lbl_dir, exist_ok=True)
        # Use the label folder located above rather than a string replace on
        # the absolute path, which would also rewrite any parent directory
        # whose name happens to contain "images".
        lbl_dir = TRAIN_LBLS or os.path.join(os.path.dirname(TRAIN_IMGS), "labels")
        for img in val_imgs:
            img_name = os.path.basename(img)
            shutil.move(img, os.path.join(val_img_dir, img_name))
            lbl_name = os.path.splitext(img_name)[0] + ".txt"
            lbl = os.path.join(lbl_dir, lbl_name)
            if os.path.exists(lbl):
                shutil.move(lbl, os.path.join(val_lbl_dir, lbl_name))
        VALID_IMGS = os.path.abspath(val_img_dir)
        # Keep the label variable in step with the newly created split.
        VALID_LBLS = os.path.abspath(val_lbl_dir)
        print(f"Created val set: {len(val_imgs)} images")
    else:
        # Leave VALID_IMGS unset so the check below fails with a clear message
        # instead of training against an empty validation folder.
        print("No .jpg/.jpeg/.png images found in", TRAIN_IMGS)

assert TRAIN_IMGS, "Could not find training images"
assert VALID_IMGS, "Could not find validation images"
print("\nAll folders OK!")


## Cell 5 — Write data.yaml

In [None]:
import yaml, os, glob

# Reuse class names from a YAML shipped with the dataset when there is one;
# otherwise fall back to the single "pothole" class.
existing = sorted(glob.glob(DATASET_DIR+"/*.yaml"))
names = ["pothole"]
if existing:
    try:
        with open(existing[0]) as f:
            old = yaml.safe_load(f)
        found = old.get("names") if isinstance(old, dict) else None
        if isinstance(found, dict):
            # Index-keyed mapping ({0: "pothole", ...}): flatten in key order.
            found = [found[k] for k in sorted(found)]
    except (OSError, yaml.YAMLError, TypeError) as err:
        # Best effort: an unreadable or malformed YAML keeps the default,
        # but say so instead of failing silently.
        print(f"Could not read class names from {existing[0]}: {err}")
        found = None
    if isinstance(found, list) and found:
        names = found
        print("Class names:", names)

YAML_PATH = os.path.join(DATASET_DIR, "data.yaml")
cfg = {
    "path":  os.path.abspath(DATASET_DIR),
    "train": TRAIN_IMGS,
    "val":   VALID_IMGS,
    "nc":    len(names),
    "names": names,
}
with open(YAML_PATH,"w") as f:
    yaml.dump(cfg, f, default_flow_style=False)

print("data.yaml:")
# Read back through a context manager so the handle is closed promptly.
with open(YAML_PATH) as f:
    print(f.read())


## Cell 6 — Train YOLOv8n-obb (~15-20 min, do NOT close browser)
**YOLOv8n-obb** = oriented bounding boxes — rotated boxes that fit potholes more tightly than regular axis-aligned boxes.
Watch **mAP50** go up.

In [None]:
from ultralytics import YOLO

# Start from the nano oriented-bounding-box checkpoint.
model = YOLO("yolov8n-obb.pt")

for banner in (
    "Training YOLOv8n-obb on pothole dataset...",
    "Epochs: 50 | Image: 320px | Batch: 16",
    "Watch: metrics/mAP50(OBB) going up = model learning",
):
    print(banner)
print()

# Training configuration, passed through to model.train() unchanged.
train_args = {
    "data": YAML_PATH,
    "epochs": 50,
    "imgsz": 320,
    "batch": 16,
    "device": 0,
    "name": "pothole_obb",
    "patience": 15,
    "optimizer": "AdamW",
    "lr0": 0.001,
    "augment": True,
    "mosaic": 1.0,
    "pretrained": True,
    "verbose": False,
    "exist_ok": True,
}
results = model.train(**train_args)

print("\nTraining complete!")
# The exact results_dict key names depend on the Ultralytics version (OBB
# metrics appear to be reported with a "(B)" suffix rather than "(OBB)" —
# TODO confirm), so match on the metric name instead of a hard-coded key.
# A hard-coded miss would silently print 0.0000.
metrics = getattr(results, "results_dict", None) or {}
map50 = next((v for k, v in metrics.items() if "mAP50" in k and "95" not in k), None)
map95 = next((v for k, v in metrics.items() if "mAP50-95" in k), None)
if map50 is None and map95 is None:
    print("See mAP in training output above")
else:
    for label, value in (("OBB mAP50    ", map50), ("OBB mAP50-95 ", map95)):
        shown = "N/A" if value is None else f"{float(value):.4f}"
        print(f"{label}: {shown}")


## Cell 7 — Export to TFLite INT8

In [None]:
from ultralytics import YOLO
import os, glob

# Locate the trained weights. Cell 6 trains with name="pothole_obb" and
# exist_ok=True, so that run folder is preferred. Otherwise take the most
# recently written best.pt; a lexicographic sort could select a stale run
# such as "pothole_obb2" or "train".
preferred_pt = os.path.join("runs", "obb", "pothole_obb", "weights", "best.pt")
pts = [preferred_pt] if os.path.isfile(preferred_pt) else sorted(
    glob.glob("runs/obb/*/weights/best.pt"), key=os.path.getmtime)

assert pts, "No best.pt found — did Cell 6 finish?"
BEST_PT = pts[-1]
print("Best weights:", BEST_PT)

trained = YOLO(BEST_PT)
print("\nExporting to TFLite INT8...")

# export() returns the path of the artefact it wrote; keep it so the model
# chosen below is the one produced by this run.
exported = trained.export(
    format = "tflite",
    imgsz  = 320,
    int8   = True,
    data   = YAML_PATH,
    nms    = False,
)
exported = str(exported) if exported else ""

# Only look next to the trained weights. A search of the whole working
# directory can return stale files, e.g. the copy that Cell 9 places in
# rpi5_obb_package/ on a previous run.
export_root = glob.escape(os.path.dirname(BEST_PT))
tflite_files = sorted(
    glob.glob(os.path.join(export_root, "**", "*.tflite"), recursive=True))

print("\nTFLite files:")
for f in tflite_files:
    print(f"  {f}  ({os.path.getsize(f)/1024/1024:.2f} MB)")

# Candidate order: the freshly exported file first, then everything found on
# disk. Prefer a file whose *name* (not its folder) contains "int8".
candidates = []
if exported.endswith(".tflite") and os.path.isfile(exported):
    candidates.append(exported)
candidates += tflite_files

MODEL_PATH = next(
    (f for f in candidates if "int8" in os.path.basename(f).lower()), None)
if MODEL_PATH is None and candidates:
    MODEL_PATH = candidates[0]

assert MODEL_PATH, "No .tflite file found"
print("\nUsing:", MODEL_PATH)
print("Size  :", round(os.path.getsize(MODEL_PATH)/1024/1024,2), "MB")


## Cell 8 — Inspect output tensors

In [None]:
import tensorflow as tf
import numpy as np

# Load the exported model and allocate its tensors before querying details.
interp = tf.lite.Interpreter(model_path=MODEL_PATH)
interp.allocate_tensors()

inp = interp.get_input_details()
# Outputs ordered by tensor index.
out = list(interp.get_output_details())
out.sort(key=lambda detail: detail["index"])

print("INPUT:")
for detail in inp:
    dtype_name = detail["dtype"].__name__
    print(f"  shape={detail['shape']}  dtype={dtype_name}")

print("\nOUTPUTS:")
for detail in out:
    dtype_name = detail["dtype"].__name__
    print(f"  [{detail['index']}] shape={detail['shape']}  dtype={dtype_name}")
    print(f"        name={detail['name']}")

# Smoke test: run one inference on an all-zero input of the model's own
# shape and dtype, then read back the first output tensor.
dummy = np.zeros(inp[0]["shape"], dtype=inp[0]["dtype"])
interp.set_tensor(inp[0]["index"], dummy)
interp.invoke()
out0 = interp.get_tensor(out[0]["index"])

print("\nOutput[0] shape:", out0.shape)
print("\nMODEL VERIFIED!")
print()
# The Ultralytics OBB head concatenates the detection output (4 box values +
# nc class scores) with the angle, so the angle is the LAST channel and the
# channel count is 4 + nc + 1.
print("YOLOv8n-obb output format: (1, 5+nc, num_anchors)")
print("Channels: [xc, yc, w, h, class scores (nc), angle] per anchor")
IS_FLOAT_INPUT = inp[0]["dtype"] == np.float32
print(f"Float input: {IS_FLOAT_INPUT}")


## Cell 9 — Download package

In [None]:
import shutil, os, glob, json
from google.colab import files

# Assemble the deployment folder: model, class labels and a metadata file.
os.makedirs("rpi5_obb_package", exist_ok=True)
shutil.copy(MODEL_PATH, "rpi5_obb_package/pothole_obb_int8.tflite")

# Use the class names written to data.yaml in Cell 5 so the package matches
# the dataset the model was actually trained on.
class_names = [str(n) for n in names]
with open("rpi5_obb_package/labels.txt","w") as f:
    f.write("".join(f"{n}\n" for n in class_names))

# Shape of the lowest-index output tensor, as plain ints for JSON.
first_output = min(interp.get_output_details(), key=lambda x: x["index"])
out0_shape = [int(dim) for dim in interp.get_tensor(first_output["index"]).shape]

info = {
    "model": "YOLOv8n-obb INT8 TFLite",
    "input_size": 320,
    "input_float": bool(IS_FLOAT_INPUT),
    "output_shape": out0_shape,
    "classes": class_names,
    # The OBB head appends the angle after the class scores, so it is the
    # last channel; a decoder on the device must index it accordingly.
    "format": "OBB — [xc, yc, w, h, class scores (nc), angle] per anchor"
}
with open("rpi5_obb_package/model_info.json","w") as f:
    json.dump(info, f, indent=2)

print("model_info.json:")
print(json.dumps(info, indent=2))

# Zip the package folder and hand the archive to the browser for download.
zip_file = "pothole_obb_rpi5.zip"
shutil.make_archive("pothole_obb_rpi5", "zip", "rpi5_obb_package")
zip_mb = os.path.getsize(zip_file) / 1024 / 1024
print(f"\nZip: {zip_mb:.1f} MB")
files.download(zip_file)
print("Done!")


## Cell 10 — Training summary for report

In [None]:
import os, glob, csv

size_mb = os.path.getsize(MODEL_PATH)/1024/1024
best_map50 = best_map95 = "N/A"


def _best_value(rows, column):
    """Return the highest value in ``column`` formatted to 4 decimals, or "N/A"."""
    vals = [float(r[column]) for r in rows if (r.get(column) or "").strip()]
    return f"{max(vals):.4f}" if vals else "N/A"


# Read the results.csv that belongs to the exported weights
# (<run>/weights/best.pt -> <run>/results.csv). Only when it is missing fall
# back to the most recently written results.csv; a lexicographic sort could
# report another run's numbers.
run_csv = os.path.join(os.path.dirname(os.path.dirname(BEST_PT)), "results.csv")
if os.path.isfile(run_csv):
    csvs = [run_csv]
else:
    csvs = sorted(glob.glob("runs/obb/*/results.csv"), key=os.path.getmtime)

if csvs:
    with open(csvs[-1], newline="") as f:
        rows = list(csv.DictReader(f))
    if rows:
        # Match on the metric name so any header padding or suffix is
        # tolerated; `k and` skips the None key DictReader uses for surplus
        # fields in ragged rows.
        cols50 = [k for k in rows[0] if k and "mAP50" in k and "95" not in k]
        cols95 = [k for k in rows[0] if k and "mAP50-95" in k]
        if cols50:
            best_map50 = _best_value(rows, cols50[0])
        if cols95:
            best_map95 = _best_value(rows, cols95[0])

# Build the report as a list of lines and emit it in one go.
rule = "=" * 60
summary_lines = [
    rule,
    "  BHARAT AI-SoC — OBB MODEL TRAINING SUMMARY",
    rule,
    "  Architecture     : YOLOv8n-obb (Oriented Bounding Box)",
    "  Task             : Rotated box detection",
    "  Why better       : OBB fits irregular pothole shapes",
    "                     tighter than axis-aligned boxes,",
    "                     reducing false positive area.",
    "  Why ARM-optimized: Depthwise separable convolutions in",
    "                     C2f blocks map to ARM NEON SIMD.",
    "                     INT8 integer ALU = 4x vs FP32.",
    "                     XNNPACK on Cortex-A76 (RPi 5).",
    "  Quantization     : INT8 post-training static",
    f"  Model size       : {size_mb:.2f} MB",
    "  Input resolution : 320 x 320 px",
    "  Epochs           : 50 (early stop patience=15)",
    "  Batch size       : 16",
    "  Optimizer        : AdamW  lr0=0.001",
    "  Augmentation     : Mosaic 1.0, flips, HSV",
    "  Pretrained on    : COCO (fine-tuned on pothole data)",
    "  Target hardware  : RPi 5 ARM Cortex-A76 @ 2.4 GHz",
    "  Delegate         : XNNPACK (ARM NEON SIMD)",
    "",
    f"  OBB mAP50        : {best_map50}",
    f"  OBB mAP50-95     : {best_map95}",
    rule,
]
print("\n".join(summary_lines))
