<a href="https://colab.research.google.com/github/langhopepe/PCD_fruitdetection/blob/main/pcdfruitdetection_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!nvidia-smi
!pip -q install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip -q install onnx onnxruntime scikit-learn


Tue Nov 11 10:53:32 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   36C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
from google.colab import drive
from pathlib import Path
# Mount Google Drive so the dataset folders below are reachable from this runtime.
drive.mount('/content/drive')

# Project root on Drive and the two dataset folders used by the rest of the notebook.
ROOT      = Path('/content/drive/MyDrive/fruit-ripeness')
RIPE_DIR  = ROOT/'dataset_ripeness'      # the ZIP archives are expected to already be under this folder
GATE_DIR  = ROOT/'dataset_gate'          # filled automatically from the ripeness dataset later on
# Create both folders up front; exist_ok=True makes re-running this cell harmless.
RIPE_DIR.mkdir(parents=True, exist_ok=True)
GATE_DIR.mkdir(parents=True, exist_ok=True)

# File suffixes (lower-case, including the leading dot) that are treated as images.
IMG_EXT = {'.jpg','.jpeg','.png','.webp','.bmp'}

Mounted at /content/drive


In [None]:
import shutil
from zipfile import ZipFile

DELETE_ZIP = False  # set to True to delete each .zip file after it has been extracted

def unzip_in_place_per_fruit(ripeness_root: Path):
    """Extract every ZIP archive that sits directly inside each fruit folder.

    Each immediate sub-directory of ``ripeness_root`` is treated as one fruit.
    Archives are extracted into the fruit folder itself, and a ``__MACOSX``
    folder left behind by the extraction is removed afterwards. When the
    module-level ``DELETE_ZIP`` flag is true, the archive is deleted after a
    successful extraction.

    Extraction is best effort: an archive that fails to extract is reported,
    skipped, not counted and not deleted.

    Args:
        ripeness_root: Folder whose immediate sub-directories are fruit folders.
    """
    # Fruit folders are the directories directly below the ripeness root.
    fruit_dirs = [p for p in ripeness_root.iterdir() if p.is_dir()]
    print('Buah terdeteksi:', [d.name for d in fruit_dirs])

    total = 0
    for fdir in fruit_dirs:
        # Non-recursive search. Comparing the lower-cased suffix finds '.zip',
        # '.ZIP' and mixed-case names, and lists each archive exactly once even
        # on case-insensitive filesystems (two globs could return it twice).
        zips = sorted(
            p for p in fdir.iterdir()
            if p.is_file() and p.suffix.lower() == '.zip'
        )
        if not zips:
            print(f'[{fdir.name}] tidak ada ZIP')
            continue

        print(f'[{fdir.name}] ZIP ditemukan: {len(zips)}')
        for z in zips:
            print(f'  Extract: {z.name} -> {fdir}')
            try:
                with ZipFile(z) as zf:
                    zf.extractall(fdir)
            except Exception as e:
                # Deliberately broad: one broken archive must not stop the others.
                print('    ERR:', e)
                continue

            # Remove the macOS metadata folder some archives contain.
            mac = fdir/'__MACOSX'
            if mac.exists():
                shutil.rmtree(mac, ignore_errors=True)

            if DELETE_ZIP:
                try:
                    z.unlink()
                except OSError as e:
                    # Deleting is optional, so report the failure and carry on.
                    print(f'    WARN: could not delete {z.name}: {e}')

            total += 1
    print(f'Selesai. ZIP diekstrak: {total}')

# Extract the archives found in every fruit folder under RIPE_DIR.
unzip_in_place_per_fruit(RIPE_DIR)


Buah terdeteksi: ['apple', 'orange', 'banana']
[apple] ZIP ditemukan: 3
  Extract: freshapples.zip -> /content/drive/MyDrive/fruit-ripeness/dataset_ripeness/apple
  Extract: rottenapples.zip -> /content/drive/MyDrive/fruit-ripeness/dataset_ripeness/apple
  Extract: unripeapple.zip -> /content/drive/MyDrive/fruit-ripeness/dataset_ripeness/apple
[orange] ZIP ditemukan: 3
  Extract: freshoranges.zip -> /content/drive/MyDrive/fruit-ripeness/dataset_ripeness/orange
  Extract: rottenoranges.zip -> /content/drive/MyDrive/fruit-ripeness/dataset_ripeness/orange
  Extract: unripeorange.zip -> /content/drive/MyDrive/fruit-ripeness/dataset_ripeness/orange
[banana] ZIP ditemukan: 3
  Extract: freshbanana.zip -> /content/drive/MyDrive/fruit-ripeness/dataset_ripeness/banana
  Extract: rottenbanana.zip -> /content/drive/MyDrive/fruit-ripeness/dataset_ripeness/banana
  Extract: unripebanana.zip -> /content/drive/MyDrive/fruit-ripeness/dataset_ripeness/banana
Selesai. ZIP diekstrak: 9


In [None]:
def count_leaf_images(root: Path):
    """Print how many images sit directly inside each sub-folder of ``root``.

    Every directory below ``root`` (at any depth, in sorted path order) is
    inspected. Only files whose lower-cased suffix is in ``IMG_EXT`` and that
    are direct children of the directory are counted. Directories without any
    such file are not printed, and ``root`` itself is not inspected.

    Args:
        root: Folder to scan; anything accepted by ``Path``.
    """
    base = Path(root)
    folders = [entry for entry in sorted(base.rglob('*')) if entry.is_dir()]
    for folder in folders:
        images = [
            child for child in folder.iterdir()
            if child.is_file() and child.suffix.lower() in IMG_EXT
        ]
        if not images:
            continue
        print(f'{len(images):4d}  -> {folder.relative_to(base)}')

# Print the per-folder image counts for both dataset roots.
print('--- RIPENESS ---')
count_leaf_images(RIPE_DIR)
print('--- GATE ---')
count_leaf_images(GATE_DIR)


--- RIPENESS ---
2342  -> apple/overripe
1693  -> apple/ripe
1934  -> apple/unripe
2224  -> banana/overripe
1581  -> banana/ripe
2097  -> banana/unripe
1595  -> orange/overripe
1466  -> orange/ripe
1285  -> orange/unripe
--- GATE ---
 400  -> apple
   3  -> banana
   3  -> orange
  14  -> other


In [None]:
from shutil import copy2

def copy_ripeness_to_gate(ripeness_root: Path, gate_root: Path):
    """Copy every ripeness image of each fruit into a flat per-fruit gate folder.

    For each of apple/banana/orange, all images found recursively under
    ``ripeness_root/<fruit>`` are copied into ``gate_root/<fruit>``. Images are
    recognised by a lower-cased suffix in ``IMG_EXT``.

    The copy is idempotent: if a byte-identical file already exists under one
    of the candidate target names, the source is skipped instead of being
    copied again under a new ``_<i>`` name. Without this check every re-run
    of the cell duplicated the whole dataset, and identical images could then
    land in both the train and the test split of the gate dataset.

    Args:
        ripeness_root: Root holding ``<fruit>/...`` image folders.
        gate_root: Root receiving one flat folder per fruit.
    """
    import filecmp  # only needed by this function

    for fruit in ['apple','banana','orange']:
        src = ripeness_root/fruit
        dst = gate_root/fruit
        if not src.exists():
            print(f'SKIP: {src} tidak ada')
            continue
        dst.mkdir(parents=True, exist_ok=True)
        added = 0
        skipped = 0
        # Sorted so the name each colliding file receives is deterministic.
        for f in sorted(src.rglob('*')):
            if not (f.is_file() and f.suffix.lower() in IMG_EXT):
                continue
            target = dst/f.name
            i = 1
            already_present = False
            while target.exists():
                # shallow=False compares contents; equal size/mtime alone is not
                # proof that two images are the same.
                if filecmp.cmp(f, target, shallow=False):
                    already_present = True
                    break
                # Same name, different content: try the next candidate name.
                # The suffix is appended to the previous candidate (a_1, a_1_2, ...),
                # which keeps files created by earlier runs recognisable.
                target = dst/f'{target.stem}_{i}{target.suffix}'
                i += 1
            if already_present:
                skipped += 1
                continue
            copy2(f, target)
            added += 1
        print(f'Gate {fruit}: +{added} files')
        if skipped:
            print(f'Gate {fruit}: {skipped} files already present, skipped')

# Fill the gate dataset with the images of the three target fruits.
copy_ripeness_to_gate(RIPE_DIR, GATE_DIR)

# (Reminder) manually add non-target images to GATE_DIR/"other" so the gate rejects non-fruit inputs more firmly.
(GATE_DIR/'other').mkdir(parents=True, exist_ok=True)


Gate apple: +5969 files
Gate banana: +5902 files
Gate orange: +4346 files


Proses split dataset 70/15/15 (train/val/test)

In [None]:
# ==== RE-RUN ONCE: constants + split (no flatten) ====
from pathlib import Path
import os, shutil, random

# --- Paths (change these if the project root is different) ---
ROOT = Path('/content/drive/MyDrive/fruit-ripeness')
RIPE_DIR = ROOT/'dataset_ripeness'         # layout: dataset_ripeness/<fruit>/<unripe|ripe|overripe>
GATE_DIR = ROOT/'dataset_gate'             # optional: apple/banana/orange/other

# --- Constants ---
FRUITS = ['apple','banana','orange']       # fruit folder names
RIPS   = ['unripe','ripe','overripe']      # ripeness folder names inside each fruit folder

# --- Helpers ---
def is_img(p: str):
    """Return True when ``p`` ends with a supported image extension.

    ``p`` is converted with ``str`` first, so path objects are accepted too.
    The comparison is case-insensitive.
    """
    lowered = str(p).lower()
    for ext in ('.jpg','.jpeg','.png','.webp','.bmp'):
        if lowered.endswith(ext):
            return True
    return False

def split_leaf_recursive(src_leaf: Path, dst_root: Path, rel_subdir: str, ratios=(0.7,0.15,0.15)):
    """Copy the images under ``src_leaf`` into reproducible train/val/test folders.

    All images found recursively under ``src_leaf`` (as decided by ``is_img``)
    are shuffled with a fixed seed and copied to
    ``dst_root/<train|val|test>/rel_subdir``. The train and val sizes are
    ``int(ratio * n)``; the test split receives the remainder, so
    ``ratios[2]`` is not read.

    The file list is sorted before shuffling because ``os.walk`` yields entries
    in filesystem order, which would make the seeded shuffle irreproducible.
    A private ``random.Random(42)`` is used so the global random state of the
    notebook is left untouched.

    Args:
        src_leaf: Folder containing the images of one class.
        dst_root: Root of the split dataset.
        rel_subdir: Class sub-path created below each split folder.
        ratios: ``(train, val, test)`` fractions.
    """
    files = sorted(
        Path(dp)/fn
        for dp, _, fs in os.walk(src_leaf)
        for fn in fs
        if is_img(Path(dp)/fn)
    )
    if not files:
        print('EMPTY:', src_leaf)
        return
    # Same sequence as seeding the global RNG with 42, without the side effect.
    random.Random(42).shuffle(files)
    n = len(files); n_tr = int(ratios[0]*n); n_va = int(ratios[1]*n)
    parts = {'train': files[:n_tr], 'val': files[n_tr:n_tr+n_va], 'test': files[n_tr+n_va:]}
    for split, flist in parts.items():
        if not flist:
            # Very small classes round down to zero files. The folder is still
            # created, so warn because an empty class folder breaks loading later.
            print(f'WARN: {split} split is empty for {src_leaf} ({n} images)')
        dst = dst_root / split / rel_subdir
        dst.mkdir(parents=True, exist_ok=True)
        for src in flist:
            tgt = dst / src.name
            i = 1
            # Files from different sub-folders may share a name; pick a free one.
            while tgt.exists():
                tgt = dst / f'{tgt.stem}_{i}{tgt.suffix}'
                i += 1
            shutil.copy2(src, tgt)

# --- Split GATE (4 classes). Safe even when GATE_DIR is not complete yet:
# a class folder without images is only reported by split_leaf_recursive. ---
GATE_SPLIT = ROOT/'dataset_gate_split'
# Start from a clean output folder so files from a previous split cannot linger.
if GATE_SPLIT.exists(): shutil.rmtree(GATE_SPLIT)

for cls in ['apple','banana','orange','other']:
    split_leaf_recursive(GATE_DIR/cls, GATE_SPLIT, cls)

# --- Split RIPENESS (per fruit: unripe/ripe/overripe) ---
RIPE_SPLIT = ROOT/'dataset_ripeness_split'
# Same clean-start rule as for the gate split.
if RIPE_SPLIT.exists(): shutil.rmtree(RIPE_SPLIT)

for fruit in FRUITS:
    for rip in RIPS:
        # Output layout: <split>/<fruit>/<ripeness>
        split_leaf_recursive(RIPE_DIR/fruit/rip, RIPE_SPLIT, f'{fruit}/{rip}')

print('✅ Done. Splits at:')
print('  GATE  ->', GATE_SPLIT)
print('  RIPEN ->', RIPE_SPLIT)


✅ Done. Splits at:
  GATE  -> /content/drive/MyDrive/fruit-ripeness/dataset_gate_split
  RIPEN -> /content/drive/MyDrive/fruit-ripeness/dataset_ripeness_split


In [None]:
!pip -q install onnx onnxruntime scikit-learn

import torch, torch.nn as nn, torchvision as tv
from torchvision import transforms as T
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix
from pathlib import Path
import numpy as np, json, tempfile, shutil

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Let cuDNN benchmark and pick convolution algorithms (input size is fixed by the transforms below).
torch.backends.cudnn.benchmark = True
# Input resolution and per-channel normalisation constants
# (these look like the usual ImageNet mean/std used with pretrained torchvision weights — confirm).
IMG_SIZE=224; MEAN=[0.485,0.456,0.406]; STD=[0.229,0.224,0.225]

# Training pipeline: resize, random crop/flip/colour jitter as augmentation, then normalise.
tfm_train = T.Compose([
    T.Resize(256),
    T.RandomResizedCrop(IMG_SIZE, scale=(0.8,1.0)),
    T.RandomHorizontalFlip(),
    T.ColorJitter(0.2,0.2,0.2,0.05),
    T.ToTensor(),
    T.Normalize(MEAN, STD),
])
# Evaluation pipeline: no random operations (resize + centre crop only).
tfm_eval = T.Compose([
    T.Resize(256),
    T.CenterCrop(IMG_SIZE),
    T.ToTensor(),
    T.Normalize(MEAN, STD),
])

def loaders_from_root(root, batch=32, workers=2):
    """Build the train/val/test datasets and loaders below ``root``.

    ``root`` must contain ``train``, ``val`` and ``test`` folders readable by
    ``ImageFolder``. The training set uses ``tfm_train`` and is shuffled; the
    validation and test sets use ``tfm_eval`` and keep their order.

    Args:
        root: Root folder of the split dataset.
        batch: Batch size for all three loaders.
        workers: Number of worker processes per loader.

    Returns:
        ``(classes, train_dataset, train_loader, val_loader, test_loader)``
        where ``classes`` comes from the training dataset.
    """
    base = Path(root)

    def _dataset(split, transform):
        return tv.datasets.ImageFolder(str(base/split), transform=transform)

    def _loader(dataset, shuffle):
        return DataLoader(dataset, batch_size=batch, shuffle=shuffle,
                          num_workers=workers, pin_memory=True)

    train_ds = _dataset('train', tfm_train)
    val_ds = _dataset('val', tfm_eval)
    test_ds = _dataset('test', tfm_eval)

    train_dl = _loader(train_ds, True)
    val_dl = _loader(val_ds, False)
    test_dl = _loader(test_ds, False)
    return train_ds.classes, train_ds, train_dl, val_dl, test_dl

def class_weights_from(ds):
    """Compute inverse-frequency class weights for a dataset.

    The weight of class ``i`` is ``max_count / count_i``, so the most frequent
    class gets 1.0 and rarer classes get larger weights.

    Args:
        ds: Object with ``samples`` (iterable of ``(item, class_index)`` pairs)
            and ``classes`` (sequence with one entry per class).

    Returns:
        A float32 tensor with one weight per class, in class-index order.
    """
    from collections import Counter
    import torch as th

    per_class = Counter(label for _, label in ds.samples)
    largest = max(per_class.values())
    weights = []
    for idx in range(len(ds.classes)):
        weights.append(largest / per_class[idx])
    return th.tensor(weights, dtype=th.float32)

def resnet18(num_classes):
    """Return a torchvision ResNet-18 with its final layer sized for ``num_classes``.

    The network is created with ``ResNet18_Weights.DEFAULT`` and its ``fc``
    layer is replaced by a new ``nn.Linear`` with the same input width.
    """
    pretrained = tv.models.ResNet18_Weights.DEFAULT
    backbone = tv.models.resnet18(weights=pretrained)
    feature_dim = backbone.fc.in_features
    backbone.fc = nn.Linear(feature_dim, num_classes)
    return backbone

def train_eval_export(split_root, num_classes, out_prefix, epochs=30, lr=3e-4, patience=7, batch=32):
    """Fine-tune ResNet-18 on a train/val/test split, report test metrics and export the model.

    Training uses AdamW and a class-weighted cross-entropy loss, with mixed
    precision when running on CUDA. After each epoch the validation accuracy
    is measured. The weights of the best epoch are snapshotted, and training
    stops early after ``patience`` consecutive epochs without a new best.
    The best weights are then evaluated on the test split and written to
    ``<out_prefix>.pth``, ``<out_prefix>.onnx`` and ``<out_prefix>.labels.json``.

    Args:
        split_root: Folder handed to ``loaders_from_root``.
        num_classes: Expected number of classes; asserted against the training set.
        out_prefix: Path prefix (without extension) for the exported files.
        epochs: Maximum number of training epochs.
        lr: Learning rate for AdamW.
        patience: Early-stopping patience in epochs.
        batch: Batch size.

    Raises:
        AssertionError: If the number of classes found differs from ``num_classes``.
    """
    classes, ds_tr, dl_tr, dl_va, dl_te = loaders_from_root(split_root, batch=batch)
    assert len(classes)==num_classes, classes
    w = class_weights_from(ds_tr).to(device)

    m = resnet18(num_classes).to(device)
    opt = torch.optim.AdamW(m.parameters(), lr=lr)
    crit = nn.CrossEntropyLoss(weight=w)
    # torch.cuda.amp.GradScaler/autocast are deprecated; torch.amp is the same API
    # with an explicit device type.
    use_amp = (device=='cuda')
    scaler = torch.amp.GradScaler('cuda', enabled=use_amp)

    best_acc, wait, best_state = -1, 0, None
    for ep in range(1, epochs+1):
        m.train(); tot=0; n=0
        for x,y in dl_tr:
            x,y = x.to(device), y.to(device)
            opt.zero_grad(set_to_none=True)
            with torch.amp.autocast('cuda', enabled=use_amp):
                out = m(x); loss = crit(out,y)
            scaler.scale(loss).backward(); scaler.step(opt); scaler.update()
            # Weight the batch loss by batch size so tr_loss is a per-sample mean.
            tot += loss.item()*y.size(0); n += y.size(0)
        tr_loss = tot/n

        m.eval(); corr=0; nva=0
        with torch.inference_mode():
            for x,y in dl_va:
                x,y = x.to(device), y.to(device)
                p = m(x).argmax(1); corr += (p==y).sum().item(); nva += y.numel()
        val_acc = corr/nva
        print(f"[{out_prefix}] ep {ep:02d}  tr_loss={tr_loss:.4f}  val_acc={val_acc:.3f}")

        if val_acc>best_acc:
            # state_dict() returns references to the live parameters, so they must
            # be cloned; otherwise later epochs silently overwrite the "best" weights.
            snapshot = {k: v.detach().cpu().clone() for k, v in m.state_dict().items()}
            best_acc, wait, best_state = val_acc, 0, snapshot
        else:
            wait += 1
            if wait>=patience: break

    # Test with the best validation weights (none exist if no epoch was run).
    if best_state is not None:
        m.load_state_dict(best_state)
    m.eval()
    ys=[]; ps=[]
    with torch.inference_mode():
        for x,y in dl_te:
            x = x.to(device)
            ps.append(m(x).argmax(1).cpu()); ys.append(y)
    y_true = torch.cat(ys).numpy(); y_pred = torch.cat(ps).numpy()
    print(f"[{out_prefix}] TEST\n", classification_report(y_true, y_pred, target_names=classes))
    print(f"[{out_prefix}] CM\n", confusion_matrix(y_true, y_pred))

    # Export checkpoint + ONNX + labels. The model is moved to the CPU first so the
    # saved tensors can be loaded on machines without a GPU.
    m = m.to('cpu')
    torch.save({"model":m.state_dict(), "classes":classes}, f"{out_prefix}.pth")
    dummy = torch.randn(1,3,IMG_SIZE,IMG_SIZE)
    torch.onnx.export(m, dummy, f"{out_prefix}.onnx", input_names=["input"], output_names=["logits"], opset_version=17)
    with open(f"{out_prefix}.labels.json","w") as f:
        json.dump(classes, f)


In [None]:
# Local (non-Drive) folder that receives all exported model files.
MODELS_DIR = Path('/content/models'); MODELS_DIR.mkdir(parents=True, exist_ok=True)

# 1) GATE: 4-class model (apple/banana/orange/other)
gate_out = MODELS_DIR/'gate_resnet18'
train_eval_export(ROOT/'dataset_gate_split', num_classes=4, out_prefix=str(gate_out),
                  epochs=30, lr=3e-4, patience=7, batch=32)

# 2) RIPENESS: one 3-class model per fruit (apple/banana/orange)
for fruit in FRUITS:
    # Collect this fruit's part of each split into a temporary folder so that the
    # folder passed to train_eval_export has train/val/test directly below it.
    # NOTE(review): the temporary copy lives under ROOT and is only removed when
    # training finishes without raising.
    tmp_root = ROOT/f'_ripeness_split_{fruit}'
    if tmp_root.exists(): shutil.rmtree(tmp_root)
    for sp in ['train','val','test']:
        src = ROOT/'dataset_ripeness_split'/sp/fruit
        if src.exists():
            shutil.copytree(src, tmp_root/sp)
    out = MODELS_DIR/f"ripeness_{fruit}_resnet18"
    train_eval_export(tmp_root, num_classes=3, out_prefix=str(out),
                      epochs=30, lr=3e-4, patience=7, batch=32)
    shutil.rmtree(tmp_root, ignore_errors=True)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 196MB/s]
  scaler = torch.cuda.amp.GradScaler(enabled=(device=='cuda'))
  with torch.cuda.amp.autocast(enabled=(device=='cuda')):


[/content/models/gate_resnet18] ep 01  tr_loss=0.3525  val_acc=0.915
[/content/models/gate_resnet18] ep 02  tr_loss=0.2044  val_acc=0.905
[/content/models/gate_resnet18] ep 03  tr_loss=0.1660  val_acc=0.952
[/content/models/gate_resnet18] ep 04  tr_loss=0.1941  val_acc=0.949
[/content/models/gate_resnet18] ep 05  tr_loss=0.1196  val_acc=0.972
[/content/models/gate_resnet18] ep 06  tr_loss=0.0760  val_acc=0.950
[/content/models/gate_resnet18] ep 07  tr_loss=0.0645  val_acc=0.967
[/content/models/gate_resnet18] ep 08  tr_loss=0.0855  val_acc=0.962
[/content/models/gate_resnet18] ep 09  tr_loss=0.1028  val_acc=0.854
[/content/models/gate_resnet18] ep 10  tr_loss=0.2060  val_acc=0.944
[/content/models/gate_resnet18] ep 11  tr_loss=0.0879  val_acc=0.965
[/content/models/gate_resnet18] ep 12  tr_loss=0.0458  val_acc=0.961
[/content/models/gate_resnet18] TEST
               precision    recall  f1-score   support

       apple       0.94      0.98      0.96       956
      banana       0.98  

  th.onnx.export(m, dummy, f"{out_prefix}.onnx", input_names=["input"], output_names=["logits"], opset_version=17)
  scaler = torch.cuda.amp.GradScaler(enabled=(device=='cuda'))
  with torch.cuda.amp.autocast(enabled=(device=='cuda')):


[/content/models/ripeness_apple_resnet18] ep 01  tr_loss=0.1004  val_acc=0.973
[/content/models/ripeness_apple_resnet18] ep 02  tr_loss=0.0434  val_acc=0.999
[/content/models/ripeness_apple_resnet18] ep 03  tr_loss=0.0138  val_acc=0.996
[/content/models/ripeness_apple_resnet18] ep 04  tr_loss=0.0299  val_acc=0.996
[/content/models/ripeness_apple_resnet18] ep 05  tr_loss=0.0136  val_acc=0.999
[/content/models/ripeness_apple_resnet18] ep 06  tr_loss=0.0289  val_acc=0.994
[/content/models/ripeness_apple_resnet18] ep 07  tr_loss=0.0074  val_acc=1.000
[/content/models/ripeness_apple_resnet18] ep 08  tr_loss=0.0151  val_acc=0.993
[/content/models/ripeness_apple_resnet18] ep 09  tr_loss=0.0273  val_acc=0.996
[/content/models/ripeness_apple_resnet18] ep 10  tr_loss=0.0129  val_acc=0.989
[/content/models/ripeness_apple_resnet18] ep 11  tr_loss=0.0125  val_acc=0.965
[/content/models/ripeness_apple_resnet18] ep 12  tr_loss=0.0176  val_acc=0.992
[/content/models/ripeness_apple_resnet18] ep 13  tr_

  th.onnx.export(m, dummy, f"{out_prefix}.onnx", input_names=["input"], output_names=["logits"], opset_version=17)
  scaler = torch.cuda.amp.GradScaler(enabled=(device=='cuda'))
  with torch.cuda.amp.autocast(enabled=(device=='cuda')):


[/content/models/ripeness_banana_resnet18] ep 01  tr_loss=0.0855  val_acc=0.986
[/content/models/ripeness_banana_resnet18] ep 02  tr_loss=0.0147  val_acc=0.976
[/content/models/ripeness_banana_resnet18] ep 03  tr_loss=0.0306  val_acc=0.991
[/content/models/ripeness_banana_resnet18] ep 04  tr_loss=0.0313  val_acc=0.984
[/content/models/ripeness_banana_resnet18] ep 05  tr_loss=0.0134  val_acc=1.000
[/content/models/ripeness_banana_resnet18] ep 06  tr_loss=0.0176  val_acc=0.966
[/content/models/ripeness_banana_resnet18] ep 07  tr_loss=0.0069  val_acc=0.993
[/content/models/ripeness_banana_resnet18] ep 08  tr_loss=0.0151  val_acc=0.983
[/content/models/ripeness_banana_resnet18] ep 09  tr_loss=0.0137  val_acc=0.990
[/content/models/ripeness_banana_resnet18] ep 10  tr_loss=0.0037  val_acc=0.962
[/content/models/ripeness_banana_resnet18] ep 11  tr_loss=0.0026  val_acc=0.999
[/content/models/ripeness_banana_resnet18] ep 12  tr_loss=0.0125  val_acc=0.988
[/content/models/ripeness_banana_resnet1

  th.onnx.export(m, dummy, f"{out_prefix}.onnx", input_names=["input"], output_names=["logits"], opset_version=17)
  scaler = torch.cuda.amp.GradScaler(enabled=(device=='cuda'))
  with torch.cuda.amp.autocast(enabled=(device=='cuda')):


[/content/models/ripeness_orange_resnet18] ep 01  tr_loss=0.1087  val_acc=0.978
[/content/models/ripeness_orange_resnet18] ep 02  tr_loss=0.0534  val_acc=0.988
[/content/models/ripeness_orange_resnet18] ep 03  tr_loss=0.0290  val_acc=0.997
[/content/models/ripeness_orange_resnet18] ep 04  tr_loss=0.0301  val_acc=1.000
[/content/models/ripeness_orange_resnet18] ep 05  tr_loss=0.0241  val_acc=0.997
[/content/models/ripeness_orange_resnet18] ep 06  tr_loss=0.0223  val_acc=0.995
[/content/models/ripeness_orange_resnet18] ep 07  tr_loss=0.0175  val_acc=0.972
[/content/models/ripeness_orange_resnet18] ep 08  tr_loss=0.0113  val_acc=0.995
[/content/models/ripeness_orange_resnet18] ep 09  tr_loss=0.0181  val_acc=0.998
[/content/models/ripeness_orange_resnet18] ep 10  tr_loss=0.0076  val_acc=1.000
[/content/models/ripeness_orange_resnet18] ep 11  tr_loss=0.0082  val_acc=0.997
[/content/models/ripeness_orange_resnet18] TEST
               precision    recall  f1-score   support

    overripe    

  th.onnx.export(m, dummy, f"{out_prefix}.onnx", input_names=["input"], output_names=["logits"], opset_version=17)


In [None]:
!zip -r /content/models_onnx.zip /content/models
print("✅ Siap diunduh: /content/models_onnx.zip")

  adding: content/models/ (stored 0%)
  adding: content/models/ripeness_apple_resnet18.pth (deflated 7%)
  adding: content/models/ripeness_orange_resnet18.pth (deflated 7%)
  adding: content/models/gate_resnet18.onnx (deflated 7%)
  adding: content/models/ripeness_orange_resnet18.labels.json (deflated 27%)
  adding: content/models/gate_resnet18.pth (deflated 7%)
  adding: content/models/gate_resnet18.labels.json (deflated 13%)
  adding: content/models/ripeness_banana_resnet18.pth (deflated 7%)
  adding: content/models/ripeness_banana_resnet18.onnx (deflated 7%)
  adding: content/models/ripeness_banana_resnet18.labels.json (deflated 27%)
  adding: content/models/ripeness_apple_resnet18.labels.json (deflated 27%)
  adding: content/models/ripeness_apple_resnet18.onnx (deflated 7%)
  adding: content/models/ripeness_orange_resnet18.onnx (deflated 7%)
✅ Siap diunduh: /content/models_onnx.zip
