設定路徑+下載資料

In [3]:
import os, urllib.request, zipfile
from pathlib import Path

# Dataset root directory
dataset_dir=Path("/content/datasets/VisDrone")
dataset_dir.mkdir(parents=True,exist_ok=True)

# Archive file name -> download URL
urls={
  "VisDrone2019-DET-train.zip":"https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-train.zip",
  "VisDrone2019-DET-val.zip":"https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-val.zip",
}

# Download, extract, then delete each archive.
for fname,url in urls.items():
  # Each archive is expected to unpack into a folder named after the zip
  # (train_dir/val_dir below rely on the same layout). If that folder
  # already holds images/ and annotations/, skip the expensive download so
  # the cell can be re-run cheaply. Delete the folder to force a refresh.
  extracted_dir=dataset_dir/Path(fname).stem
  if (extracted_dir/"images").is_dir() and (extracted_dir/"annotations").is_dir():
    print(f"Skipping {fname}: already extracted at {extracted_dir}")
    continue
  zip_path=dataset_dir/fname
  print(f"Downloading {fname} ...")
  urllib.request.urlretrieve(url,zip_path)
  print(f"Unzipping {fname} ...")
  try:
    with zipfile.ZipFile(zip_path,"r") as zf:
      zf.extractall(dataset_dir)
  finally:
    # Remove the archive even when extraction fails, so a corrupt
    # download is not left behind on disk.
    zip_path.unlink()

# Folder paths used by YOLO
train_dir=dataset_dir/"VisDrone2019-DET-train"
val_dir=dataset_dir/"VisDrone2019-DET-val"
print("Train dir:",train_dir)
print("Val dir  :",val_dir)


Downloading VisDrone2019-DET-train.zip ...
Unzipping VisDrone2019-DET-train.zip ...
Downloading VisDrone2019-DET-val.zip ...
Unzipping VisDrone2019-DET-val.zip ...
Train dir: /content/datasets/VisDrone/VisDrone2019-DET-train
Val dir  : /content/datasets/VisDrone/VisDrone2019-DET-val


把 VisDrone 原始標註轉成 YOLO 格式 TXT

In [None]:
#讀每張圖的 .txt 標註，把 (x,y,w,h,score,class) 轉成 YOLO 需要的
#class x_center y_center w_norm h_norm（都除以影像寬高），輸出到 labels/
import cv2
def _visdrone_row_to_yolo(line,img_w,img_h,num_classes=10):
  """Convert one VisDrone annotation line into a YOLO label line.

  The first six comma-separated fields are read as
  ``x,y,w,h,score,class`` (box in pixels, x/y being the top-left corner).

  Returns the string ``"class x_center y_center width height"`` with the
  coordinates divided by the image size, or ``None`` when the row must be
  skipped: too few fields, unparsable numbers, score 0, degenerate box,
  centre outside the image, box larger than the image, or a class id
  outside ``0..num_classes-1`` after the 1-based -> 0-based shift.
  """
  p=line.strip().split(",")
  if len(p)<6:
    return None
  try:
    x,y,bw,bh,score=(float(v) for v in p[:5])
    cls_raw=int(p[5])
  except ValueError:
    # Malformed row: skip it instead of aborting the whole split.
    return None
  if score==0:
    return None
  if bw<=0 or bh<=0 or img_w<=0 or img_h<=0:
    return None
  cls_yolo=cls_raw-1
  if not 0<=cls_yolo<num_classes:
    # Raw ids 0 or >num_classes would become -1 / num_classes, which are
    # not valid indices for a num_classes-class dataset.
    return None
  xc=(x+bw/2)/img_w
  yc=(y+bh/2)/img_h
  bw_n=bw/img_w
  bh_n=bh/img_h
  if not(0<=xc<=1 and 0<=yc<=1):
    return None
  if bw_n>1 or bh_n>1:
    return None
  return f"{cls_yolo} {xc:.6f} {yc:.6f} {bw_n:.6f} {bh_n:.6f}"

def convert_split(split_name,num_classes=10):
  """Write YOLO-format label files for every .jpg of one dataset split.

  Reads ``<dataset_dir>/<split_name>/images`` and ``.../annotations`` and
  writes one ``.txt`` per image into ``.../labels`` (created if missing).
  An image without an annotation file, or one that cv2 cannot read, gets
  an empty label file.

  Args:
    split_name: folder name of the split under ``dataset_dir``.
    num_classes: number of valid classes; rows whose 0-based class id is
      outside ``0..num_classes-1`` are dropped.
  """
  split_dir=os.path.join(dataset_dir,split_name)
  img_dir=os.path.join(split_dir,"images")
  ann_dir=os.path.join(split_dir,"annotations")
  labels_dir=os.path.join(split_dir,"labels")
  os.makedirs(labels_dir,exist_ok=True)
  img_files=sorted([f for f in os.listdir(img_dir) if f.lower().endswith(".jpg")])
  print("處理",split_name,"圖片數:",len(img_files))
  box_total=0
  for img_name in img_files:
    stem=os.path.splitext(img_name)[0]
    img_path=os.path.join(img_dir,img_name)
    ann_path=os.path.join(ann_dir,stem+".txt")
    label_path=os.path.join(labels_dir,stem+".txt")
    lines_out=[]
    if os.path.exists(ann_path):
      # The image is only needed for its size, used to normalise the boxes.
      img=cv2.imread(img_path)
      if img is not None:
        h,w=img.shape[:2]
        with open(ann_path,"r") as f:
          for line in f:
            row=_visdrone_row_to_yolo(line,w,h,num_classes)
            if row is not None:
              lines_out.append(row)
    # Always write the label file; it stays empty when nothing was kept.
    with open(label_path,"w") as f:
      if lines_out:
        f.write("\n".join(lines_out))
    box_total+=len(lines_out)
  print("總 boxes:",box_total,"labels_dir:",labels_dir)

# Convert both splits, train first and then val.
for split_name in ("VisDrone2019-DET-train","VisDrone2019-DET-val"):
  convert_split(split_name)



建立 YOLO 用的 visdrone.yaml

In [None]:
#告訴 YOLO：資料在哪裡、train/val 路徑、類別數量與名稱
import textwrap
# Dataset description for YOLO: root path, train/val image folders
# (relative to the root) and the class-id -> name mapping.
yaml_text=textwrap.dedent(f"""
path: {dataset_dir}
train: VisDrone2019-DET-train/images
val: VisDrone2019-DET-val/images
nc: 10
names:
  0: pedestrian
  1: people
  2: bicycle
  3: car
  4: van
  5: truck
  6: tricycle
  7: awning-tricycle
  8: bus
  9: motor
""").strip()
os.makedirs("data",exist_ok=True)
yaml_path="data/visdrone.yaml"
with open(yaml_path,"w",encoding="utf-8") as f:
  f.write(yaml_text)
# Read the file back to show exactly what was written; `with` closes the
# handle instead of leaving it to the garbage collector.
with open(yaml_path,encoding="utf-8") as f:
  print(f.read())


安裝 Ultralytics（YOLO，底層是 PyTorch）

In [None]:
#安裝 YOLO 套件，裡面已經幫你包好 PyTorch model+訓練流程。
!pip install -q ultralytics

用 GPU 訓練 YOLO 模型（PyTorch 在 GPU 上跑）

In [None]:
from ultralytics import YOLO
from pathlib import Path
import shutil

# Fine-tune the pretrained yolov8n checkpoint on the VisDrone yaml.
model=YOLO("yolov8n.pt")
results=model.train(
  data="data/visdrone.yaml",
  epochs=30,
  imgsz=800,
  batch=16,
  device=0,
  project="runs",
  name="yolov8n",
  exist_ok=True
)
print("訓練完成")

# Zip this run's output folder into /content/logs and download it.
# Prefer the directory the trainer reports it wrote to, so the archive
# cannot drift from the project/name arguments above; fall back to the
# conventional runs/yolov8n location when the attribute is unavailable.
trainer=getattr(model,"trainer",None)
run_dir=Path(getattr(trainer,"save_dir",Path("runs")/"yolov8n"))
if not run_dir.is_dir():
  raise FileNotFoundError(f"training output folder not found: {run_dir}")
logs_dir=Path("/content/logs")
logs_dir.mkdir(parents=True,exist_ok=True)

zip_path=logs_dir/"yolov8n_logs.zip"
# make_archive appends ".zip" itself, so pass the path without the suffix.
shutil.make_archive(str(zip_path.with_suffix("")),"zip",run_dir)

from google.colab import files
files.download(str(zip_path))


用最佳模型在 val 圖片上做推論 demo

In [None]:
#讀best.pt，隨機選幾張驗證集圖片，畫出偵測結果
import random,cv2,matplotlib.pyplot as plt
from pathlib import Path
# Best weights written by the training cell (project="runs", name="yolov8n").
best_ckpt="runs/yolov8n/weights/best.pt"
ckpt_found=os.path.exists(best_ckpt)
print("best_ckpt exists:",ckpt_found)
if not ckpt_found:
  # Fail here with a clear message instead of deep inside the model loader.
  raise FileNotFoundError(f"{best_ckpt} not found - run the training cell first")
best_model=YOLO(best_ckpt)
val_img_dir=os.path.join(dataset_dir,"VisDrone2019-DET-val","images")
val_imgs=sorted(Path(val_img_dir).glob("*.jpg"))
print("val imgs:",len(val_imgs))
# Pick up to four validation images at random.
samples=random.sample(val_imgs,k=min(4,len(val_imgs)))
print("samples:",[p.name for p in samples])
# NOTE(review): training used imgsz=800 while this demo predicts at 640 -
# confirm the lower resolution is intentional.
results=best_model.predict(
  source=[str(p) for p in samples],
  imgsz=640,
  conf=0.25,
  device=0,
  verbose=False
)
for p,r in zip(samples,results):
  # r.plot() output is treated as BGR here; convert to RGB for matplotlib.
  img_bgr=r.plot()
  img=cv2.cvtColor(img_bgr,cv2.COLOR_BGR2RGB)
  plt.figure(figsize=(6,5))
  plt.imshow(img);plt.axis("off");plt.title(p.name)
  plt.show()



In [None]:
# 1. Grab the underlying PyTorch model wrapped by the YOLO object
#    (described as an nn.Module in the original note).
yolo=model.model

# Print its type, then its structure below a blank separator line.
print("模型型別:",type(yolo))
print("模型結構:\n",yolo,sep="\n")


從 bbox 像素 → 座標表

In [1]:
!pip install ultralytics -q

from ultralytics import YOLO

model_path="/content/best.pt"  # 你上傳的模型
model=YOLO(model_path)
print("loaded:",model_path)


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.1/1.1 MB[0m [31m42.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[?25hCreating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
loaded: /content/best.pt


In [7]:
import torch

# Report the PyTorch build and, when CUDA is usable, the visible GPU(s).
cuda_ok=torch.cuda.is_available()
print("PyTorch 版本:", torch.__version__)
print("CUDA 是否可用:", cuda_ok)
if cuda_ok:
    gpu_count=torch.cuda.device_count()
    gpu_name=torch.cuda.get_device_name(0)
    print("GPU 數量:", gpu_count)
    print("目前使用的 GPU:", gpu_name)


PyTorch 版本: 2.9.0+cu126
CUDA 是否可用: True
GPU 數量: 1
目前使用的 GPU: Tesla T4


In [4]:
#影像 → bbox 表格（pixel 座標）

import os,glob
import pandas as pd

# Folder of images to run detection on; change this to your own folder.
img_dir="/content/datasets/VisDrone/VisDrone2019-DET-val/images"

img_paths=glob.glob(os.path.join(img_dir,"*.jpg"))
img_paths.sort()
print("found images:",len(img_paths))

# One row per detected box: image name, pixel corners, confidence, class id.
rows=[]
for img_path in img_paths:
    name=os.path.basename(img_path)
    results=model.predict(source=img_path,conf=0.25,verbose=False)
    r=results[0]  # a single source is passed, so only the first result is used
    det=r.boxes
    boxes=det.xyxy.cpu().numpy()  # x1,y1,x2,y2
    scores=det.conf.cpu().numpy()
    cls=det.cls.cpu().numpy().astype(int)
    rows.extend(
        [name,x1,y1,x2,y2,s,c]
        for (x1,y1,x2,y2),s,c in zip(boxes,scores,cls)
    )

columns=["image","x1","y1","x2","y2","score","cls"]
df=pd.DataFrame(rows,columns=columns)
csv_path="/content/detections_pixel.csv"
df.to_csv(csv_path,index=False)
print("saved",csv_path)
df.head()


found images: 548
saved /content/detections_pixel.csv


Unnamed: 0,image,x1,y1,x2,y2,score,cls
0,0000001_02999_d_0000005.jpg,608.656982,363.82373,708.375732,414.172089,0.916222,3
1,0000001_02999_d_0000005.jpg,868.807129,569.582703,926.074341,664.771179,0.89836,3
2,0000001_02999_d_0000005.jpg,877.743225,704.197998,942.588013,814.114624,0.89278,3
3,0000001_02999_d_0000005.jpg,949.335876,591.371948,1010.858032,683.987,0.873278,3
4,0000001_02999_d_0000005.jpg,744.464539,322.546204,826.563477,378.155396,0.833203,3


In [5]:
#pixel 中心點 → 假經緯度

import numpy as np

# Origin of the fake coordinate system (lon/lat assigned to pixel (0,0)).
lon0=121.0
lat0=25.0
# Degrees added per pixel along the image x and y axes.
dx=0.00001
dy=0.00001

def pixel_to_fake_latlon(x,y):
    """Map a pixel position to a made-up (lon, lat) pair.

    Longitude grows with x. Latitude shrinks as y grows, because image y
    points down while latitude points up.

    Returns:
        Tuple ``(lon, lat)``.
    """
    return lon0+x*dx, lat0-y*dy

# Work on a copy so the pixel-coordinate table stays untouched.
df_geo=df.copy()
# Box centre in pixels.
df_geo["xc"]=(df_geo["x1"]+df_geo["x2"])/2
df_geo["yc"]=(df_geo["y1"]+df_geo["y2"])/2

# Convert every centre in one vectorised call instead of a Python loop.
# Force float64 so the result matches per-row Python-float arithmetic even
# if the pixel columns were stored at lower precision.
lons,lats=pixel_to_fake_latlon(
    df_geo["xc"].to_numpy(dtype="float64"),
    df_geo["yc"].to_numpy(dtype="float64"),
)

df_geo["lon"]=lons
df_geo["lat"]=lats

out_path="/content/detections_fake_geo.csv"
df_geo.to_csv(out_path,index=False)
print("saved",out_path)
df_geo[["image","xc","yc","lon","lat","score","cls"]].head()






saved /content/detections_fake_geo.csv


Unnamed: 0,image,xc,yc,lon,lat,score,cls
0,0000001_02999_d_0000005.jpg,658.516357,388.997925,121.006585,24.99611,0.916222,3
1,0000001_02999_d_0000005.jpg,897.440735,617.176941,121.008974,24.993828,0.89836,3
2,0000001_02999_d_0000005.jpg,910.165649,759.156311,121.009102,24.992408,0.89278,3
3,0000001_02999_d_0000005.jpg,980.096924,637.679443,121.009801,24.993623,0.873278,3
4,0000001_02999_d_0000005.jpg,785.514038,350.3508,121.007855,24.996496,0.833203,3
