In [1]:
# Mount Google Drive at /content/drive so the notebook can read and write
# files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd drive/MyDrive/animal_detection

/content/drive/MyDrive/animal_detection


### Data download & customization

In [3]:
!pip install kaggle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [4]:
# 사전 작업 : kaggle에서 API 토큰 설치 
# kaggle API 토큰 옮기기

from google.colab import files 
files.upload()

!mkdir -p ~/.kaggle 
!cp kaggle.json ~/.kaggle

!chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [5]:
# Download the data for the `kdtai-3` Kaggle competition with the Kaggle CLI.
# The archive (kdtai-3.zip) is saved into the current working directory.

!kaggle competitions download -c kdtai-3

Downloading kdtai-3.zip to /content/drive/MyDrive/animal_detection
 98% 440M/447M [00:05<00:00, 129MB/s]
100% 447M/447M [00:05<00:00, 90.9MB/s]


In [6]:
# Extract kdtai-3.zip into the current working directory.

import os
import zipfile

unzip_path = os.getcwd()
zip_path = os.path.join(unzip_path, 'kdtai-3.zip')

with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=unzip_path)

### Arrange the data in YOLO format

In [7]:
pwd

'/content/drive/MyDrive/animal_detection'

In [8]:
# Create the labels/ and images/ folders inside dataset/.

dataset_root = os.path.join(os.getcwd(), 'dataset')
labels_path = os.path.join(dataset_root, 'labels')
images_path = os.path.join(dataset_root, 'images')

for new_dir in (labels_path, images_path):
    os.makedirs(new_dir, exist_ok=True)

In [9]:
# Move the extracted test/ and train/ image folders into dataset/images/.
import shutil

test_path = os.path.join(os.getcwd(), 'dataset', 'test')
train_path = os.path.join(os.getcwd(), 'dataset', 'train')
destination_path = os.path.join(os.getcwd(), 'dataset', 'images')

os.makedirs(destination_path, exist_ok=True)

for src_dir in (test_path, train_path):
    target_dir = os.path.join(destination_path, os.path.basename(src_dir))
    if os.path.exists(target_dir):
        # Re-running the cell: shutil.move would raise because the target
        # already exists, so skip folders that are already in place.
        print(f'already in place: {target_dir}')
    else:
        # A missing source still raises here, so a broken extract is not hidden.
        print(shutil.move(src_dir, target_dir))

'/content/drive/MyDrive/animal_detection/dataset/images/train'

In [10]:
# Move test_output.csv / train_output.csv out of the image folders into
# dataset/labels/.

test_csv_path = os.path.join(os.getcwd(), 'dataset', 'images', 'test', 'test_output.csv')
train_csv_path = os.path.join(os.getcwd(), 'dataset', 'images', 'train', 'train_output.csv')
csv_destination = os.path.join(os.getcwd(), 'dataset', 'labels')

for csv_path in (test_csv_path, train_csv_path):
    moved_csv = os.path.join(csv_destination, os.path.basename(csv_path))
    if os.path.exists(moved_csv):
        # Re-running the cell: the CSV was already moved, and shutil.move
        # would raise on the existing target.
        print(f'already in place: {moved_csv}')
    else:
        # A missing source still raises here, so a broken layout is not hidden.
        print(shutil.move(csv_path, moved_csv))

'/content/drive/MyDrive/animal_detection/dataset/labels/train_output.csv'

In [11]:
import os
import pandas as pd

# Load the train / test label tables that were moved into dataset/labels.
train_df, test_df = (
    pd.read_csv(csv_file)
    for csv_file in (
        '/content/drive/MyDrive/animal_detection/dataset/labels/train_output.csv',
        '/content/drive/MyDrive/animal_detection/dataset/labels/test_output.csv',
    )
)

In [12]:
%cd dataset/labels

/content/drive/MyDrive/animal_detection/dataset/labels


In [13]:
# Create one annotation file per image from train_output.csv.
# Column 0 supplies the file name (zero-padded to 4 characters) and column 1
# the text written into train/<name>.txt (relative to the current directory).

os.makedirs('train', exist_ok=True)

# Iterate the two columns directly instead of indexing row by row with iloc.
for image_id, annotation in zip(train_df.iloc[:, 0], train_df.iloc[:, 1]):
    label_file = os.path.join('train', f'{str(image_id).zfill(4)}.txt')

    with open(label_file, 'w') as handle:
        handle.write(annotation)

In [14]:
# Create one annotation file per image from test_output.csv.
# Column 0 supplies the file name (zero-padded to 4 characters) and column 1
# the text written into test/<name>.txt (relative to the current directory).

os.makedirs('test', exist_ok=True)

# Iterate the two columns directly instead of indexing row by row with iloc.
for image_id, annotation in zip(test_df.iloc[:, 0], test_df.iloc[:, 1]):
    label_file = os.path.join('test', f'{str(image_id).zfill(4)}.txt')

    with open(label_file, 'w') as handle:
        handle.write(annotation)

### Data Split : train -> train, val

In [15]:
%cd ../..

/content/drive/MyDrive/animal_detection


In [16]:
from glob import glob

# glob returns paths in arbitrary, filesystem-dependent order. Sort them so
# the sampling and splitting steps see the same input order on every run.
img_list = sorted(glob("/content/drive/MyDrive/animal_detection/dataset/images/train/*.jpg"))
print(len(img_list))

1204


In [17]:
# Use 1200 of the 1204 images.

import random

# A dedicated, seeded generator makes the chosen subset reproducible; the
# seed matches the random_state used for the train/val split.
sampler = random.Random(2000)

# Cap k so the cell does not raise if fewer than 1200 images were found.
random_image_path = sampler.sample(img_list, k=min(1200, len(img_list)))
print(len(random_image_path))

1200


In [18]:
# Split the sampled images into training and validation sets (4 : 1).

from sklearn.model_selection import train_test_split

train_img_list, val_img_list = train_test_split(
    random_image_path,
    test_size=0.2,
    random_state=2000,
)

print(f'{len(train_img_list)} {len(val_img_list)}')

960 240


In [20]:
# Preview the first five training image paths.
train_img_list[:5]

['/content/drive/MyDrive/animal_detection/dataset/images/train/1057.jpg',
 '/content/drive/MyDrive/animal_detection/dataset/images/train/0387.jpg',
 '/content/drive/MyDrive/animal_detection/dataset/images/train/0649.jpg',
 '/content/drive/MyDrive/animal_detection/dataset/images/train/0263.jpg',
 '/content/drive/MyDrive/animal_detection/dataset/images/train/0318.jpg']

In [22]:
# Write the train / val image paths to txt files under dataset/,
# one path per line.

for list_file, image_paths in (
    ('./dataset/train.txt', train_img_list),
    ('./dataset/val.txt', val_img_list),
):
    with open(list_file, 'w') as f:
        f.write('\n'.join(image_paths) + '\n')

### yaml 파일 생성

In [23]:
%cd dataset

/content/drive/MyDrive/animal_detection/dataset


In [24]:
# Write data.yaml: class names, number of classes, the dataset root, and the
# train / val list-file paths. 'test' is left empty here.

import yaml

dataset_dir = os.getcwd()

data = dict(
    names=['buffalo', 'elephant', 'rhino', 'zebra'],
    nc=4,
    path=dataset_dir,
    train=os.path.join(dataset_dir, 'train.txt'),
    val=os.path.join(dataset_dir, 'val.txt'),
    test=None,
)

# Serialize the configuration into data.yaml in the current directory.
with open('data.yaml', 'w') as yaml_file:
    yaml.dump(data, yaml_file)

### Yolov5 training

In [25]:
pwd

'/content/drive/MyDrive/animal_detection/dataset'

In [26]:
# YOLOv5 설치

%cd ..

!git clone https://github.com/ultralytics/yolov5.git

/content/drive/MyDrive/animal_detection
Cloning into 'yolov5'...
remote: Enumerating objects: 15978, done.[K
remote: Counting objects: 100% (147/147), done.[K
remote: Compressing objects: 100% (77/77), done.[K
remote: Total 15978 (delta 89), reused 110 (delta 70), pack-reused 15831[K
Receiving objects: 100% (15978/15978), 14.54 MiB | 14.24 MiB/s, done.
Resolving deltas: 100% (10963/10963), done.


In [27]:
%cd yolov5

!pip install -r requirements.txt

/content/drive/MyDrive/animal_detection/yolov5
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gitpython>=3.1.30 (from -r requirements.txt (line 5))
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting ultralytics>=8.0.111 (from -r requirements.txt (line 18))
  Downloading ultralytics-8.0.116-py3-none-any.whl (599 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m599.6/599.6 kB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython>=3.1.30->-r requirements.txt (line 5))
  Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m8.6 MB/s[0m eta

### model train

In [28]:
# Train YOLOv5s on the animal dataset: image size 224, batch size 32,
# 30 epochs, using the dataset definition in data.yaml. The run is named
# `animal_weights`.
!python train.py --img 224 --batch 32 --epochs 30 --data /content/drive/MyDrive/animal_detection/dataset/data.yaml --cfg /content/drive/MyDrive/animal_detection/yolov5/models/yolov5s.yaml --weights /content/drive/MyDrive/animal_detection/yolov5/models/yolov5s.pt --name animal_weights 

[34m[1mtrain: [0mweights=/content/drive/MyDrive/animal_detection/yolov5/models/yolov5s.pt, cfg=/content/drive/MyDrive/animal_detection/yolov5/models/yolov5s.yaml, data=/content/drive/MyDrive/animal_detection/dataset/data.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=30, batch_size=32, imgsz=224, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=animal_weights, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-178-ga199480 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentu

### Yolov5 test

In [30]:
from glob import glob

# Collect the paths of all test images and report how many were found.
test_pattern = "/content/drive/MyDrive/animal_detection/dataset/images/test/*.jpg"
test_img_list = glob(test_pattern)
print(len(test_img_list))

300


In [31]:
# Write the test image paths to dataset/test.txt, one path per line.
test_list_text = '\n'.join(test_img_list) + '\n'
with open('/content/drive/MyDrive/animal_detection/dataset/test.txt', 'w') as f:
    f.write(test_list_text)

In [32]:
# Add the test.txt path to data.yaml.

import yaml

yaml_path = '/content/drive/MyDrive/animal_detection/dataset/data.yaml'

# Read the existing configuration and show it before the change.
with open(yaml_path, 'r') as f:
    data = yaml.load(f, Loader=yaml.SafeLoader)
print(data)

data['test'] = '/content/drive/MyDrive/animal_detection/dataset/test.txt'

# Write the updated configuration back to the same file and show it.
with open(yaml_path, 'w') as f:
    yaml.dump(data, f)

print(data)

{'names': ['buffalo', 'elephant', 'rhino', 'zebra'], 'nc': 4, 'path': '/content/drive/MyDrive/animal_detection/dataset', 'test': None, 'train': '/content/drive/MyDrive/animal_detection/dataset/train.txt', 'val': '/content/drive/MyDrive/animal_detection/dataset/val.txt'}
{'names': ['buffalo', 'elephant', 'rhino', 'zebra'], 'nc': 4, 'path': '/content/drive/MyDrive/animal_detection/dataset', 'test': '/content/drive/MyDrive/animal_detection/dataset/test.txt', 'train': '/content/drive/MyDrive/animal_detection/dataset/train.txt', 'val': '/content/drive/MyDrive/animal_detection/dataset/val.txt'}


In [33]:
!python val.py --task "test" --data /content/drive/MyDrive/animal_detection/dataset/data.yaml --weights /content/drive/MyDrive/animal_detection/yolov5/runs/train/animal_weights/weights/best.pt  --iou-thres 0.8 --device 0 --save-txt

[34m[1mval: [0mdata=/content/drive/MyDrive/animal_detection/dataset/data.yaml, weights=['/content/drive/MyDrive/animal_detection/yolov5/runs/train/animal_weights/weights/best.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.8, max_det=300, task=test, device=0, workers=8, single_cls=False, augment=False, verbose=False, save_txt=True, save_hybrid=False, save_conf=False, save_json=False, project=runs/val, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 v7.0-178-ga199480 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
YOLOv5s summary: 157 layers, 7020913 parameters, 0 gradients, 15.8 GFLOPs
[34m[1mtest: [0mScanning /content/drive/MyDrive/animal_detection/dataset/test... 300 images, 0 backgrounds, 0 corrupt: 100% 300/300 [00:01<00:00, 169.29it/s]
[34m[1mtest: [0mNew cache created: /content/drive/MyDrive/animal_detection/dataset/test.cache
                 Class     Images  Instances          P          R      mAP75   mAP50-95: 1