# DATA PRE-PROCESSING

In [13]:
import os, sys, random
from sklearn import preprocessing, model_selection
import numpy as np
from shutil import copy2

In [19]:
# Create the dataset folder tree:
#   <root_folder>/images/{train,valid}
#   <root_folder>/labels/{train,valid}
root_folder = '../datasets/PP3/'

# os.makedirs(..., exist_ok=True) also creates missing parents (e.g. ../datasets/)
# and does not fail when the folders already exist, so this cell can be re-run
# after a kernel restart without raising FileExistsError.
for kind in ('images', 'labels'):
    for split in ('train', 'valid'):
        os.makedirs(os.path.join(root_folder, kind, split), exist_ok=True)

In [20]:
# Source folders: raw images and their YOLO (darknet) label files.
src_img_path = "../labeling/PP3/inputs/"
src_label_path = "../labeling/PP3/outputs/YOLO_darknet/"

# Destination folders for the image files of each split.
train_img_path = f"{root_folder}images/train"
valid_img_path = f"{root_folder}images/valid"

# Destination folders for the label files of each split.
train_label_path = f"{root_folder}labels/train"
valid_label_path = f"{root_folder}labels/valid"

In [21]:
# List the regular files of both source folders, ordered by name.
image_names = sorted(
    name for name in os.listdir(src_img_path)
    if os.path.isfile(os.path.join(src_img_path, name))
)
label_names = sorted(
    name for name in os.listdir(src_label_path)
    if os.path.isfile(os.path.join(src_label_path, name))
)

# Pair each image with its label by base name (file name without extension).
# Two independently sorted listings are NOT guaranteed to line up, because the
# differing extensions take part in the comparison (e.g. 'a.jpeg' < 'a.k.jpeg'
# but 'a.k.txt' < 'a.txt'), which would silently attach labels to the wrong image.
image_stems = [os.path.splitext(name)[0] for name in image_names]
label_by_stem = {os.path.splitext(name)[0]: name for name in label_names}

# Fail early, with the offending names, instead of training on a mis-paired dataset.
if len(set(image_stems)) != len(image_names) or len(label_by_stem) != len(label_names):
    raise ValueError("Several files share the same base name; image/label pairing is ambiguous")
images_without_label = [name for name, stem in zip(image_names, image_stems) if stem not in label_by_stem]
labels_without_image = sorted(label_by_stem[stem] for stem in set(label_by_stem) - set(image_stems))
if images_without_label or labels_without_image:
    raise ValueError(
        f"Unpaired files - images without label: {images_without_label}; "
        f"labels without image: {labels_without_image}"
    )

# X[i] and y[i] now always refer to the same sample.
X = image_names
y = [label_by_stem[stem] for stem in image_stems]

# Fixed random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33, shuffle=True, random_state=42)


print(X_test[0:5])
print(y_test[0:5])

['IMG_20200307_203241125.jpg', 'WhatsApp Image 2022-02-28 at 10.13.41 (1).jpeg', 'WhatsApp Image 2022-03-01 at 12.46.44.jpeg', 'WhatsApp Image 2022-02-28 at 10.20.27.jpeg', 'WhatsApp Image 2022-02-28 at 10.30.05.jpeg']
['IMG_20200307_203241125.txt', 'WhatsApp Image 2022-02-28 at 10.13.41 (1).txt', 'WhatsApp Image 2022-03-01 at 12.46.44.txt', 'WhatsApp Image 2022-02-28 at 10.20.27.txt', 'WhatsApp Image 2022-02-28 at 10.30.05.txt']


In [22]:
def segregate_data(x_, y_, img_path, label_path, split_img_path, split_label_path):
    """Copy the images and labels of one split into their destination folders.

    Parameters
    ----------
    x_ : iterable of str
        Image file names, relative to ``img_path``.
    y_ : iterable of str
        Label file names, relative to ``label_path``.
    img_path, label_path : str
        Source folders holding the images and the labels.
    split_img_path, split_label_path : str
        Destination folders for the images and the labels of this split.
        They are created (including parents) when they do not exist yet.

    Notes
    -----
    Files are copied with ``shutil.copy2`` under their original names; a file
    already present in the destination under the same name is overwritten.
    All images are copied first, then all labels.
    """
    jobs = (
        (x_, img_path, split_img_path),
        (y_, label_path, split_label_path),
    )
    for filenames, src_dir, dst_dir in jobs:
        # Make the function usable on its own, without a prior folder-creation step.
        os.makedirs(dst_dir, exist_ok=True)
        for filename in filenames:
            copy2(os.path.join(src_dir, filename), os.path.join(dst_dir, filename))

In [23]:
# Copy each split (file names + source folders) into its destination folders.
segregate_data(X_train, y_train, src_img_path, src_label_path, train_img_path, train_label_path)
segregate_data(X_test, y_test, src_img_path, src_label_path, valid_img_path, valid_label_path)

# Sanity check: report how many files ended up in every destination folder.
folder_report = (
    ("Num of Training images", train_img_path),
    ("Num of Training labels", train_label_path),
    ("Num of Test images", valid_img_path),
    ("Num of Test labels", valid_label_path),
)
for caption, folder in folder_report:
    print(caption, len(os.listdir(folder)))

Num of Training images 103
Num of Training labels 103
Num of Test images 51
Num of Test labels 51


# YOLO V5

In [25]:
# Check the dataset configuration file (dataset path, train/val folders, class names).
# NOTE(review): the training commands further down pass pp3.yaml, not pp.yaml —
# confirm which file is meant to be displayed here.

%cat 'pp.yaml'

path: ../datasets/PP
train: images/train
val: images/valid

# Classes
nc: 3  # number of classes
names: ['person', 'cat', 'dog']  # class names

In [11]:
# Uncomment the git clone command if running for the first time
# !git clone https://github.com/ultralytics/yolov5
# %cd ../yolov5

# install yolov5 requirements.... see readme files...
# !pip3 install -r requirements.txt

/home/sergio/Projects/Doutorado/ML/yolov5


In [None]:
# Load the TensorBoard notebook extension and open it on the folder where the
# training runs below write their logs (project=../yolov5/runs/train).
%load_ext tensorboard
%tensorboard --logdir ../yolov5/runs/train

In [3]:
# GPU only supports small models
# Active run: train.py with the yolov5s.yaml model config, 640 px images, batch size 8,
# 50 epochs, all labels treated as a single class (--single-cls), run name PP3_small.
# The two commented-out commands are alternative runs kept for reference
# (training with --weights '' and a yolov5l run on the CPU).
# NOTE(review): the saved log of this cell reports cfg=../yolov5/models/hub/yolov5s6.yaml,
# which differs from the --cfg passed here — the output appears to come from an
# earlier version of the command; re-run to refresh it.
!python3 ../yolov5/train.py --img 640 --batch 8 --epochs 50 --data ../applied-machine-learning/pp3.yaml --cfg ../yolov5/models/yolov5s.yaml --name PP3_small --single-cls
# !python3 ../yolov5/train.py --img 640 --batch 8 --epochs 100 --data ../applied-machine-learning/pp3.yaml --cfg ../yolov5/models/yolov5s.yaml --name PP3_small_scratch --weights ''
# !python3 ../yolov5/train.py --img 640 --batch 8 --epochs 100 --data ../applied-machine-learning/pp.yaml --cfg ../yolov5/models/yolov5l.yaml --name PP --device cpu

[34m[1mtrain: [0mweights=../yolov5/yolov5s.pt, cfg=../yolov5/models/hub/yolov5s6.yaml, data=../applied-machine-learning/pp3.yaml, hyp=../yolov5/data/hyps/hyp.scratch-low.yaml, epochs=50, batch_size=8, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=True, optimizer=SGD, sync_bn=False, workers=8, project=../yolov5/runs/train, name=PP3_small, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0m⚠️ YOLOv5 is out of date by 7 commits. Use `git pull` or `git clone https://github.com/ultralytics/yolov5` to update.
YOLOv5 🚀 v6.1-14-g8a66eba torch 1.10.2+cu102 CUDA:0 (NVIDIA GeForce GTX 1650, 3912MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.

## Training with yolov5m

In [2]:
# GPU only supports small models, so the medium model (yolov5m) is trained on the
# CPU instead (--device cpu), starting from the yolov5m.pt weights, for 100 epochs.
# No --batch is passed to the active command; the log below reports batch_size=16.
# The commented-out command is the GPU variant with --batch 4, kept for reference.
# !python3 ../yolov5/train.py --img 640 --batch 4 --epochs 100 --data ../applied-machine-learning/pp3.yaml --cfg ../yolov5/models/yolov5m.yaml --name PP3_m --weights 'yolov5m.pt'
!python3 ../yolov5/train.py --img 640 --epochs 100 --data ../applied-machine-learning/pp3.yaml --cfg ../yolov5/models/yolov5m.yaml --name PP3_m --weights 'yolov5m.pt' --device cpu

[34m[1mtrain: [0mweights=yolov5m.pt, cfg=../yolov5/models/yolov5m.yaml, data=../applied-machine-learning/pp3.yaml, hyp=../yolov5/data/hyps/hyp.scratch-low.yaml, epochs=100, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=cpu, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=../yolov5/runs/train, name=PP3_m, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0m⚠️ YOLOv5 is out of date by 10 commits. Use `git pull` or `git clone https://github.com/ultralytics/yolov5` to update.
YOLOv5 🚀 v6.1-14-g8a66eba torch 1.10.2+cu102 CPU

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.