In [2]:
import fiftyone as fo
import fiftyone.zoo as foz


In [31]:

# Load the COCO-2017 train split from the FiftyOne dataset zoo, requesting
# detection labels and the classes person, cat and dog.
train = foz.load_zoo_dataset(
    "coco-2017",
    split="train",
    label_types=["detections"],
    classes=["person", "cat", "dog"],
    # max_samples=200  # optional cap for quick experiments; disabled for the full run
)


Downloading split 'train' to '/home/sergio/fiftyone/coco-2017/train' if necessary
Found annotations at '/home/sergio/fiftyone/coco-2017/raw/instances_train2017.json'
Sufficient images already downloaded
Existing download of split 'train' is sufficient
Loading existing dataset 'coco-2017-train'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [32]:

# Load the COCO-2017 test split with the same arguments as the train split.
# NOTE(review): the loader output for this cell reports that the test split is
# unlabeled and that the `classes` requirement is ignored, so all test images
# are loaded and none of them carries detections.
test = foz.load_zoo_dataset(
    "coco-2017",
    split="test",
    label_types=["detections"],
    classes=["person", "cat", "dog"],
    # max_samples=100  # optional cap for quick experiments; disabled for the full run
)

# Optional: inspect the samples in the FiftyOne App.
# session = fo.launch_app(test)


Downloading split 'test' to '/home/sergio/fiftyone/coco-2017/test' if necessary
Test split is unlabeled; ignoring classes requirement
Found test info at '/home/sergio/fiftyone/coco-2017/raw/image_info_test2017.json'
Images already downloaded
Existing download of split 'test' is sufficient
Loading 'coco-2017' split 'test'
Dataset is unlabeled; ignoring classes requirement
 100% |█████████████| 40670/40670 [11.8s elapsed, 0s remaining, 3.2K samples/s]      
Dataset 'coco-2017-test' created


In [33]:

# Load the COCO-2017 validation split from the FiftyOne dataset zoo, requesting
# detection labels and the classes person, cat and dog.
validation = foz.load_zoo_dataset(
    "coco-2017",
    split="validation",
    label_types=["detections"],
    classes=["person", "cat", "dog"],
    # max_samples=100  # optional cap for quick experiments; disabled for the full run
)

Downloading split 'validation' to '/home/sergio/fiftyone/coco-2017/validation' if necessary
Found annotations at '/home/sergio/fiftyone/coco-2017/raw/instances_val2017.json'
Sufficient images already downloaded
Existing download of split 'validation' is sufficient
Loading 'coco-2017' split 'validation'
 100% |███████████████| 2945/2945 [14.7s elapsed, 0s remaining, 200.5 samples/s]      
Dataset 'coco-2017-validation' created


In [34]:
# Settings shared by the three export calls that follow.
export_dir = "../datasets/COCO_REDUC_PP"  # root directory every split is exported into
label_field = "ground_truth"  # sample field whose labels are exported


In [35]:

# Export the validation split in YOLOv5 layout.
# `split` is passed explicitly so this call reads like the test/train exports
# instead of relying on the exporter's default; "val" is the folder the rest of
# the notebook reads the validation labels from (<export_dir>/labels/val/).
validation.export(
    export_dir=export_dir,
    dataset_type=fo.types.YOLOv5Dataset,
    label_field=label_field,
    split="val",
    classes=["person", "cat", "dog"],
)


Directory '../datasets/COCO_REDUC_PP' already exists; export will be merged with existing files
   2% |\--------------|   67/2945 [329.9ms elapsed, 14.2s remaining, 203.1 samples/s] 



   3% ||--------------|   90/2945 [431.7ms elapsed, 13.7s remaining, 208.5 samples/s] 



   7% |/--------------|  195/2945 [1.1s elapsed, 15.7s remaining, 175.7 samples/s]    



  32% |████\----------|  937/2945 [4.2s elapsed, 9.0s remaining, 218.5 samples/s]     



  46% |██████|--------| 1364/2945 [5.9s elapsed, 6.7s remaining, 241.6 samples/s]     



  60% |████████|------| 1760/2945 [7.6s elapsed, 5.0s remaining, 243.6 samples/s]     



 100% |███████████████| 2945/2945 [12.1s elapsed, 0s remaining, 265.7 samples/s]      


In [36]:

# Export the test split into the same directory as the "test" split.
# NOTE(review): the loader reported this split as unlabeled, and the class
# count computed later for labels/test is zero for every class -- confirm that
# exporting it is actually wanted.
test.export(
    export_dir=export_dir,
    dataset_type=fo.types.YOLOv5Dataset,
    label_field=label_field,
    split="test",
    classes=["person", "cat", "dog"]
)


Directory '../datasets/COCO_REDUC_PP' already exists; export will be merged with existing files
 100% |█████████████| 40670/40670 [33.3s elapsed, 0s remaining, 1.3K samples/s]      


In [37]:

# Export the train split into the same directory as the "train" split, using
# the same class list as the other exports.
train.export(
    export_dir=export_dir,
    dataset_type=fo.types.YOLOv5Dataset,
    label_field=label_field,
    split="train",
    classes=["person", "cat", "dog"]
)

Directory '../datasets/COCO_REDUC_PP' already exists; export will be merged with existing files
 100% |█████████████| 69713/69713 [4.7m elapsed, 0s remaining, 242.8 samples/s]      


In [38]:
# Count items from each class

import glob

export_dir = "../datasets/COCO_REDUC_PP"

# Class ids as strings: they are compared with the text before the first space
# of every label line.
# Presumably the ids follow the order of the `classes` list given to the
# exports (person, cat, dog) -- TODO confirm against the exported dataset.yaml.
person = "0"
cat = "1"
dog = "2"

def count_classes(dir):
    """Count person, cat and dog annotations in a directory of label files.

    Every ``*.txt`` file directly inside ``dir`` is read line by line. The text
    before the first space of each line is taken as the class id and compared
    with the module-level ``person``, ``cat`` and ``dog`` id strings; lines
    carrying any other id are ignored.

    Args:
        dir: Path of the labels directory, with or without a trailing path
            separator. (The name shadows the ``dir`` builtin; it is kept so
            existing callers keep working.)

    Returns:
        ``[persons, cats, dogs]`` -- the number of matching label lines. The
        same totals are also printed.
    """
    import os  # local import: the cell defining this function only imports glob

    persons = 0
    cats = 0
    dogs = 0
    # os.path.join builds the same pattern whether or not `dir` ends with a
    # separator; plain concatenation silently matched nothing without one.
    for file in glob.glob(os.path.join(dir, "*.txt")):
        # The context manager closes the file; no explicit close() is needed.
        with open(file) as f:
            for line in f:
                class_id = line.split(" ", 1)[0]
                if class_id == person:
                    persons += 1
                elif class_id == cat:
                    cats += 1
                elif class_id == dog:
                    dogs += 1
    print("Persons: ", persons)
    print("Cats: ", cats)
    print("Dogs: ", dogs)
    return [persons, cats, dogs]


# Label folders of the three exported splits.
train_dir = f"{export_dir}/labels/train/"
test_dir = f"{export_dir}/labels/test/"
validation_dir = f"{export_dir}/labels/val/"

# Tally every split, printing a banner before each one.
print(" ---- TRAIN ----")
train_counts = count_classes(train_dir)

print(" ---- TEST ----")
test_counts = count_classes(test_dir)

print(" ---- VALIDATION ----")
validation_counts = count_classes(validation_dir)


 ---- TRAIN ----
Persons:  262465
Cats:  4768
Dogs:  5508
 ---- TEST ----
Persons:  0
Cats:  0
Dogs:  0
 ---- VALIDATION ----
Persons:  11004
Cats:  202
Dogs:  218


In [41]:
# Remove surplus person-only samples so persons do not outnumber the other classes
import random, os

def remove_persons_excess(labels_dir, images_dir, persons, others):
    """Randomly delete person-only samples until persons stop outnumbering the rest.

    The ``*.txt`` label files in ``labels_dir`` are visited once each, in random
    order. A file qualifies for deletion when it has at least one line whose
    class id (text before the first space) equals the module-level ``person``
    id and no line with any other id. For each qualifying file the label file
    and ``<images_dir>/<label stem>.jpg`` are deleted and the running person
    count is reduced by the number of person lines in that file. Processing
    stops as soon as ``persons <= others``.

    Compared with repeatedly drawing random files, the single shuffled pass
    never re-reads a file, cannot pick a file that was already deleted, and
    stops only when no candidate is left.

    Args:
        labels_dir: Directory holding the label files (trailing separator
            optional).
        images_dir: Directory holding the matching ``.jpg`` images (trailing
            separator optional).
        persons: Current number of person annotations in ``labels_dir``.
        others: Current number of annotations of all other classes.

    Returns:
        None. Files are deleted on disk and progress is printed.
    """
    print ("----> " + str(persons) + " persons and others " + str(others) + " objects")
    label_files = glob.glob(os.path.join(labels_dir, "*.txt"))
    random.shuffle(label_files)  # random visiting order, without replacement
    files_left = len(label_files)
    for label_path in label_files:
        if persons <= others:
            break
        person_lines = 0
        other_lines = 0
        with open(label_path) as f:
            for line in f:
                if line.split(" ", 1)[0] == person:
                    person_lines += 1
                else:
                    other_lines += 1
        if person_lines == 0 or other_lines > 0:
            continue  # keep files with no persons or with any other class
        stem = os.path.splitext(os.path.basename(label_path))[0]
        image_path = os.path.join(images_dir, stem + ".jpg")
        try:
            os.remove(label_path)
        except OSError as err:
            print("Error while deleting file : ", label_path, err)
            continue
        # The label file is gone, so the bookkeeping is updated right away; a
        # failure to delete the image must not leave the counters stale.
        persons -= person_lines
        files_left -= 1
        try:
            os.remove(image_path)
        except OSError as err:
            print("Error while deleting file : ", image_path, err)
        print("Files in dir: ", files_left, end="\r")
    if persons > others:
        # Every label file was inspected and nothing removable is left.
        print("-------------> Emergency stop....")
    print("---------------------> REMOVE PERSONS FINISHED")


In [40]:
# Count the validation labels, trim the person surplus, then count again.
validation_counts = count_classes(validation_dir)
remove_persons_excess(
    labels_dir=validation_dir,
    images_dir=f"{export_dir}/images/val/",
    persons=validation_counts[0],
    others=sum(validation_counts[1:]),  # cats + dogs
)
count_classes(validation_dir)

Persons:  11004
Cats:  202
Dogs:  218
----> 11004 persons and others 420 objects
Persons:  419  3810
Cats:  202
Dogs:  218


[419, 202, 218]

In [42]:
# Count the train labels, trim the person surplus, then count again.
train_counts = count_classes(train_dir)
remove_persons_excess(
    labels_dir=train_dir,
    images_dir=f"{export_dir}/images/train/",
    persons=train_counts[0],
    others=sum(train_counts[1:]),  # cats + dogs
)
count_classes(train_dir)

Persons:  262465
Cats:  4768
Dogs:  5508
----> 262465 persons and others 10276 objects
---------------------> REMOVE PERSONS FINISHED
Persons:  10274
Cats:  4768
Dogs:  5508


[10274, 4768, 5508]

In [43]:
# Train YOLOv5 (yolov5s model config) for 100 epochs at image size 640 and
# batch size 8, using the dataset described by coco_pp_reduc.yaml. The data
# above was exported in YOLOv5 layout, not in COCO annotation format.
!python3 ../yolov5/train.py --img 640 --batch 8 --epochs 100 --data ../applied-machine-learning/coco_pp_reduc.yaml --cfg ../yolov5/models/yolov5s.yaml --name COCO_PP_REDUC

[34m[1mtrain: [0mweights=../yolov5/yolov5s.pt, cfg=../yolov5/models/yolov5s.yaml, data=../applied-machine-learning/coco_pp_reduc.yaml, hyp=../yolov5/data/hyps/hyp.scratch-low.yaml, epochs=100, batch_size=8, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=../yolov5/runs/train, name=COCO_PP_REDUC, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0m⚠️ YOLOv5 is out of date by 40 commits. Use `git pull` or `git clone https://github.com/ultralytics/yolov5` to update.
YOLOv5 🚀 v6.1-14-g8a66eba torch 1.10.2+cu102 CUDA:0 (NVIDIA GeForce GTX 1650, 3912MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warm