# Prep Data for Upload to the Cloud

Having labeled the data, we can prepare it for upload to Google Drive for further processing.

In [1]:
from glob import glob
import os
import shutil

# Upper bound on how many labeled images are copied for each class; the value
# is also embedded in the dataset folder/archive name (e.g. "250_images").
n_images_per_class = 250

def init_clean_dir(path):
    """Create an empty directory at ``path``, discarding anything already there.

    An existing directory tree at ``path`` is deleted first; a plain file
    occupying ``path`` is removed as well. Missing parent directories are
    created so the call also works on a fresh checkout.
    """
    if os.path.isdir(path):
        shutil.rmtree(path)
    elif os.path.exists(path):
        # A regular file is in the way; rmtree would refuse to delete it.
        os.remove(path)
    os.makedirs(path)

# Name the dataset after the per-class image cap (e.g. "250_images").
dataset_name = str(n_images_per_class) + "_images"
dataset_path = "./dataset/" + dataset_name
# Make sure the parent folder exists before the dataset folder is (re)created.
os.makedirs("./dataset", exist_ok=True)
init_clean_dir(dataset_path)

class_file_path = "./iNaturalist/images/Poison Ivy/classes.txt"
shutil.copyfile(class_file_path, dataset_path + "/obj.names")

# One class label per line. Blank lines are skipped: an empty label would make
# the per-class folder resolve to the dataset folder itself and wipe it.
with open(class_file_path) as class_file:
    class_labels = [line.strip() for line in class_file if line.strip()]

for the_label in class_labels:
    save_data_to = dataset_path + "/" + the_label.replace(" ", "_")
    init_clean_dir(save_data_to)

    # Index the class folder once. Using os.path helpers (instead of splitting
    # on a literal backslash) keeps this working on any operating system.
    source_dir = "./iNaturalist/images/" + the_label
    label_files = []
    image_by_record = {}
    for file_name in sorted(os.listdir(source_dir)):
        stem, extension = os.path.splitext(file_name)
        if extension.lower() == ".txt":
            # classes.txt lists class names; it is not a label file.
            if file_name != "classes.txt":
                label_files.append(file_name)
        else:
            # Match images by exact stem so record "123" never picks up
            # "1234.jpg" the way a "*123*" wildcard would.
            image_by_record.setdefault(stem, file_name)

    copied = 0
    for label_file in label_files:
        if copied >= n_images_per_class:
            break
        record_id = os.path.splitext(label_file)[0]
        image_file = image_by_record.get(record_id)
        if image_file is None:
            print("Skipping " + label_file + " in " + source_dir + ": no matching image found")
            continue
        # The image is always stored under a .jpg name, whatever its source
        # extension, next to a label file with the same stem.
        new_image_path = save_data_to + "/" + record_id + ".jpg"
        new_coords_path = save_data_to + "/" + record_id + ".txt"
        shutil.copyfile(os.path.join(source_dir, image_file), new_image_path)
        shutil.copyfile(os.path.join(source_dir, label_file), new_coords_path)
        copied += 1

## YOLOv5 Config
Generate the YAML file used to train the YOLOv5 model.

In [2]:
# Collect the class names (one per line, blank lines ignored) for the YAML.
with open(class_file_path, "r") as class_file:
    class_names = [line.strip() for line in class_file if line.strip()]
n_classes = len(class_names)

# Single-quoted YAML scalars escape an apostrophe by doubling it.
names = "names: [" + ", ".join(
    "'" + class_name.replace("'", "''") + "'" for class_name in class_names
) + "]"

# Write the config inside the dataset folder (note the "/") so that it ends up
# in the archive together with the images and the other config files.
with open(dataset_path + "/yolov5.yaml", "w") as f:
    f.write("train: ../data/images/train/\n")
    f.write("val: ../data/images/val/\n")
    f.write("\n")
    f.write("nc: " + str(n_classes) + "\n")
    f.write("\n")
    f.write(names)

## YOLOv4 Config

In [3]:
def _retarget_yolo_cfg(cfg_lines, class_count):
    """Return a copy of ``cfg_lines`` with every [yolo] head set to ``class_count``.

    For each ``[yolo]`` section the ``classes=`` entry inside it and the
    nearest ``filters=`` entry before it are rewritten. The entries are found
    by section rather than by fixed line number, so the result does not depend
    on the exact layout of the template file.

    Raises:
        ValueError: if ``cfg_lines`` contains no ``[yolo]`` section.
    """
    def key_of(text):
        # Key of a "key=value" line, or "" for headers, comments and blanks.
        if "=" not in text or text.lstrip().startswith("#"):
            return ""
        return text.split("=", 1)[0].strip()

    updated = list(cfg_lines)
    yolo_sections = 0
    for index, text in enumerate(updated):
        if text.strip() != "[yolo]":
            continue
        yolo_sections += 1
        # filters= of the layer feeding this head: nearest one above it.
        for above in range(index - 1, -1, -1):
            if key_of(updated[above]) == "filters":
                updated[above] = "filters=" + str((class_count + 5) * 3) + "\n"
                break
        # classes= of this head: first one before the next section header.
        for below in range(index + 1, len(updated)):
            if updated[below].lstrip().startswith("["):
                break
            if key_of(updated[below]) == "classes":
                updated[below] = "classes=" + str(class_count) + "\n"
                break
    if yolo_sections == 0:
        raise ValueError("no [yolo] section found in config")
    return updated


yolov4_config = dataset_path + "/yolov4-custom.cfg"
shutil.copyfile("YOLO config files/yolov4-custom.cfg", yolov4_config)
with open(yolov4_config, "r") as cfg_file:
    lines = cfg_file.readlines()
# Compute the new content before reopening for writing, so a malformed
# template raises without truncating the copied file.
lines = _retarget_yolo_cfg(lines, n_classes)
with open(yolov4_config, "w") as cfg_file:
    cfg_file.writelines(lines)

## YOLOv3 Config

In [4]:
def _retarget_yolo_cfg(cfg_lines, class_count):
    """Return a copy of ``cfg_lines`` with every [yolo] head set to ``class_count``.

    For each ``[yolo]`` section the ``classes=`` entry inside it and the
    nearest ``filters=`` entry before it are rewritten. The entries are found
    by section rather than by fixed line number, so the result does not depend
    on the exact layout of the template file.

    Raises:
        ValueError: if ``cfg_lines`` contains no ``[yolo]`` section.
    """
    def key_of(text):
        # Key of a "key=value" line, or "" for headers, comments and blanks.
        if "=" not in text or text.lstrip().startswith("#"):
            return ""
        return text.split("=", 1)[0].strip()

    updated = list(cfg_lines)
    yolo_sections = 0
    for index, text in enumerate(updated):
        if text.strip() != "[yolo]":
            continue
        yolo_sections += 1
        # filters= of the layer feeding this head: nearest one above it.
        for above in range(index - 1, -1, -1):
            if key_of(updated[above]) == "filters":
                updated[above] = "filters=" + str((class_count + 5) * 3) + "\n"
                break
        # classes= of this head: first one before the next section header.
        for below in range(index + 1, len(updated)):
            if updated[below].lstrip().startswith("["):
                break
            if key_of(updated[below]) == "classes":
                updated[below] = "classes=" + str(class_count) + "\n"
                break
    if yolo_sections == 0:
        raise ValueError("no [yolo] section found in config")
    return updated


yolov3_config = dataset_path + "/yolov3-spp.cfg"
shutil.copyfile("YOLO config files/yolov3-spp.cfg", yolov3_config)
with open(yolov3_config, "r") as cfg_file:
    lines = cfg_file.readlines()
# Compute the new content before reopening for writing, so a malformed
# template raises without truncating the copied file.
lines = _retarget_yolo_cfg(lines, n_classes)
with open(yolov3_config, "w") as cfg_file:
    cfg_file.writelines(lines)

## Zip it up
Finally, compress everything so it can be uploaded to the cloud.

In [5]:
# Pack the prepared dataset folder into a zip archive next to it, then drop
# the uncompressed folder since the archive now holds its contents.
archive_base = "./dataset/" + dataset_name
shutil.make_archive(archive_base, format="zip", root_dir=dataset_path)
shutil.rmtree(dataset_path)
print("Ready for Upload!")

Ready for Upload!
