# Modify labels from COCO format to YOLO format

The labels in `train.csv` are in COCO format: `xmin ymin w h`, in pixels. We need to convert these to the YOLO format: `x_center y_center w h`, with each value normalized to the range 0 to 1 by the image width and height.

In [None]:
from ast import literal_eval
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from google.colab import drive

# Mount Google Drive inside the Colab VM so the files under "My Drive" can be
# read through the /content/gdrive path.
drive.mount('/content/gdrive')

# Load the annotation table from Drive; later cells read its `bbox` and
# `image_id` columns.
train_df = pd.read_csv("/content/gdrive/My Drive/wheat/input/train.csv")
# Last expression of the cell: shows the first rows as a quick sanity check.
train_df.head()

In [None]:
def convert(size, box):
    """Turn a pixel-space corner box into a normalized centre/size box.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        ``[x_center, y_center, width, height]``, where the x values are
        divided by the image width and the y values by the image height.
    """
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]

    # Reciprocals of the image dimensions, used as scale factors.
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]

    center_x = (x_lo + x_hi) / 2.0
    center_y = (y_lo + y_hi) / 2.0
    span_x = x_hi - x_lo
    span_y = y_hi - y_lo

    return [
        center_x * scale_x,
        center_y * scale_y,
        span_x * scale_x,
        span_y * scale_y,
    ]

def convert_to_yolo_label(coco_format_box, w=1024, h=1024):
    """Parse a COCO-style box string and return the matching YOLO-style box.

    Args:
        coco_format_box: String holding a literal ``[xmin, ymin, width,
            height]`` sequence in pixels.
        w: Image width in pixels.
        h: Image height in pixels.

    Returns:
        The ``[x_center, y_center, width, height]`` list produced by
        ``convert``. A warning line is printed when any of the four values
        is above 1 or below 0; the box is returned either way.
    """
    parsed = literal_eval(coco_format_box)
    left, top = parsed[0], parsed[1]
    right = left + parsed[2]
    bottom = top + parsed[3]

    # `convert` expects the corners ordered as (xmin, xmax, ymin, ymax).
    corners = (float(left), float(right), float(top), float(bottom))
    yolo_box = convert((w, h), corners)

    out_of_range = np.max(yolo_box) > 1 or np.min(yolo_box) < 0
    if out_of_range:
        print("BOX HAS AN ISSUE")
    return yolo_box

# Add a column holding the YOLO-format box computed from each row's bbox string.
train_df['yolo_box'] = train_df['bbox'].apply(convert_to_yolo_label)

In [None]:
# Collect the distinct image ids found in the annotation table, then report
# how many there are.
unique_img_ids = train_df.image_id.unique()
print("We have {} unique images with boxes.".format(len(unique_img_ids)))

In [None]:
# Write one YOLO label file per annotated image: "<image_id>.txt" containing
# one "0 x_center y_center w h" line per box (0 is the only class id written).
folder_location = "/content/gdrive/My Drive/wheat/input/wheat_yolo_train_data"
# open() does not create missing directories, so make sure the folder exists.
os.makedirs(folder_location, exist_ok=True)

line_template = "0 %s %s %s %s \n"
# A single groupby pass replaces a full-DataFrame query per image; sort=False
# keeps the images in first-appearance order, the same order unique() gives.
for img_id, img_rows in train_df.groupby("image_id", sort=False):
    file_name = "{}/{}.txt".format(folder_location, img_id)
    # 'w' rather than 'a': re-running this cell must overwrite the file
    # instead of appending a second copy of every box.
    with open(file_name, 'w') as label_file:
        label_file.writelines(
            line_template % tuple(box) for box in img_rows['yolo_box'].values
        )

In [None]:
import glob
# List every training image on Drive and reduce each path to its image id
# (the file name without directory or extension).
all_imgs = glob.glob("/content/gdrive/My Drive/wheat/input/train/*.jpg")
# basename/splitext remove only the directory and the final extension, unlike
# str.replace(".jpg", ""), which would also delete a ".jpg" occurring inside
# the file name.
all_imgs = [os.path.splitext(os.path.basename(path))[0] for path in all_imgs]
# Image ids that appear in the annotation table.
positive_imgs = train_df.image_id.unique()

In [None]:
# Image ids present on disk but absent from the annotation table.
negative_images = set(all_imgs).difference(positive_imgs)
print(len(all_imgs), len(positive_imgs))

In [None]:
# Create an empty label file for every image id in negative_images.
for image_id in negative_images:
    empty_label_path = "/content/gdrive/My Drive/wheat/input/wheat_yolo_train_data/{}.txt".format(image_id)
    # Opening in 'w' mode creates (or truncates) the file; nothing is written.
    with open(empty_label_path, 'w'):
        pass

Now we will copy all of the `.txt` label files and the images into the same folder and zip it.

In [None]:
from google.colab import drive

drive.mount('/content/gdrive')
!mkdir data
%cd data
!cp /content/gdrive/My\ Drive/wheat/input/wheat_yolo_train_data/* .
print("Done copying labels. Now copying images.")
!cp /content/gdrive/My\ Drive/wheat/input/train/*.jpg .
print("Done copying Images. Now zipping.")
!zip -r newobj.zip /content/data/* 
print("Finally done.")