In [None]:
# Show how many CPUs the multiprocessing module reports for this runtime.
from multiprocessing import cpu_count

cpu_count()

In [None]:
import json
import os
from glob import glob
from PIL import Image
from osgeo import gdal
import cv2
from matplotlib import pyplot as plt
from matplotlib import patches
import numpy as np
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split

from typing import List
import math
from tqdm import tqdm

In [None]:
# Move into the project folder (hard-coded absolute path) and remember the
# resulting working directory so later cells can chdir back to it.
os.chdir('/content/drive/MyDrive/Colab Notebooks/aifactory/satellite_tank')
base_dir = os.getcwd()

In [None]:
# Collect every training label JSON relative to base_dir (sorted so indices are
# stable between runs) and preview the first five paths.
os.chdir(base_dir)
train_labels_paths = sorted(glob('./data/oiltank_dataset/train_labels/*.json'))
train_labels_paths[:5]

In [None]:
# Load one label file (position smp_idx in the sorted list) as a sample for
# exploring the annotation layout; the parsed JSON is displayed as the cell output.
smp_idx = 64
with open(train_labels_paths[smp_idx]) as f:
    label_smp = json.load(f)
label_smp

In [None]:
# Look at the 'properties' entry of the first feature in the sample label.
label_smp['features'][0]['properties']

In [None]:
# Scratch figure: try out matplotlib's Rectangle and Polygon patches (outline
# only) next to a plain line before using them on real annotations.
fig, ax = plt.subplots(1, 1)

demo_rect = patches.Rectangle((2, 3), 1, 2, fill=False, edgecolor='red')
demo_quad = patches.Polygon(
    ((2, 3), (3, 5), (3, 8), (2, 7)),
    closed=True,
    fill=False,
    edgecolor='green',
)

for demo_patch in (demo_rect, demo_quad):
    ax.add_patch(demo_patch)

ax.plot([1, 4], [1, 4])

In [None]:
# Derive the image path that belongs to the sample label by plain substring
# replacement: 'labels' -> 'images' and 'json' -> 'png'.
smp_img_path = (
    train_labels_paths[smp_idx]
    .replace('labels', 'images')
    .replace('json', 'png')
)
smp_img_path

In [None]:
# Open the sample image with PIL; a later cell draws it underneath the annotations.
smp_img = Image.open(smp_img_path)

In [None]:
# Display every feature (annotated object) stored in the sample label.
label_smp['features']

In [None]:
# Split the first feature's 'object_imcoords' string into its eight
# comma-separated fields (four x/y pairs). The values are still strings here.
x1, y1, x2, y2, x3, y3, x4, y4 = label_smp['features'][0]['properties']['object_imcoords'].split(',')
x1, y1

In [None]:
def calc_area(pts):
    '''
    Area of a simple polygon computed with the shoelace formula.

    ref: https://en.wikipedia.org/wiki/Shoelace_formula

    Args:
        pts: sequence of (x, y) vertices in boundary order. The ring may be
            passed closed (first vertex repeated as the last element) or
            open; an open ring is closed implicitly.

    Returns:
        The absolute enclosed area (independent of winding direction).
        An empty or single-vertex sequence yields 0.
    '''
    N = len(pts)
    s1 = 0
    s2 = 0
    # Accumulate the two diagonal products over consecutive vertex pairs.
    for (xa, ya), (xb, yb) in zip(pts, pts[1:]):
        s1 += xa * yb
        s2 += ya * xb

    # Add the edge from the last vertex back to the first when the caller did
    # not repeat the first vertex. For an already closed ring this edge has
    # zero length, so it is skipped and the result is unchanged.
    if N > 1 and (pts[-1][0] != pts[0][0] or pts[-1][1] != pts[0][1]):
        s1 += pts[-1][0] * pts[0][1]
        s2 += pts[-1][1] * pts[0][0]

    return abs((s1-s2)/2)

# Overlay every annotated quadrilateral of the sample label on its image,
# together with its centroid and an axis-aligned square of equal area.
fig, ax = plt.subplots(1, 1, figsize=(12, 12))

for item in label_smp['features']:
    x1, y1, x2, y2, x3, y3, x4, y4 = (
        np.float64(v) for v in item['properties']['object_imcoords'].split(',')
    )
    quad = ((x1, y1), (x2, y2), (x3, y3), (x4, y4))

    # Annotated quadrilateral, outline only.
    ax.add_patch(
        patches.Polygon(quad, edgecolor='cyan', fill=False, closed=True)
    )

    # Centroid taken as the mean of the four corners.
    x_cent = (x1 + x2 + x3 + x4)/4
    y_cent = (y1 + y2 + y3 + y4)/4
    ax.plot(x_cent, y_cent, 'o', color='red')

    # Half the side of a square with the same area as the quadrilateral; the
    # ring is closed explicitly by repeating the first corner.
    half_edge = np.sqrt(calc_area([*quad, quad[0]]))/2

    left, right = x_cent-half_edge, x_cent+half_edge
    low, high = y_cent-half_edge, y_cent+half_edge
    ax.add_patch(
        patches.Polygon(
            ((left, high), (right, high), (right, low), (left, low)),
            edgecolor='red',
            fill=False,
            closed=True,
        )
    )

ax.imshow(smp_img)

In [None]:
# Check bounding box format
# For every label file, report the boxes whose first edge (corner 1 -> corner 2)
# differs by more than one pixel in y; the y difference of corners 3 -> 4 is
# printed alongside for comparison.
for label_num, train_label_path in enumerate(train_labels_paths):
    with open(train_label_path) as f:
        label_json = json.load(f)

    print(f'Label number {label_num}')

    for item in label_json['features']:
        x1, y1, x2, y2, x3, y3, x4, y4 = map(
            np.float64, item['properties']['object_imcoords'].split(',')
        )

        if not abs(y2-y1) > 1:
            continue

        print(f'y2 - y1 {y2-y1}')
        print(f'y4 - y3 {y4-y3}')

In [None]:
# Check image size
# Collect the array shape of every training image and show the distinct shapes.
img_size = []
for train_label_path in train_labels_paths:
    # The image path is derived from the label path of THIS iteration
    # ('labels' -> 'images', 'json' -> 'png'). Deriving it from the sample
    # path would inspect the same image over and over.
    img_path = train_label_path.replace('labels', 'images').replace('json', 'png')

    # Close each image once its shape has been read.
    with Image.open(img_path) as img:
        img_size.append(np.array(img).shape)

set(img_size)

In [None]:
# Preview how a label file name maps to its YOLO .txt name:
# keep the last path component and swap 'json' for 'txt'.
train_labels_paths[0].split('/')[-1].replace('json', 'txt')

In [None]:
# Trial run of the train/validation split on the label paths with a fixed
# random_state; the validation part is displayed as the cell output.
# NOTE(review): train_test_split is already imported in the import cell, so
# this import is redundant — confirm before removing.
from sklearn.model_selection import train_test_split

t, v = train_test_split(
    train_labels_paths,
    train_size=68,
    random_state=0
)
v

In [None]:
# Converting original data to YOLO format (*.txt)
def convert_yolo_data_format(
    original_dir:str, original_img_dir:str,
    destination_dir:str, destination_img_dir:str,
    data_split=False, train_size:'int | float'=0.8, random_state=0,
    ):
    '''
    Deprecated first version of the JSON -> YOLO (*.txt) converter.

    The function fails unconditionally. Its retired body, which could never
    run because it followed the raise, built each box from only two corners
    of the annotated quadrilateral; it has been removed. Use
    convert_yolo_data_format2 instead, which takes the same arguments.

    Args:
        original_dir: folder holding the source label JSON files (unused).
        original_img_dir: folder holding the source images (unused).
        destination_dir: output folder for the label files (unused).
        destination_img_dir: output folder for the copied images (unused).
        data_split: whether a train/val split was requested (unused).
        train_size: size of the training part of the split (unused).
        random_state: seed of the split (unused).

    Raises:
        NotImplementedError: always.
    '''
    raise NotImplementedError('This function is deprecated')

data_version = 'v0'

# convert_yolo_data_format is deprecated and always raises NotImplementedError.
# The call is kept as a record of how the 'v0' export was requested, but the
# error is caught and reported so the cell does not abort "Restart & Run All".
try:
    convert_yolo_data_format(
        './data/oiltank_dataset/train_labels/',
        './data/oiltank_dataset/train_images',
        f'./data/oiltank_dataset/yolo_format_{data_version}/labels/',
        f'./data/oiltank_dataset/yolo_format_{data_version}/images/',
        data_split=False,
        train_size=55,
        random_state=0,
        )
except NotImplementedError as err:
    print(f'Skipped yolo_format_{data_version} export: {err}')

In [None]:
# Converting original data to YOLO format 2(*.txt)
def _quad_to_yolo_line(imcoords, img_size):
    '''Turn one 'object_imcoords' string (x1,y1,...,x4,y4) into a YOLO label line for class 0.'''
    x1, y1, x2, y2, x3, y3, x4, y4 = [np.float64(x) for x in imcoords.split(',')]

    # Side of the square whose area equals the annotated quadrilateral's area
    # (ring closed explicitly by repeating the first corner).
    area = calc_area([(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)])
    edge = np.sqrt(area)

    # Centre is the mean of the four corners; everything is normalised by img_size.
    x_center = ((x1 + x2 + x3 + x4)/4) /img_size
    y_center = ((y1 + y2 + y3 + y4)/4) /img_size
    width = edge /img_size
    height = edge /img_size

    return '0 %f %f %f %f\n'%(x_center, y_center, width, height)


def _export_yolo_subset(label_paths, original_img_dir, label_out_dir, img_out_dir, desc, img_size):
    '''Copy the image of every label in label_paths and write its YOLO .txt file.'''
    # exist_ok avoids the bare try/except and creates each folder independently.
    os.makedirs(label_out_dir, exist_ok=True)
    os.makedirs(img_out_dir, exist_ok=True)

    for label_path in tqdm(label_paths, desc=desc):
        # open single json
        with open(label_path) as f:
            label_json = json.load(f)

        # Swap only the extension, so a 'json' substring elsewhere in the
        # file name is left alone.
        stem = os.path.splitext(os.path.basename(label_path))[0]

        # Copy corresponding image
        img_nm = stem + '.png'
        shutil.copy(
            os.path.join(original_img_dir, img_nm),
            os.path.join(img_out_dir, img_nm)
        )

        # The context manager closes (and flushes) the txt file even on error.
        with open(os.path.join(label_out_dir, stem + '.txt'), 'w') as txt_label:
            for item in label_json['features']:
                txt_label.write(
                    _quad_to_yolo_line(item['properties']['object_imcoords'], img_size)
                )


# Converting original data to YOLO format 2(*.txt)
def convert_yolo_data_format2(
    original_dir:str, original_img_dir:str,
    destination_dir:str, destination_img_dir:str,
    data_split=False, train_size:'int | float'=0.8, random_state=0,
    img_size=1024,
    ):
    '''
    Convert the label JSON files under original_dir to YOLO *.txt files and
    copy the matching PNG images next to them.

    Every annotated quadrilateral becomes one line "0 xc yc w h": the centre
    is the mean of the four corners, and width == height == side of the
    square with the same area as the quadrilateral. All values are divided
    by img_size.

    Args:
        original_dir: folder containing the source '*.json' label files.
        original_img_dir: folder containing the source images; each image is
            expected to share its label's base name with a '.png' extension.
        destination_dir: output folder for the label .txt files.
        destination_img_dir: output folder for the copied images.
        data_split: if True, split the labels with train_test_split and write
            them to 'train' and 'val' sub-folders of both destinations;
            otherwise write everything directly into the destinations.
        train_size: forwarded to train_test_split (only used when data_split).
        random_state: forwarded to train_test_split (only used when data_split).
        img_size: divisor used to normalise pixel coordinates. The default
            1024 is the value that used to be hard-coded.

    Raises:
        Whatever train_test_split, open, json.load or shutil.copy raise, e.g.
        when the label folder is empty in split mode or an image is missing.
    '''
    # The split is done on the files found under original_dir, not on a
    # notebook-level global list.
    labels_paths = sorted(glob(os.path.join(original_dir, '*.json')))

    if data_split:
        train_paths, val_paths = train_test_split(
            labels_paths,
            train_size=train_size,
            random_state=random_state,
            )

        subsets = (
            ('train', train_paths, 'Converting train data as YOLO format with split'),
            ('val', val_paths, 'Converting val data as YOLO format with split'),
        )
        for subset_nm, subset_paths, desc in subsets:
            _export_yolo_subset(
                subset_paths,
                original_img_dir,
                os.path.join(destination_dir, subset_nm),
                os.path.join(destination_img_dir, subset_nm),
                desc,
                img_size,
            )

    else:
        _export_yolo_subset(
            labels_paths,
            original_img_dir,
            destination_dir,
            destination_img_dir,
            'Converting as YOLO format',
            img_size,
        )

data_version = 'v3'

# Build the 'v3' YOLO dataset: labels and images are written to train/val
# sub-folders, with 55 label files in the training part and a fixed seed.
convert_yolo_data_format2(
    original_dir='./data/oiltank_dataset/train_labels/',
    original_img_dir='./data/oiltank_dataset/train_images',
    destination_dir=f'./data/oiltank_dataset/yolo_format_{data_version}/labels/',
    destination_img_dir=f'./data/oiltank_dataset/yolo_format_{data_version}/images/',
    data_split=True,
    train_size=55,
    random_state=0,
)