In [1]:
import solaris as sol
import numpy as np
import geopandas as gpd
import os
import sys
import pandas as pd
import gdal
from solaris.vector.mask import footprint_mask
from solaris.vector.polygon import geojson_to_px_gdf, get_overlapping_subset
from solaris.utils.core import _check_gdf_load
import glob
from tqdm import tqdm_notebook as tqdm
import shapely
import shutil
import datetime
import rasterio 
import RarePlanes.tools.create_custom_classes

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [18]:
import argparse
import geopandas as gpd
import pandas as pd
import os
import glob
from tqdm import tqdm


def count_unique_index(df, by):
    return df.groupby(by).size().reset_index().rename(columns={0: 'count'})


def create_custom_classes(all_annotations_geojson, geojson_dir, output_path, category_attributes):
    """ parse the geojson files and create custom classes based upon
    unique variatons of the RarePlanes attributes.
        -all_annotations_geojson (str): The path to the
        `RarePlanes_Public_All_Annotations.geojson` file.
        - geojson_dir (str): directory containing the geojson files
        for individual images or tiles
        - output_path (str): directory to output the customized geojsons. Need to provide the absolute path.
        - category_attributes (list): A list of attributes to combine
        to create a custom class.  Choose any combintaion of the following:
        ['role','num_engines', 'propulsion', 'canards', 'num_tail_fins',
       'wing_position', 'wing_type', 'faa_wingspan_class']
    :returns
        - new geojsons with a custom_id for each combination of unique
        attributes.
        -A lookup table for each classes custom_id.
    """
    os.makedirs(output_path, exist_ok=True)
    gdf = gpd.read_file(all_annotations_geojson)
    lookup_gdf = count_unique_index(gdf, category_attributes)
    lookup_gdf['custom_id'] = list(range(1, len(lookup_gdf) + 1))
    lookup_gdf.drop(columns=['count'], inplace=True)
    lookup_gdf.to_csv(os.path.join(output_path, "custom_class_lookup.csv"))
    os.chdir(geojson_dir)
    geojsons = glob.glob("*.geojson")
    for geojson in tqdm(geojsons):
        gdf = gpd.read_file(geojson)
        gdf = pd.merge(gdf, lookup_gdf, on=category_attributes, how='left')
        print(output_path)
        print(geojson)
        gdf.to_file(os.path.join(output_path, geojson), driver="GeoJSON")

In [20]:
all_annotations_geojson = '/local_data/cosmiq/wdata/achadda/rareplanes-public/RarePlanes_Public_All_Annotations.geojson'
geojson_dir_train = '/local_data/cosmiq/wdata/achadda/rareplanes-public/trainOriginal/geojson_aircraft_tiled'
geojson_dir_test = '/local_data/cosmiq/wdata/achadda/rareplanes-public/testOriginal/geojson_aircraft_tiled'

#output_path_train_one = '/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_one'
#output_path_train_two = '/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_two'
#output_path_train_three = '/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_three'
#output_path_train_four = '/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_four'
output_path_train_five = '/local_data/cosmiq/src/achadda/yolo_planes/yolo_class_five/train'

#output_path_test_one = '/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_one'
#output_path_test_two = '/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_two'
#output_path_test_three = '/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_three'
#output_path_test_four = '/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_four'
output_path_test_five = '/local_data/cosmiq/src/achadda/yolo_planes/yolo_class_five/val'

#class_one = ['num_engines', 'propulsion']
#class_two = ['role']
#class_three = ['num_tail_fins','wing_position', 'wing_type']
#class_four = ['role','num_engines', 'propulsion', 'canards', 'num_tail_fins','wing_position', 'wing_type', 'faa_wingspan_class']
class_five = ['num_engines']

#RarePlanes.tools.create_custom_classes.create_custom_classes(all_annotations_geojson, geojson_dir_train, output_path_train_one, class_one)
#RarePlanes.tools.create_custom_classes.create_custom_classes(all_annotations_geojson, geojson_dir_train, output_path_train_two, class_two)
#RarePlanes.tools.create_custom_classes.create_custom_classes(all_annotations_geojson, geojson_dir_train, output_path_train_three, class_three)
#RarePlanes.tools.create_custom_classes.create_custom_classes(all_annotations_geojson, geojson_dir_train, output_path_train_four, class_four)
create_custom_classes(all_annotations_geojson, geojson_dir_train, output_path_train_five, class_five)

#RarePlanes.tools.create_custom_classes.create_custom_classes(all_annotations_geojson, geojson_dir_test, output_path_test_one, class_one)
#RarePlanes.tools.create_custom_classes.create_custom_classes(all_annotations_geojson, geojson_dir_test, output_path_test_two, class_two)
#RarePlanes.tools.create_custom_classes.create_custom_classes(all_annotations_geojson, geojson_dir_test, output_path_test_three, class_three)
#RarePlanes.tools.create_custom_classes.create_custom_classes(all_annotations_geojson, geojson_dir_test, output_path_test_four, class_four)
create_custom_classes(all_annotations_geojson, geojson_dir_test, output_path_test_five, class_five)


  0%|          | 1/5815 [00:00<21:53,  4.43it/s]

/local_data/cosmiq/src/achadda/yolo_planes/yolo_class_five/train
85_104001003A13C600_tile_10.geojson
/local_data/cosmiq/src/achadda/yolo_planes/yolo_class_five/train
37_104001000EA43C00_tile_439.geojson
/local_data/cosmiq/src/achadda/yolo_planes/yolo_class_five/train
1_104005000FDC8D00_tile_64.geojson





UnicodeEncodeError: 'ascii' codec can't encode character '\xeb' in position 30: ordinal not in range(128)

In [36]:
## Training Labels to YOLO
def gdf_to_yolo(geodataframe, image, output_dir, column='single_id', im_size=(512, 512), min_overlap=0):
    """
    Convert a geodataframe containing polygons to yolo/yolt format.
    Arguments
    ---------
    geodataframe : str
        Path to a :class:`geopandas.GeoDataFrame` with a column named
        ``'geometry'``.  Can be created from a geojson with labels for unique
        objects. Can be converted to this format with
        ``geodataframe=gpd.read_file("./xView_30.geojson")``.
    image : str
        Path to a georeferenced image (ie a GeoTIFF or png created with GDAL)
        that geolocates to the same geography as the `geojson`(s). This function will also
        accept a :class:`osgeo.gdal.Dataset` or :class:`rasterio.DatasetReader`
        with georeferencing information in this argument.
    output_dir : str
        Path to an output directory where all of the yolo readable text files
        will be placed.
    column : str, optional
        The column name that contians an unique integer id for each of object
        class.
    im_size : tuple, optional
        A tuple specifying the x and y heighth of a an image.  If specified as
        ``(0,0)`` (the default,) then the size is determined automatically.
    min_overlap : float, optional
        A float value ranging from 0 to 1.  This is a percantage.  If a polygon
        does not overlap the image by at least min_overlap, the polygon is
        discarded.  i.e. 0.66 = 66%. Default value of 0.66.
    Returns
    -------
    gdf : :class:`geopandas.GeoDataFrame`.
        The txt file will be written to the output_dir, however the the output
        gdf itself is returned.
    """
    if im_size == (0, 0):
        imsize_extract = rasterio.open(image).read()
        if len(imsize_extract.shape) == 3:
            im_size = (imsize_extract.shape[1], imsize_extract.shape[2])
        else:
            im_size = (imsize_extract.shape[0], imsize_extract.shape[1])
    [x0, y0, x1, y1] = [0, 0, im_size[0], im_size[1]]
    out_coords = [[x0, y0], [x0, y1], [x1, y1], [x1, y0]]
    points = [shapely.geometry.Point(coord) for coord in out_coords]
    pix_poly = shapely.geometry.Polygon([[p.x, p.y] for p in points])
    dw = 1. / im_size[0]
    dh = 1. / im_size[1]
    header = ["column_sub_1", "x", "y", "w", "h"]
    if os.path.isdir(output_dir) is False:
        os.mkdir(output_dir)
    imgName = image.split('/')[9]
    output = os.path.join(output_dir, imgName.split('.png')[0] + ".txt")
    gdf = geojson_to_px_gdf(geodataframe, image, precision=None)
    #gdf['column_sub_1'] = gdf[column] - 1
    gdf['column_sub_1'] = gdf[column]
    gdf['area'] = gdf['geometry'].area
    gdf['intersection'] = (
        gdf['geometry'].intersection(pix_poly).area / gdf['area'])
    gdf = gdf[gdf['area'] != 0]
    gdf = gdf[gdf['intersection'] >= min_overlap]
    if not gdf.empty:
        boxy = gdf['geometry'].bounds
        for _,row in boxy.iterrows():
            if row['maxx'] > im_size[0]:
                row['maxx'] = im_size[0]
            if row['minx'] < 0:
                row['minx'] = 0
            if row['maxy'] > im_size[1]:
                row['maxy'] = im_size[1]
            if row['miny'] < 0:
                row['miny'] = 0
        boxy['xmid'] = (boxy['minx'] + boxy['maxx']) / 2.0
        boxy['ymid'] = (boxy['miny'] + boxy['maxy']) / 2.0
        boxy['w0'] = (boxy['maxx'] - boxy['minx'])
        boxy['h0'] = (boxy['maxy'] - boxy['miny'])
        boxy['x'] = boxy['xmid'] * dw
        boxy['y'] = boxy['ymid'] * dh
        boxy['w'] = boxy['w0'] * dw
        boxy['h'] = boxy['h0'] * dh
        if not boxy.empty:
            gdf = gdf.join(boxy)
        gdf.to_csv(path_or_buf=output, sep=' ', columns=header, index=False, header=False)
    return gdf

In [37]:
truthDirTrainName = '/local_data/cosmiq/src/achadda/yolo_planes/class_four/images/train'
truthDirTestName = '/local_data/cosmiq/src/achadda/yolo_planes/class_four/images/val'
truthDirTrain = os.fsencode(truthDirTrainName)
truthDirTest = os.fsencode(truthDirTestName)

#geoDirTrainOneName = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_one'
#geoDirTrainTwoName = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_two'
#geoDirTrainThreeName = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_three'
#geoDirTrainFourName = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_four'
geoDirTrainFiveName = '/local_data/cosmiq/wdata/achadda/rareplanes-public/trainOriginal/geojson_aircraft_tiled'
#geoDirTrainOne = os.fsencode(geoDirTrainOneName)
#geoDirTrainTwo = os.fsencode(geoDirTrainTwoName)
#geoDirTrainThree = os.fsencode(geoDirTrainThreeName)
#geoDirTrainFour = os.fsencode(geoDirTrainFourName)
geoDirTrainFive = os.fsencode(geoDirTrainFiveName)

#geoDirTestOneName = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_one'
#geoDirTestTwoName = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_two'
#geoDirTestThreeName = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_three'
#geoDirTestFourName = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_four'
geoDirTestFiveName = '/local_data/cosmiq/wdata/achadda/rareplanes-public/testOriginal/geojson_aircraft_tiled'
#geoDirTestOne = os.fsencode(geoDirTestOneName)
#geoDirTestTwo = os.fsencode(geoDirTestTwoName)
#geoDirTestThree = os.fsencode(geoDirTestThreeName)
#geoDirTestFour = os.fsencode(geoDirTestFourName)
geoDirTestFive = os.fsencode(geoDirTestFiveName)

#outputDirectoryTrainOne = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_one_labels'
#outputDirectoryTrainTwo = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_two_labels'
#outputDirectoryTrainThree = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_three_labels'
#outputDirectoryTrainFour = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_train/yolo_class_four_labels'
outputDirectoryTrainFive = '/local_data/cosmiq/src/achadda/yolo_planes/class_five/labels/train'
#os.makedirs(outputDirectoryTrainOne, exist_ok = True)
#os.makedirs(outputDirectoryTrainTwo, exist_ok = True)
#os.makedirs(outputDirectoryTrainThree, exist_ok = True)
#os.makedirs(outputDirectoryTrainFour, exist_ok = True)
os.makedirs(outputDirectoryTrainFive, exist_ok = True)

#outputDirectoryTestOne = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_one_labels'
#outputDirectoryTestTwo = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_two_labels'
#outputDirectoryTestThree = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_three_labels'
#outputDirectoryTestFour = '/home/local_data/cosmiq/src/achadda/yolo_planes/yolo_classes_test/yolo_class_four_labels'
outputDirectoryTestFive = '/local_data/cosmiq/src/achadda/yolo_planes/class_five/labels/val'
#os.makedirs(outputDirectoryTestOne, exist_ok = True)
#os.makedirs(outputDirectoryTestTwo, exist_ok = True)
#os.makedirs(outputDirectoryTestThree, exist_ok = True)
#os.makedirs(outputDirectoryTestFour, exist_ok = True)
os.makedirs(outputDirectoryTestFive, exist_ok = True)


In [39]:
geoDirectoryName = geoDirTestFiveName
geoDirectory = geoDirTestFive
truthDirectoryName = truthDirTestName
truthDirectory = truthDirTest
outputDirectory = outputDirectoryTestFive

for file in tqdm(os.listdir(geoDirectory)):
    filename = os.fsdecode(file)
    if filename.endswith(".geojson"):
        geodataframe = gpd.read_file(geoDirectoryName + '/' + filename)
        imgName = filename.replace('.geojson', '')     
        
        for truth in os.listdir(truthDirectory):
            truthName = os.fsdecode(truth)
            if truthName.endswith(".png"):
                checkName = truthName.replace('.png', '')
                if (checkName == imgName):
                    image = truthDirectoryName + '/' + truthName 
                    gdf_to_yolo(geodataframe, image, output_dir=outputDirectory, column='num_engines', im_size=(512, 512), min_overlap=0)
                    

100%|██████████| 2710/2710 [02:15<00:00, 20.05it/s]
