In [3]:
import json
import os
from pathlib import Path

import numpy as np
from PIL import Image
from tqdm import tqdm

# from utils.datasets import autosplit
# from utils.general import download, xyxy2xywhn

In [4]:
def convert_labels(fname=Path('xView/xView_train.geojson')):
    # Convert xView geoJSON labels to YOLO format
    path = fname.parent
    with open(fname) as f:
        print(f'Loading {fname}...')
        data = json.load(f)

    # Make dirs
    labels = Path(path / 'labels' / 'train')
    os.system(f'rm -rf {labels}')
    labels.mkdir(parents=True, exist_ok=True)

    # xView classes 11-94 to 0-59
    xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
                        12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
                        29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
                        47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]

    shapes = {}
    for feature in tqdm(data['features'], desc=f'Converting {fname}'):
        p = feature['properties']
        if p['bounds_imcoords']:
            id = p['image_id']
            file = path / 'train_images' / id
            if file.exists():  # 1395.tif missing
                try:
                    box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
                    assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
                    cls = p['type_id']
                    cls = xview_class2index[int(cls)]  # xView class to 0-60
                    assert 59 >= cls >= 0, f'incorrect class index {cls}'

                    # Write YOLO label
                    if id not in shapes:
                        shapes[id] = Image.open(file).size
                    box = xyxy2xywhn(box[None].astype(np.float), w=shapes[id][0], h=shapes[id][1], clip=True)
                    with open((labels / id).with_suffix('.txt'), 'a') as f:
                        f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n")  # write label.txt
                except Exception as e:
                    print(f'WARNING: skipping one label for {file}: {e}')


# Download manually from https://challenge.xviewdataset.org
# dir = Path(yaml['path'])  # dataset root dir
# urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip',  # train labels
#         'https://d307kc0mrhucc3.cloudfront.net/train_images.zip',  # 15G, 847 train images
#         'https://d307kc0mrhucc3.cloudfront.net/val_images.zip']  # 5G, 282 val images (no labels)
# download(urls, dir=dir, delete=False)

# Convert labels
# convert_labels(dir / 'xView_train.geojson')

# # Move images
# images = Path(dir / 'images')
# images.mkdir(parents=True, exist_ok=True)
# Path(dir / 'train_images').rename(dir / 'images' / 'train')
# Path(dir / 'val_images').rename(dir / 'images' / 'val')

# # Split
# autosplit(dir / 'images' / 'train')


In [5]:
convert_labels()

Loading xView\xView_train.geojson...


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  box = xyxy2xywhn(box[None].astype(np.float), w=shapes[id][0], h=shapes[id][1], clip=True)
Converting xView\xView_train.geojson:  27%|██▋       | 161723/601937 [00:11<01:08, 6443.85it/s] 



Converting xView\xView_train.geojson:  50%|████▉     | 298186/601937 [00:20<00:09, 33339.06it/s]



Converting xView\xView_train.geojson:  61%|██████    | 366627/601937 [00:33<00:47, 4909.44it/s] 



Converting xView\xView_train.geojson:  82%|████████▏ | 491721/601937 [00:39<00:01, 55170.81it/s]



Converting xView\xView_train.geojson:  83%|████████▎ | 498662/601937 [00:40<00:07, 13183.18it/s]



Converting xView\xView_train.geojson:  97%|█████████▋| 585125/601937 [00:51<00:01, 8841.66it/s] 



Converting xView\xView_train.geojson:  98%|█████████▊| 590384/601937 [00:52<00:01, 7502.55it/s]



Converting xView\xView_train.geojson: 100%|██████████| 601937/601937 [00:54<00:00, 10978.00it/s]


In [4]:
import os
import shutil

In [9]:
all_labels = []
for root, dir, files in os.walk("xView\\labels\\train"):
    for file in files:
        all_labels.append(file.split('.')[0])

In [20]:
for root, dir, files in os.walk("xView\\train_images"):
    for file in files:
        img_name = file.split('.')[0]
        if img_name in all_labels:
            shutil.copy2(os.path.join(root, file), 'xView/train')

In [5]:
import pandas as pd
import numpy as np
from PIL import Image
from shapely.geometry import Polygon
import glob
import argparse
import os
import random
from shutil import copyfile
 

def tiler(imnames, newpath, falsepath, slice_size, ext):
    for imname in tqdm(imnames):
        try:
            im = Image.open(imname)
            imr = np.array(im, dtype=np.uint8)
            height = imr.shape[0]
            width = imr.shape[1]
            labname = imname.replace(ext, '.txt')
            labels = pd.read_csv(labname, sep=' ', names=['class', 'x1', 'y1', 'w', 'h'])
            
            # we need to rescale coordinates from 0-1 to real image height and width
            labels[['x1', 'w']] = labels[['x1', 'w']] * width
            labels[['y1', 'h']] = labels[['y1', 'h']] * height
            
            boxes = []
            
            # convert bounding boxes to shapely polygons. We need to invert Y and find polygon vertices from center points
            for row in labels.iterrows():
                x1 = row[1]['x1'] - row[1]['w']/2
                y1 = (height - row[1]['y1']) - row[1]['h']/2
                x2 = row[1]['x1'] + row[1]['w']/2
                y2 = (height - row[1]['y1']) + row[1]['h']/2

                boxes.append((int(row[1]['class']), Polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2)])))
            
            counter = 0
            # create tiles and find intersection with bounding boxes for each tile
            for i in range((height // slice_size)):
                for j in range((width // slice_size)):
                    x1 = j*slice_size
                    y1 = height - (i*slice_size)
                    x2 = ((j+1)*slice_size) - 1
                    y2 = (height - (i+1)*slice_size) + 1

                    pol = Polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2)])
                    imsaved = False
                    slice_labels = []

                    for box in boxes:
                        if pol.intersects(box[1]):
                            inter = pol.intersection(box[1])        
                            
                            if not imsaved:
                                sliced = imr[i*slice_size:(i+1)*slice_size, j*slice_size:(j+1)*slice_size]
                                sliced_im = Image.fromarray(sliced)
                                filename = imname.split('/')[-1]
                                slice_path = newpath + "/" + filename.replace(ext, f'_{i}_{j}{ext}')                            
                                slice_labels_path = newpath + "/" + filename.replace(ext, f'_{i}_{j}.txt')                            
                                sliced_im.save(slice_path)
                                imsaved = True                    
                            
                            # get smallest rectangular polygon (with sides parallel to the coordinate axes) that contains the intersection
                            new_box = inter.envelope 
                            
                            # get central point for the new bounding box 
                            centre = new_box.centroid
                            
                            # get coordinates of polygon vertices
                            x, y = new_box.exterior.coords.xy
                            
                            # get bounding box width and height normalized to slice size
                            new_width = (max(x) - min(x)) / slice_size
                            new_height = (max(y) - min(y)) / slice_size
                            
                            # we have to normalize central x and invert y for yolo format
                            new_x = (centre.coords.xy[0][0] - x1) / slice_size
                            new_y = (y1 - centre.coords.xy[1][0]) / slice_size
                            
                            counter += 1

                            slice_labels.append([box[0], new_x, new_y, new_width, new_height])
                    
                    if len(slice_labels) > 0:
                        slice_df = pd.DataFrame(slice_labels, columns=['class', 'x1', 'y1', 'w', 'h'])
                        slice_df.to_csv(slice_labels_path, sep=' ', index=False, header=False, float_format='%.6f')
                    
                    if not imsaved and falsepath:
                        sliced = imr[i*slice_size:(i+1)*slice_size, j*slice_size:(j+1)*slice_size]
                        sliced_im = Image.fromarray(sliced)
                        filename = imname.split('/')[-1]
                        slice_path = falsepath + "/" + filename.replace(ext, f'_{i}_{j}{ext}')                

                        sliced_im.save(slice_path)
                        print('Slice without boxes saved')
                        imsaved = True
        except:
            continue

In [25]:
all_files = []
for root, dir, files in os.walk("tiling\comb_files"):
    for file in files:
        if file.split('.')[-1] == "tif":
            all_files.append(os.path.join(root, file))

In [48]:
tiler(all_files, 'tiling/sliced', None, 512, '.tif')

100%|██████████| 119/119 [00:48<00:00,  2.44it/s]


In [8]:
from sklearn.model_selection import train_test_split

In [6]:
all_imgs = [i.split('.')[0] for i in os.listdir('xview_sliced\images')]

In [9]:
train, test = train_test_split(all_imgs, test_size=0.1, random_state=45)

In [10]:
len(train), len(test)

(8118, 902)

In [11]:
for root, dir, files in os.walk("xview_sliced\images"):
    for file in tqdm(files):
        img_name = file.split('.')[0]
        file_path = os.path.join(root, file)
        if img_name in train:
            shutil.copy2(file_path, 'xview_sliced\\train')
        elif img_name in test:
            shutil.copy2(file_path, 'xview_sliced\\val')
        else:
            print('error_occ')

100%|██████████| 9020/9020 [01:02<00:00, 143.43it/s]


In [12]:
for root, dir, files in os.walk("xview_sliced\labels"):
    for file in tqdm(files):
        img_name = file.split('.')[0]
        file_path = os.path.join(root, file)
        if img_name in train:
            shutil.copy2(file_path, 'xview_sliced\\train')
        elif img_name in test:
            shutil.copy2(file_path, 'xview_sliced\\val')
        else:
            print('error_occ')

100%|██████████| 9016/9016 [00:34<00:00, 260.16it/s]


In [17]:
for root, dir, files in os.walk("D:\RedPositive Internship\Multiclass\yolov7\custom_dataset\images\\train"):
    for file in tqdm(files):
        # img_name = file.split('.')[0]
        file_path = os.path.join(root, file)
        with open('custom_dataset\\train.txt', 'a') as f:
            f.write(file_path + '\n')

100%|██████████| 8118/8118 [00:00<00:00, 16139.19it/s]


In [65]:
with open('custom_dataset\classes.names') as f:
    data = f.read()

In [70]:
data.replace('\n', '')

'  0: Fixed-wing Aircraft  1: Small Aircraft  2: Cargo Plane  3: Helicopter  4: Passenger Vehicle  5: Small Car  6: Bus  7: Pickup Truck  8: Utility Truck  9: Truck  10: Cargo Truck  11: Truck w/Box  12: Truck Tractor  13: Trailer  14: Truck w/Flatbed  15: Truck w/Liquid  16: Crane Truck  17: Railway Vehicle  18: Passenger Car  19: Cargo Car  20: Flat Car  21: Tank car  22: Locomotive  23: Maritime Vessel  24: Motorboat  25: Sailboat  26: Tugboat  27: Barge  28: Fishing Vessel  29: Ferry  30: Yacht  31: Container Ship  32: Oil Tanker  33: Engineering Vehicle  34: Tower crane  35: Container Crane  36: Reach Stacker  37: Straddle Carrier  38: Mobile Crane  39: Dump Truck  40: Haul Truck  41: Scraper/Tractor  42: Front loader/Bulldozer  43: Excavator  44: Cement Mixer  45: Ground Grader  46: Hut/Tent  47: Shed  48: Building  49: Aircraft Hangar  50: Damaged Building  51: Facility  52: Construction Site  53: Vehicle Lot  54: Helipad  55: Storage Tank  56: Shipping container lot  57: Shippi