In [1]:
from src.data.make_dataset import make_dataset
from src.data.image_loaders import AerialCarsSquareImageLoader, AerialCarsFixedSizeImageLoader, DOTASquareImageLoader
from src.data.dataset_loaders import AerialCarsDatasetLoader, DOTADatasetLoader
from src.features.pipelines import RawImageToImage
from src.features.processors import NormImage, Resize

## Aerial cars dataset

In [3]:
# Aerial-cars dataset loader backed by the square image loader.
dataset_loader = AerialCarsDatasetLoader(image_loader=AerialCarsSquareImageLoader())

# Pipeline configured with a Resize to (100, 100) followed by NormImage.
process_pipeline = RawImageToImage(processors=[Resize((100, 100)), NormImage()])

In [4]:
# Build the aerial-cars dataset with the current `dataset_loader` and
# `process_pipeline`; keyword arguments are grouped by concern.
data = make_dataset(
    # Input / output locations (relative paths)
    input_folder_filepath='../../data/raw/aerial-cars-dataset/',
    output_filepath='../../data/processed/aerial-cars-dataset/data.pickle',
    # File extensions for images and annotations
    images_files_types=('jpg', 'JPG', 'png'),
    annotations_files_types=('txt',),
    # Loading and processing
    dataset_loader=dataset_loader,
    process_pipeline=process_pipeline,
    # Negative-example settings
    negative_images_size=(120, 120),
    negative_examples_size=0.5,
    # Execution settings
    workers=0,
    verbose=True,
)

100%|██████████| 71/71 [00:01<00:00, 50.91it/s]
100%|██████████| 22/22 [00:00<00:00, 303.29it/s]
100%|██████████| 328/328 [00:00<00:00, 4879.59it/s]
100%|██████████| 656/656 [00:01<00:00, 455.01it/s]


In [6]:
# Size passed as `bnd_box_size` to the fixed-size image loader.
BND_BOX_SIZE = (100, 100)

# Aerial-cars dataset loader backed by the fixed-size image loader.
dataset_loader = AerialCarsDatasetLoader(
    image_loader=AerialCarsFixedSizeImageLoader(bnd_box_size=BND_BOX_SIZE)
)

# Only NormImage is applied in this variant; the Resize((100, 100)) step is
# deliberately left out.
# NOTE(review): presumably the fixed-size loader already yields BND_BOX_SIZE
# crops, making a resize unnecessary — confirm.
process_pipeline = RawImageToImage(processors=[NormImage()])

In [7]:
# Build the fixed-size variant of the aerial-cars dataset with the current
# `dataset_loader` and `process_pipeline`; negatives use BND_BOX_SIZE.
data = make_dataset(
    # Input / output locations (relative paths)
    input_folder_filepath='../../data/raw/aerial-cars-dataset/',
    output_filepath='../../data/processed/aerial-cars-dataset/data_fixed_norm.pickle',
    # File extensions for images and annotations
    images_files_types=('jpg', 'JPG', 'png'),
    annotations_files_types=('txt',),
    # Loading and processing
    dataset_loader=dataset_loader,
    process_pipeline=process_pipeline,
    # Negative-example settings
    negative_images_size=BND_BOX_SIZE,
    negative_examples_size=0.5,
    # Execution settings
    workers=0,
    verbose=True,
)

100%|██████████| 72/72 [00:01<00:00, 57.78it/s]
100%|██████████| 22/22 [00:00<00:00, 389.98it/s]
100%|██████████| 328/328 [00:00<00:00, 5714.48it/s]
100%|██████████| 656/656 [00:00<00:00, 11545.64it/s]


## DOTA dataset

In [5]:
# DOTA dataset loader backed by the square image loader (min_side_of_box=40).
dataset_loader = DOTADatasetLoader(image_loader=DOTASquareImageLoader(min_side_of_box=40))

# Pipeline configured with a Resize to (100, 100) followed by NormImage.
process_pipeline = RawImageToImage(processors=[Resize((100, 100)), NormImage()])

In [4]:
# DOTA part 1: build the dataset with the current `dataset_loader` and
# `process_pipeline`.
data1 = make_dataset(
    # Input / output locations (relative paths)
    input_folder_filepath='../../data/raw/dota/part1',
    output_filepath='../../data/processed/dota/data1.pickle',
    # File extensions for images and annotations
    images_files_types=('jpg', 'JPG', 'png'),
    annotations_files_types=('txt',),
    # Loading and processing
    dataset_loader=dataset_loader,
    process_pipeline=process_pipeline,
    # Negative-example settings
    negative_images_size=(120, 120),
    negative_examples_size=0.5,
    # Execution settings
    workers=7,
    verbose=True,
)

100%|██████████| 528/528 [00:22<00:00, 23.36it/s]
100%|██████████| 146/146 [00:00<00:00, 940.74it/s]
100%|██████████| 3011/3011 [00:00<00:00, 4449.17it/s]


In [5]:
# DOTA part 2: build the dataset with the current `dataset_loader` and
# `process_pipeline`.
data2 = make_dataset(
    # Input / output locations (relative paths)
    input_folder_filepath='../../data/raw/dota/part2',
    output_filepath='../../data/processed/dota/data2.pickle',
    # File extensions for images and annotations
    images_files_types=('jpg', 'JPG', 'png'),
    annotations_files_types=('txt',),
    # Loading and processing
    dataset_loader=dataset_loader,
    process_pipeline=process_pipeline,
    # Negative-example settings
    negative_images_size=(120, 120),
    negative_examples_size=0.5,
    # Execution settings
    workers=7,
    verbose=True,
)

100%|██████████| 509/509 [01:19<00:00,  6.41it/s]
100%|██████████| 73/73 [00:00<00:00, 326.23it/s]
100%|██████████| 4261/4261 [00:00<00:00, 4532.87it/s]


In [6]:
# DOTA part 3: build the dataset with the current `dataset_loader` and
# `process_pipeline`.
data3 = make_dataset(
    # Input / output locations (relative paths)
    input_folder_filepath='../../data/raw/dota/part3',
    output_filepath='../../data/processed/dota/data3.pickle',
    # File extensions for images and annotations
    images_files_types=('jpg', 'JPG', 'png'),
    annotations_files_types=('txt',),
    # Loading and processing
    dataset_loader=dataset_loader,
    process_pipeline=process_pipeline,
    # Negative-example settings
    negative_images_size=(120, 120),
    negative_examples_size=0.5,
    # Execution settings
    workers=7,
    verbose=True,
)

100%|██████████| 513/513 [00:22<00:00, 22.46it/s]
100%|██████████| 31/31 [00:00<00:00, 231.09it/s]
100%|██████████| 1795/1795 [00:00<00:00, 4942.42it/s]


In [7]:
# DOTA part 4: build the dataset with the current `dataset_loader` and
# `process_pipeline`.
data4 = make_dataset(
    # Input / output locations (relative paths)
    input_folder_filepath='../../data/raw/dota/part4',
    output_filepath='../../data/processed/dota/data4.pickle',
    # File extensions for images and annotations
    images_files_types=('jpg', 'JPG', 'png'),
    annotations_files_types=('txt',),
    # Loading and processing
    dataset_loader=dataset_loader,
    process_pipeline=process_pipeline,
    # Negative-example settings
    negative_images_size=(120, 120),
    negative_examples_size=0.5,
    # Execution settings
    workers=7,
    verbose=True,
)

100%|██████████| 508/508 [00:16<00:00, 30.51it/s]
100%|██████████| 113/113 [00:00<00:00, 1938.70it/s]
100%|██████████| 1295/1295 [00:00<00:00, 5342.71it/s]


In [8]:
# DOTA part 5: build the dataset with the current `dataset_loader` and
# `process_pipeline`.
data5 = make_dataset(
    # Input / output locations (relative paths)
    input_folder_filepath='../../data/raw/dota/part5',
    output_filepath='../../data/processed/dota/data5.pickle',
    # File extensions for images and annotations
    images_files_types=('jpg', 'JPG', 'png'),
    annotations_files_types=('txt',),
    # Loading and processing
    dataset_loader=dataset_loader,
    process_pipeline=process_pipeline,
    # Negative-example settings
    negative_images_size=(120, 120),
    negative_examples_size=0.5,
    # Execution settings
    workers=7,
    verbose=True,
)

100%|██████████| 441/441 [00:43<00:00, 10.17it/s]
 61%|██████▏   | 38/62 [00:00<00:00, 378.64it/s]No file ../../data/raw/dota/part5/P1991.txt
100%|██████████| 62/62 [00:00<00:00, 285.86it/s]
100%|██████████| 4302/4302 [00:00<00:00, 4522.40it/s]


In [9]:
# DOTA part 6: build the dataset with the current `dataset_loader` and
# `process_pipeline`.
data6 = make_dataset(
    # Input / output locations (relative paths)
    input_folder_filepath='../../data/raw/dota/part6',
    output_filepath='../../data/processed/dota/data6.pickle',
    # File extensions for images and annotations
    images_files_types=('jpg', 'JPG', 'png'),
    annotations_files_types=('txt',),
    # Loading and processing
    dataset_loader=dataset_loader,
    process_pipeline=process_pipeline,
    # Negative-example settings
    negative_images_size=(120, 120),
    negative_examples_size=0.5,
    # Execution settings
    workers=7,
    verbose=True,
)

 18%|█▊        | 59/323 [00:04<00:19, 13.84it/s]No file ../../data/raw/dota/part6/P2804.txt
100%|██████████| 323/323 [00:36<00:00,  8.94it/s]
100%|██████████| 44/44 [00:00<00:00, 441.33it/s]
100%|██████████| 1948/1948 [00:00<00:00, 4814.97it/s]
