# Preparing and saving datasets for future work

## `WingsDataset` with resized images

In [2]:
import torch

from wings.config import RAW_DATA_DIR, PROCESSED_DATA_DIR
from wings.dataset import WingsDataset, WingsDatasetRectangleImages, MasksDataset
from wings.visualizing.image_preprocess import resize_preprocess, fit_rectangle_preprocess, unet_preprocess

# Two-letter country codes; this list is passed as the first argument to
# every dataset constructor in this notebook.
countries = [
    'AT',
    'GR',
    'HR',
    'HU',
    'MD',
    'PL',
    'RO',
    'SI',
]

[32m2025-04-13 19:38:05.785[0m | [1mINFO    [0m | [36mwings.config[0m:[36m<module>[0m:[36m40[0m - [1mPROJ_ROOT path is: /home/mkrajew/bees[0m
[32m2025-04-13 19:38:05.887[0m | [1mINFO    [0m | [36mwings.config[0m:[36m<module>[0m:[36m62[0m - [1mtorch.cuda.get_device_name()='NVIDIA RTX A3000 12GB Laptop GPU'[0m


In [8]:
# Build the dataset over the raw data for the selected countries, with
# `resize_preprocess` supplied as the image preprocessing callable.
wings_dataset = WingsDataset(
    countries,
    RAW_DATA_DIR,
    resize_preprocess,
)

# split(0.2, 0.1) yields three subsets in train / validation / test order.
# With these arguments the sizes printed in the next cell come out at roughly
# 70% / 20% / 10% of the full dataset.
resize_splits = wings_dataset.split(0.2, 0.1)
train_dataset, val_dataset, test_dataset = resize_splits

100%|██████████| 21722/21722 [00:00<00:00, 111033.66it/s]


In [9]:
# Sanity check: show the size of each split (train, val, test) followed by
# their sum, which should equal the total number of samples in the dataset.
split_sizes = [len(subset) for subset in (train_dataset, val_dataset, test_dataset)]
for subset_size in split_sizes:
    print(subset_size)
print(sum(split_sizes))

15206
4344
2172
21722


In [4]:
# Persist each split of the resized-image dataset as its own .pth file so it
# can be reloaded for future work without rebuilding the dataset.
# NOTE: torch.save pickles the whole dataset object, so loading these files
# later requires the `wings` package to be importable.
resize_dir = PROCESSED_DATA_DIR / "resize_datasets"

# torch.save does not create missing parent directories; make sure the
# target folder exists so this cell also works on a fresh checkout.
# NOTE(review): assumes PROCESSED_DATA_DIR is a pathlib.Path — confirm in wings.config.
resize_dir.mkdir(parents=True, exist_ok=True)

torch.save(train_dataset, resize_dir / 'train_dataset2.pth')
torch.save(val_dataset, resize_dir / 'val_dataset2.pth')
torch.save(test_dataset, resize_dir / 'test_dataset2.pth')

## `WingsDatasetRectangleImages` with images resized while keeping the aspect ratio, with the remaining space padded

In [10]:
# Build the rectangle-image variant of the dataset over the raw data for the
# selected countries, with `fit_rectangle_preprocess` as the preprocessing
# callable.
wings_rec_dataset = WingsDatasetRectangleImages(
    countries,
    RAW_DATA_DIR,
    fit_rectangle_preprocess,
)

# split(0.2, 0.1) yields three subsets in train / validation / test order.
# NOTE(review): presumably 0.2 and 0.1 are the validation and test fractions —
# confirm against WingsDatasetRectangleImages.split.
rectangle_splits = wings_rec_dataset.split(0.2, 0.1)
train_rec_dataset, val_rec_dataset, test_rec_dataset = rectangle_splits


100%|██████████| 21722/21722 [00:00<00:00, 116024.31it/s]


In [11]:
# Persist each split of the rectangle-image dataset as its own .pth file so
# it can be reloaded for future work without rebuilding the dataset.
rectangle_dir = PROCESSED_DATA_DIR / "rectangle_datasets"

# torch.save does not create missing parent directories; make sure the
# target folder exists so this cell also works on a fresh checkout.
# NOTE(review): assumes PROCESSED_DATA_DIR is a pathlib.Path — confirm in wings.config.
rectangle_dir.mkdir(parents=True, exist_ok=True)

torch.save(train_rec_dataset, rectangle_dir / 'train_rec_dataset2.pth')
torch.save(val_rec_dataset, rectangle_dir / 'val_rec_dataset2.pth')
torch.save(test_rec_dataset, rectangle_dir / 'test_rec_dataset2.pth')

## `MasksDataset` for `UNet` model

In [3]:
# Build the masks dataset over the raw data for the selected countries, with
# `unet_preprocess` as the preprocessing callable.
mask_dataset = MasksDataset(
    countries,
    RAW_DATA_DIR,
    unet_preprocess,
)

# split(0.2, 0.1) yields three subsets in train / validation / test order.
# NOTE(review): presumably 0.2 and 0.1 are the validation and test fractions —
# confirm against MasksDataset.split.
mask_splits = mask_dataset.split(0.2, 0.1)
train_mask_dataset, val_mask_dataset, test_mask_dataset = mask_splits

100%|██████████| 21722/21722 [00:00<00:00, 105847.64it/s]


In [4]:
# Persist each split of the masks dataset as its own .pth file so it can be
# reloaded for future work without rebuilding the dataset.
mask_dir = PROCESSED_DATA_DIR / "mask_datasets"

# torch.save does not create missing parent directories; make sure the
# target folder exists so this cell also works on a fresh checkout.
# NOTE(review): assumes PROCESSED_DATA_DIR is a pathlib.Path — confirm in wings.config.
mask_dir.mkdir(parents=True, exist_ok=True)

torch.save(train_mask_dataset, mask_dir / 'train_mask_dataset.pth')
torch.save(val_mask_dataset, mask_dir / 'val_mask_dataset.pth')
torch.save(test_mask_dataset, mask_dir / 'test_mask_dataset.pth')