# Preparing and saving datasets for future work

## `WingsDataset` with resized images

In [1]:
import torch

from wings.config import RAW_DATA_DIR, PROCESSED_DATA_DIR
from wings.dataset import WingsDataset, WingsDatasetRectangleImages, MasksDataset, MaskRectangleDataset
from wings.visualizing.image_preprocess import resize_preprocess, fit_rectangle_preprocess, unet_preprocess, \
    unet_fit_rectangle_preprocess

# Two-letter country codes; this list is passed as the first argument to every
# dataset constructor in this notebook.
countries = [
    "AT",
    "GR",
    "HR",
    "HU",
    "MD",
    "PL",
    "RO",
    "SI",
]

[32m2025-05-29 15:44:42.596[0m | [1mINFO    [0m | [36mwings.config[0m:[36m<module>[0m:[36m40[0m - [1mPROJ_ROOT path is: /home/mkrajew/bees[0m
[32m2025-05-29 15:44:42.683[0m | [1mINFO    [0m | [36mwings.config[0m:[36m<module>[0m:[36m62[0m - [1mtorch.cuda.get_device_name()='NVIDIA RTX A3000 12GB Laptop GPU'[0m


In [None]:
# Build the dataset for the listed countries from RAW_DATA_DIR.
# `resize_preprocess` is passed as the third argument (presumably the per-image
# preprocessing step — confirm against `WingsDataset`).
wings_dataset = WingsDataset(
    countries,
    RAW_DATA_DIR,
    resize_preprocess,
)

# split(0.2, 0.1) yields three parts, unpacked here as train / val / test.
# NOTE(review): presumably 0.2 and 0.1 are the validation and test fractions —
# confirm against `WingsDataset.split`. No seed is set in this notebook, so if
# the split is random, re-running may produce different splits — verify.
(
    train_dataset,
    val_dataset,
    test_dataset,
) = wings_dataset.split(0.2, 0.1)

In [None]:
# Show the size of each split (train, val, test), then their combined size,
# one value per line.
split_sizes = [len(part) for part in (train_dataset, val_dataset, test_dataset)]
print(*split_sizes, sum(split_sizes), sep="\n")

In [None]:
# Folder that receives the three resized-image splits.
# Assumes PROCESSED_DATA_DIR is a pathlib.Path (it is combined with `/` here).
resize_dir = PROCESSED_DATA_DIR / "resize_datasets"

# torch.save opens the target file for writing and does not create missing parent
# directories, so create the folder up front; this keeps the cell runnable on a
# fresh checkout and is a no-op when the folder already exists.
resize_dir.mkdir(parents=True, exist_ok=True)

# Persist each split under its own file name.
for file_name, dataset_split in (
    ('train_dataset2.pth', train_dataset),
    ('val_dataset2.pth', val_dataset),
    ('test_dataset2.pth', test_dataset),
):
    torch.save(dataset_split, resize_dir / file_name)

## `WingsDatasetRectangleImages` with images resized keeping the aspect ratio and the remaining space padded

In [None]:
# Same inputs as the plain resized dataset, but built with
# `WingsDatasetRectangleImages` and `fit_rectangle_preprocess`.
wings_rec_dataset = WingsDatasetRectangleImages(
    countries,
    RAW_DATA_DIR,
    fit_rectangle_preprocess,
)

# split(0.2, 0.1) yields three parts, unpacked here as train / val / test.
# NOTE(review): presumably 0.2 and 0.1 are the validation and test fractions —
# confirm against the dataset's `split` implementation.
(
    train_rec_dataset,
    val_rec_dataset,
    test_rec_dataset,
) = wings_rec_dataset.split(0.2, 0.1)


In [None]:
# Folder that receives the three rectangle-image splits.
# Assumes PROCESSED_DATA_DIR is a pathlib.Path (it is combined with `/` here).
rectangle_dir = PROCESSED_DATA_DIR / "rectangle_datasets"

# torch.save does not create missing parent directories; create the folder first
# so the cell also works on a fresh checkout (no-op if it already exists).
rectangle_dir.mkdir(parents=True, exist_ok=True)

# Persist each split under its own file name.
for file_name, dataset_split in (
    ('train_rec_dataset2.pth', train_rec_dataset),
    ('val_rec_dataset2.pth', val_rec_dataset),
    ('test_rec_dataset2.pth', test_rec_dataset),
):
    torch.save(dataset_split, rectangle_dir / file_name)

# `MasksDataset` for the `UNet` model

In [5]:
# Mask dataset built with `unet_preprocess`. No `square_size` is passed, so the
# constructor's default applies (presumably 5, given the 'square5' output folder
# used when saving — confirm against `MasksDataset`).
mask_dataset = MasksDataset(
    countries,
    RAW_DATA_DIR,
    unet_preprocess,
)

# split(0.2, 0.1) yields three parts, unpacked here as train / val / test.
# NOTE(review): presumably 0.2 and 0.1 are the validation and test fractions —
# confirm against `MasksDataset.split`.
(
    train_mask_dataset,
    val_mask_dataset,
    test_mask_dataset,
) = mask_dataset.split(0.2, 0.1)

100%|██████████| 21722/21722 [00:00<00:00, 73543.77it/s] 


In [7]:
# Folder that receives the three mask splits for square size 5.
# Assumes PROCESSED_DATA_DIR is a pathlib.Path (it is combined with `/` here).
square5_dir = PROCESSED_DATA_DIR / "mask_datasets" / 'square5'

# torch.save does not create missing parent directories; `parents=True` also
# creates the intermediate "mask_datasets" folder when needed, and
# `exist_ok=True` makes this a no-op if everything is already in place.
square5_dir.mkdir(parents=True, exist_ok=True)

# Persist each split under its own file name.
for file_name, dataset_split in (
    ('train_mask_dataset2.pth', train_mask_dataset),
    ('val_mask_dataset2.pth', val_mask_dataset),
    ('test_mask_dataset2.pth', test_mask_dataset),
):
    torch.save(dataset_split, square5_dir / file_name)

UNet was first trained on the `..._mask_dataset` files and got the following results:
- bad masks: 97
- all masks: 2172
- bad rate: 4.47%
- average MSE over the dataset: 0.469 (calculated on the resized images)
- square size: 5

After that, the implementation of the dataset changed, and the datasets were re-saved under the name `..._mask_dataset2`.
Further training was performed on the new datasets.

## square size 3

In [8]:
# Same mask dataset as the default variant, but with an explicit square size of 3.
mask_dataset_s3 = MasksDataset(
    countries,
    RAW_DATA_DIR,
    unet_preprocess,
    square_size=3,
)

# split(0.2, 0.1) yields three parts, unpacked here as train / val / test.
# NOTE(review): presumably 0.2 and 0.1 are the validation and test fractions —
# confirm against `MasksDataset.split`.
(
    train_mask_dataset_s3,
    val_mask_dataset_s3,
    test_mask_dataset_s3,
) = mask_dataset_s3.split(0.2, 0.1)


100%|██████████| 21722/21722 [00:00<00:00, 99411.96it/s] 


In [9]:
# Folder that receives the three mask splits for square size 3.
# Assumes PROCESSED_DATA_DIR is a pathlib.Path (it is combined with `/` here).
square3_dir = PROCESSED_DATA_DIR / "mask_datasets" / 'square3'

# torch.save does not create missing parent directories; create the folder
# (and any missing parents) first so the cell works on a fresh checkout.
square3_dir.mkdir(parents=True, exist_ok=True)

# Persist each split under its own file name.
for file_name, dataset_split in (
    ('train_mask_dataset.pth', train_mask_dataset_s3),
    ('val_mask_dataset.pth', val_mask_dataset_s3),
    ('test_mask_dataset.pth', test_mask_dataset_s3),
):
    torch.save(dataset_split, square3_dir / file_name)
## square size 3

## UNet rectangle-resized mask dataset

In [2]:
# NOTE: this cell rebinds `mask_dataset`, `train_mask_dataset`, `val_mask_dataset`
# and `test_mask_dataset`, which the `MasksDataset` cell earlier in the notebook
# also assigns. Run this cell immediately before the matching save cell so the
# rectangle variant is the one that gets written.
square_size = 3
mask_dataset = MaskRectangleDataset(
    countries,
    RAW_DATA_DIR,
    unet_fit_rectangle_preprocess,
    square_size=square_size,
)

# split(0.2, 0.1) yields three parts, unpacked here as train / val / test.
# NOTE(review): presumably 0.2 and 0.1 are the validation and test fractions —
# confirm against `MaskRectangleDataset.split`.
(
    train_mask_dataset,
    val_mask_dataset,
    test_mask_dataset,
) = mask_dataset.split(0.2, 0.1)

100%|██████████| 21722/21722 [00:00<00:00, 88226.83it/s] 


In [3]:
# Folder that receives the three rectangle mask splits.
# Assumes PROCESSED_DATA_DIR is a pathlib.Path (it is combined with `/` here).
rectangle_mask_dir = PROCESSED_DATA_DIR / "mask_datasets" / 'rectangle'

# torch.save does not create missing parent directories; create the folder
# (and any missing parents) first so the cell works on a fresh checkout.
rectangle_mask_dir.mkdir(parents=True, exist_ok=True)

# NOTE: `train_mask_dataset` / `val_mask_dataset` / `test_mask_dataset` are also
# assigned by the `MasksDataset` cell earlier in the notebook; make sure the
# `MaskRectangleDataset` cell was the last one to bind them before running this.
for file_name, dataset_split in (
    ('train_mask_dataset.pth', train_mask_dataset),
    ('val_mask_dataset.pth', val_mask_dataset),
    ('test_mask_dataset.pth', test_mask_dataset),
):
    torch.save(dataset_split, rectangle_mask_dir / file_name)