# Example for loading the iNat2021-OSR pytorch datasets

In [1]:
import numpy as np
import sys
sys.path.append('..')

from datasets.open_set_datasets import get_class_splits, get_datasets




## Loading iNat2021-OSR dataset for hop 1 to 7

Note: 'id' stands for fine-grained species labels.


In [2]:
# Build the iNat2021-OSR (aves, species-level 'id' labels) datasets for each
# hop distance from 1 through 7.
for hop in range(1, 8):
    dataset_name = f'inat21-osr-aves-id-{hop}hop'
    print("----------------------------------------", f"Creating dataset {dataset_name}", sep="\n")

    # Class ids of the closed-set (train) and open-set splits for this dataset.
    train_classes, open_set_classes = get_class_splits(dataset_name)

    # PyTorch datasets, returned as a dict keyed by split name.
    dataset_dict = get_datasets(
        dataset_name,
        transform='visualize',
        train_classes=train_classes,
        open_set_classes=open_set_classes,
        balance_open_set_eval=True,
        split_train_val=True,
        image_size=224,
    )

    print("dataset_dict keys: ", dataset_dict.keys())

    # The taxonomy is exposed on the dataset wrapped by the test_known split.
    taxonomy = dataset_dict["test_known"].dataset.taxonomy

----------------------------------------
Creating dataset inat21-osr-aves-id-1hop
Loading datasets...
Loading annotations from: train.json
	2686843 images
	10000 classes
Loading annotations from: val.json
	100000 images
	10000 classes
Before balancing test datasets
train:	189290
val:	21033
test_known:	7450
test_unknown:	2970
balancing test known and unknown...
After balancing test_known and test_unknown datasets
train:	189290
val:	21033
test_known:	2970
test_unknown:	2970
dataset_dict keys:  dict_keys(['train', 'val', 'test_known', 'test_unknown', 'test_known_all'])
----------------------------------------
Creating dataset inat21-osr-aves-id-2hop
Loading datasets...
Loading annotations from: train.json
	2686843 images
	10000 classes
Loading annotations from: val.json
	100000 images
	10000 classes
Before balancing test datasets
train:	189290
val:	21033
test_known:	7450
test_unknown:	1800
balancing test known and unknown...
After balancing test_known and test_unknown datasets
train:	1892

## Loading iNat2021-OSR with coarser taxonomic labels

We can transform species labels on the fly to coarser taxonomic levels, e.g. genus, family, order. 

This yields the same images, but with coarser taxonomic labels.

The labels are transformed in the target transforms of `InatOSRWrapper.__getitem__()`. 

Note: 'id' stands for species (default).


In [3]:
# Build the 1-hop dataset once per label granularity and report how many
# distinct labels appear in the test_known split at that granularity.
for target_rank in ("id", "genus", "family", "order"):

    dataset_name = f'inat21-osr-aves-{target_rank}-1hop'
    print("----------------------------------------", f"Creating dataset {dataset_name}", sep="\n")

    # Class ids of the closed-set (train) and open-set splits for this dataset.
    train_classes, open_set_classes = get_class_splits(dataset_name)

    # PyTorch datasets, returned as a dict keyed by split name.
    dataset_dict = get_datasets(
        dataset_name,
        transform='visualize',
        train_classes=train_classes,
        open_set_classes=open_set_classes,
        balance_open_set_eval=True,
        split_train_val=True,
        image_size=224,
    )

    print("dataset_dict keys: ", dataset_dict.keys())

    # Collect the label of every test_known sample by indexing the dataset item
    # by item, then count the unique values.
    # NOTE(review): indexing each sample presumably also loads its image, so
    # this pass may be slow on large splits — confirm against the wrapper.
    targets = [
        dataset_dict["test_known"][i]['labels']
        for i in range(len(dataset_dict["test_known"]))
    ]
    print(f"With target_rank {target_rank} the number of unique targets in test_known: {np.unique(targets).size}")


----------------------------------------
Creating dataset inat21-osr-aves-id-1hop
Loading datasets...
Loading annotations from: train.json
	2686843 images
	10000 classes
Loading annotations from: val.json
	100000 images
	10000 classes
Before balancing test datasets
train:	189290
val:	21033
test_known:	7450
test_unknown:	2970
balancing test known and unknown...
After balancing test_known and test_unknown datasets
train:	189290
val:	21033
test_known:	2970
test_unknown:	2970
dataset_dict keys:  dict_keys(['train', 'val', 'test_known', 'test_unknown', 'test_known_all'])
With target_rank id the number of unique targets in test_known: 737
----------------------------------------
Creating dataset inat21-osr-aves-genus-1hop
Loading datasets...
Loading annotations from: train.json
	2686843 images
	10000 classes
Loading annotations from: val.json
	100000 images
	10000 classes
Before balancing test datasets
train:	189290
val:	21033
test_known:	7450
test_unknown:	2970
balancing test known and unkn