In [7]:
# Import through the `birdset` package root, matching the
# `birdset.modules.base_module` import used for the model further down.
# Mixing bare `configs.` / `datamodule.` roots with `birdset.` only works when
# the package's inner directory happens to be on sys.path, and can load the
# same module twice under two different names.
from birdset.configs.datamodule_configs import DatasetConfig
from birdset.datamodule.birdset_datamodule import BirdSetDataModule

# Initiate the data module for the HSN configuration of the BirdSet dataset.
dm = BirdSetDataModule(
    dataset=DatasetConfig(
        data_dir='./data_birdset/HSN',          # local directory for the prepared dataset
        dataset_name='HSN',
        hf_path='DBD-research-group/BirdSet',   # dataset repository id
        hf_name='HSN',                          # configuration name within that repository
        n_workers=3,                            # shows up as `num_proc=3` in the Map progress bars
        val_split=0.2,                          # fraction of the training data held out for validation
        task="multilabel",
        classlimit=500,                         # NOTE(review): presumably caps samples per class -- confirm
        eventlimit=5,                           # NOTE(review): presumably caps events per recording -- confirm
        sampling_rate=32000,                    # presumably Hz -- confirm
    ),
)
# Prepare the data (download dataset, map labels, save the splits to disk).
dm.prepare_data()
# Set up the datasets needed for fitting.
dm.setup(stage="fit")
# Get the training dataloader.
train_loader = dm.train_dataloader()
# Pull the first batch as a quick sanity check of the pipeline.
batch = next(iter(train_loader))
# Print the shape of the model inputs and of the label matrix.
print(batch["input_values"].shape)
print(batch["labels"].shape)

Map:   0%|          | 0/38170 [00:00<?, ? examples/s]


Processing labels:   0%|          | 0/21 [00:00<?, ?it/s]
Processing labels:  33%|███▎      | 7/21 [00:00<00:00, 43.14it/s]
Processing labels:  62%|██████▏   | 13/21 [00:00<00:00, 34.46it/s]
Processing labels:  81%|████████  | 17/21 [00:00<00:00, 26.35it/s]
Processing labels: 100%|██████████| 21/21 [00:00<00:00, 26.21it/s]


Map (num_proc=3):   0%|          | 0/17940 [00:00<?, ? examples/s]

Map (num_proc=3):   0%|          | 0/12000 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/14352 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/3588 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/12000 [00:00<?, ? examples/s]

torch.Size([32, 1, 128, 1024])
torch.Size([32, 21])


In [8]:
from lightning import Trainer
# Epoch bounds for training; `max_epochs` is also handed to the model below.
min_epochs, max_epochs = 1, 5

# `fast_dev_run=True` is a smoke test: the trainer runs a single batch per loop
# and suppresses logging and checkpointing.
trainer = Trainer(
    min_epochs=min_epochs,
    max_epochs=max_epochs,
    devices=1,
    fast_dev_run=True,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.


In [9]:
from birdset.modules.base_module import BaseModule
# Build the training module from the datamodule's own settings so the two stay
# in sync; every argument not listed here keeps the BaseModule default.
model = BaseModule(
    len_trainset=dm.len_trainset,
    task=dm.task,
    batch_size=dm.train_batch_size,
    num_epochs=max_epochs,
)

In [10]:
# Run the fit loop; the datamodule supplies the train/validation dataloaders.
trainer.fit(model=model, datamodule=dm)


  | Name                  | Type                   | Params | Mode 
-------------------------------------------------------------------------
0 | loss                  | BCEWithLogitsLoss      | 0      | train
1 | model                 | EfficientNetClassifier | 6.5 M  | train
2 | train_metric          | cmAP                   | 0      | train
3 | valid_metric          | cmAP                   | 0      | train
4 | test_metric           | cmAP                   | 0      | train
5 | valid_metric_best     | MaxMetric              | 0      | train
6 | valid_add_metrics     | MetricCollection       | 0      | train
7 | test_add_metrics      | MetricCollection       | 0      | train
8 | test_complete_metrics | MetricCollection       | 0      | train
-------------------------------------------------------------------------
6.5 M     Trainable params
0         Non-trainable params
6.5 M     Total params
26.158    Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=1` reached.


In [11]:
# Evaluate on the test split; the returned list of metric dicts is the cell output.
trainer.test(model=model, datamodule=dm)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test/BCEWithLogitsLoss': 0.6845703125,
  'test/cmAP': 0.125,
  'test/MultilabelAUROC': 0.095238097012043,
  'test/T1Accuracy': 0.0,
  'test/T3Accuracy': 0.4166666567325592,
  'test/mAP': 0.036089979112148285}]