In [1]:
from pathlib import Path

from torchrs.datasets import RSICD

from src.paths import DATASET_DIR
from src.utils import extract_rsicd_dataset
from src.vocab import Vocab

In [2]:
def summary(dataset: RSICD) -> None:
    """Print descriptive statistics for one RSICD split.

    Reports the number of samples, the tensor size of the first image, the
    caption counts, and vocabulary statistics taken from ``Vocab(dataset)``.

    Args:
        dataset: The split to describe. Samples are read as mappings with an
            ``"x"`` entry (must expose ``.size()``) and a ``"captions"`` entry
            (must support ``len()``).

    Returns:
        None. Everything is written to stdout.

    Notes:
        * The caption total is extrapolated from the first sample instead of
          a hard-coded ``5``; it therefore assumes every sample carries the
          same number of captions as the first one.
        * "Number of words" adds up the second element of every entry in
          ``vocab.frequencies``, i.e. every counted token occurrence.
        * For an empty dataset only the sample count is printed, because
          there is no first sample to inspect.
    """
    vocab = Vocab(dataset)

    num_samples = len(dataset)
    print("Number of samples:", num_samples)
    if num_samples == 0:
        # dataset[0] would fail below; stop after reporting the empty split.
        return

    # Fetch the first sample once and reuse it for both per-sample statistics.
    first_sample = dataset[0]
    captions_per_sample = len(first_sample["captions"])

    print("Image size:", first_sample["x"].size())
    # Estimate: samples x captions-per-sample, measured on the first sample.
    print("Number of captions:", num_samples * captions_per_sample)
    print("Number of captions per sample:", captions_per_sample)
    print("Number of tokens:", len(vocab))
    print("Number of words:", sum(count for _, count in vocab.frequencies))
    # Takes the first ten entries; presumably `frequencies` is sorted by
    # descending count -- confirm against Vocab.
    print("10 most frequent tokens:", vocab.frequencies[:10])

In [3]:
# Extract the RSICD archive. The path is relative, so it is presumably
# resolved against the kernel's working directory -- confirm in the helper.
extract_rsicd_dataset(Path(r"RSICD.zip"))

# Instantiate the three splits, in train/val/test order, from the same root.
train_dataset, val_dataset, test_dataset = (
    RSICD(root=DATASET_DIR, split=split_name)
    for split_name in ("train", "val", "test")
)

Unzipping...
Dataset extracted.


In [4]:
# Print the statistics report for the training split.
summary(train_dataset)

Number of samples: 8734
Image size: torch.Size([3, 224, 224])
Number of captions: 43670
Number of captions per sample: 5
Number of tokens: 2606
Number of words: 508127
10 most frequent tokens: [('.', 43670), ('a', 41732), ('are', 25698), ('green', 21088), ('many', 18872), ('in', 17510), ('trees', 17268), ('and', 16626), ('of', 15084), ('the', 14157)]


In [5]:
# Print the statistics report for the validation split.
# NOTE(review): the recorded output lists '.' 5469 times against a reported
# 5470 captions -- presumably one caption lacks a final period, or the
# caption total is off by one; verify against the raw annotations.
summary(val_dataset)

Number of samples: 1094
Image size: torch.Size([3, 224, 224])
Number of captions: 5470
Number of captions per sample: 5
Number of tokens: 1171
Number of words: 54356
10 most frequent tokens: [('.', 5469), ('a', 4226), ('are', 2763), ('green', 2308), ('the', 2071), ('many', 1926), ('in', 1895), ('trees', 1640), ('of', 1552), ('is', 1521)]


In [6]:
# Print the statistics report for the test split.
summary(test_dataset)

Number of samples: 1093
Image size: torch.Size([3, 224, 224])
Number of captions: 5465
Number of captions per sample: 5
Number of tokens: 1565
Number of words: 67593
10 most frequent tokens: [('.', 5465), ('the', 4386), ('a', 4271), ('is', 2505), ('of', 1922), ('are', 1721), ('with', 1528), ('and', 1522), ('in', 1389), ('there', 1159)]
