In [6]:
from collections import Counter
from pathlib import Path
from torchvision import datasets


def count_labels_in_imagefolder(dataset_path):
    """Counts the number of images for each label in a PyTorch ImageFolder dataset.

    Args:
        dataset_path (str): Path to the ImageFolder dataset.

    Returns:
        dict: Dictionary with label names as keys and counts as values.
    """
    # Load the dataset
    dataset = datasets.ImageFolder(root=dataset_path)

    # Get the list of labels
    labels = [label for _, label in dataset.samples]

    # Count the occurrences of each label
    label_counts = Counter(labels)

    # Map the label indices to label names
    idx_to_class = {v: k for k, v in dataset.class_to_idx.items()}
    label_counts_named = {
        idx_to_class[idx]: count for idx, count in label_counts.items()
    }

    return label_counts_named


In [7]:
d1_train_path = Path("./data/eksperimen-1/jpg-train/")
d1_train_count = count_labels_in_imagefolder(d1_train_path)

d1_test_path = Path("./data/eksperimen-1/jpg-test/")
d1_test_count = count_labels_in_imagefolder(d1_test_path)

# Get the unique set of labels from both datasets
all_labels = set(d1_train_count.keys()).union(set(d1_test_count.keys()))

# Display the counts side by side
print(f"{'D1':<15} {'Train':<10} {'Test':<10}")
print("-" * 31)
for label in all_labels:
    train_count = d1_train_count.get(label, 0)
    test_count = d1_test_count.get(label, 0)
    print(f"{label:<15} {train_count:<10} {test_count:<10}")


D1              Train      Test      
-------------------------------
tree            159        69        
downdog         223        97        
plank           266        115       
goddess         180        80        
warrior2        252        109       


In [8]:
d2_train_path = Path("./data/eksperimen-2/train-fix/")
d2_train_count = count_labels_in_imagefolder(d2_train_path)

d2_test_path = Path("./data/eksperimen-2/test-fix/")
d2_test_count = count_labels_in_imagefolder(d2_test_path)

# Get the unique set of labels from both datasets
all_labels = set(d2_train_count.keys()).union(set(d2_test_count.keys()))

# Display the counts side by side
print(f"{'D2':<15} {'Train':<10} {'Test':<10}")
print("-" * 31)
for label in all_labels:
    train_count = d2_train_count.get(label, 0)
    test_count = d2_test_count.get(label, 0)
    print(f"{label:<15} {train_count:<10} {test_count:<10}")


D2              Train      Test      
-------------------------------
tree            156        69        
downdog         214        96        
plank           257        115       
goddess         168        78        
warrior2        241        106       


In [9]:
d3_train_path = Path("./data/eksperimen-3/train/")
d3_train_count = count_labels_in_imagefolder(d3_train_path)

d3_test_path = Path("./data/eksperimen-3/test/")
d3_test_count = count_labels_in_imagefolder(d3_test_path)

# Get the unique set of labels from both datasets
all_labels = set(d3_train_count.keys()).union(set(d3_test_count.keys()))

# Display the counts side by side
print(f"{'D3':<15} {'Train':<10} {'Test':<10}")
print("-" * 31)
for label in all_labels:
    train_count = d3_train_count.get(label, 0)
    test_count = d3_test_count.get(label, 0)
    print(f"{label:<15} {train_count:<10} {test_count:<10}")


D3              Train      Test      
-------------------------------
tree            256        100       
downdog         256        100       
plank           256        100       
goddess         256        100       
warrior2        256        100       


In [10]:
def count_labels_in_imagefolder(dataset_path):
    """Counts the number of images for each label in a PyTorch ImageFolder dataset.

    Args:
        dataset_path (str): Path to the ImageFolder dataset.

    Returns:
        dict: Dictionary with label names as keys and counts as values.
    """
    # Load the dataset
    dataset = datasets.ImageFolder(root=dataset_path)

    # Get the list of labels
    labels = [label for _, label in dataset.samples]

    # Count the occurrences of each label
    label_counts = Counter(labels)

    # Map the label indices to label names
    idx_to_class = {v: k for k, v in dataset.class_to_idx.items()}
    label_counts_named = {
        idx_to_class[idx]: count for idx, count in label_counts.items()
    }

    return label_counts_named


# Example usage
d1_train_path = Path("./data/eksperimen-1/jpg-train/")
d1_train_count = count_labels_in_imagefolder(d1_train_path)

d1_test_path = Path("./data/eksperimen-1/jpg-test/")
d1_test_count = count_labels_in_imagefolder(d1_test_path)

d2_train_path = Path("./data/eksperimen-2/train-fix/")
d2_train_count = count_labels_in_imagefolder(d2_train_path)

d2_test_path = Path("./data/eksperimen-2/test-fix/")
d2_test_count = count_labels_in_imagefolder(d2_test_path)

d3_train_path = Path("./data/eksperimen-3/train/")
d3_train_count = count_labels_in_imagefolder(d3_train_path)

d3_test_path = Path("./data/eksperimen-3/test/")
d3_test_count = count_labels_in_imagefolder(d3_test_path)

# Get the unique set of labels from all datasets
all_labels = sorted(
    set(d1_train_count.keys())
    .union(set(d1_test_count.keys()))
    .union(set(d2_train_count.keys()))
    .union(set(d2_test_count.keys()))
    .union(set(d3_train_count.keys()))
    .union(set(d3_test_count.keys()))
)

# Display the counts side by side
header = ["Dataset", "Split"] + all_labels
print(
    f"{'Dataset':<10} {'Split':<10} "
    + " ".join([f"{label:<10}" for label in all_labels])
)
print("-" * (20 + len(all_labels) * 11))


def print_counts(dataset_name, split_name, counts):
    print(
        f"{dataset_name:<10} {split_name:<10} "
        + " ".join([f"{counts.get(label, 0):<10}" for label in all_labels])
    )


print_counts("D1", "Train", d1_train_count)
print_counts("D2", "Train", d2_train_count)
print_counts("D3", "Train", d3_train_count)
print_counts("D1", "Test", d1_test_count)
print_counts("D2", "Test", d2_test_count)
print_counts("D3", "Test", d3_test_count)


Dataset    Split      downdog    goddess    plank      tree       warrior2  
---------------------------------------------------------------------------
D1         Train      223        180        266        159        252       
D2         Train      214        168        257        156        241       
D3         Train      256        256        256        256        256       
D1         Test       97         80         115        69         109       
D2         Test       96         78         115        69         106       
D3         Test       100        100        100        100        100       
