In [12]:
%load_ext autoreload
%autoreload 2

import cv2 as cv
import pandas as pd
import torch

from src.deep_ad.config import Config
from src.deep_ad.data.dagm_dataset import DAGMDataset, dagm_get_class, dagm_get_image_key, dagm_get_image_name, dagm_get_label_key, dagm_get_label_name
from src.deep_ad.image import show_image_with_label

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
# Load the project configuration.
# The cells below read the dataset location from it via `cfg.DAGM_dir`.
cfg = Config()

#### DAGM 2007 Dataset


In [4]:
# Display some random images from the dataset.
#
# Indices are drawn uniformly at random. A sample whose label tensor is entirely
# zero is skipped and another index is drawn, so only samples with at least one
# non-zero label value are shown (presumably the defective ones - confirm
# against DAGMDataset).
dagm_dataset = DAGMDataset(img_dir=cfg.DAGM_dir)
limit = 1  # number of images to display
max_attempts = 100 * limit  # cap on random draws so the cell always terminates

shown = 0
for _ in range(max_attempts):
    if shown >= limit:
        break

    i = torch.randint(0, len(dagm_dataset), (1,)).item()
    image, label = dagm_dataset[i]
    if torch.all(label == 0):
        continue

    image_path = dagm_dataset.image_paths[i]
    # Keep every replacement field on a single line: a line break inside `{...}`
    # of a single-quoted f-string is only accepted from Python 3.12 onwards.
    title = f"Image {i} - class {dagm_get_class(image_path)} - {dagm_get_image_name(image_path)}.png"

    # Tensors -> NumPy arrays, with channels reordered from BGR to RGB for display.
    image = cv.cvtColor(image.squeeze().numpy(), cv.COLOR_BGR2RGB)
    label = cv.cvtColor(label.squeeze().numpy(), cv.COLOR_BGR2RGB)
    show_image_with_label(image, label, title)
    shown += 1

if shown < limit:
    # Make the shortfall visible instead of looping forever looking for more samples.
    print(f"Displayed {shown} of {limit} requested images after {max_attempts} random draws.")

In [49]:
# Dataset statistics
#
# Builds one row per DAGM class plus a final "Total" row. For each row the
# number of defect-free images is computed as (number of images) minus
# (number of label paths), which assumes every label path belongs to exactly
# one image of that dataset - TODO confirm against DAGMDataset.


def _dagm_stats_row(name, num_images, num_labels):
    """Return one row of the statistics table as a column -> value dict.

    Args:
        name: Value for the "Class" column (a class identifier or "Total").
        num_images: Number of images in the (sub)dataset.
        num_labels: Number of label paths in the (sub)dataset.

    Returns:
        A dict keyed by the table's column names. The percentage is formatted
        with two decimals, or "n/a" when there are no images (avoids a
        ZeroDivisionError on an empty dataset).
    """
    if num_images:
        defect_percentage = f"{100 * num_labels / num_images:.2f}%"
    else:
        defect_percentage = "n/a"
    return {
        "Class": name,
        "Number of images": num_images,
        "Number of labels": num_labels,
        "Number of defect-free images": num_images - num_labels,
        "Percentage of images with defects": defect_percentage,
    }


# Load the full dataset up front: it supplies both the class list and the totals,
# so this cell does not rely on a `dagm_dataset` left over from an earlier cell.
dagm_dataset = DAGMDataset(img_dir=cfg.DAGM_dir)

# Collect rows in a list and build the DataFrame once instead of growing it row by row.
stats_rows = []

# Add class statistics
for cls in dagm_dataset.classes:
    class_dagm_dataset = DAGMDataset(img_dir=cfg.DAGM_dir, class_index=cls)
    class_total_images = len(class_dagm_dataset)
    class_total_labels = len(class_dagm_dataset.label_paths)
    stats_rows.append(_dagm_stats_row(cls, class_total_images, class_total_labels))

# Add total statistics
total_images = len(dagm_dataset)
total_labels = len(dagm_dataset.label_paths)
stats_rows.append(_dagm_stats_row("Total", total_images, total_labels))

# Keep the row labels the table had before (0..n-1 for classes, "Total" for the
# summary) so lookups such as `class_statistics_df.loc["Total"]` keep working.
class_statistics_df = pd.DataFrame(
    stats_rows,
    index=[*range(len(stats_rows) - 1), "Total"],
)

# Display statistics without the index column
display(class_statistics_df.style.hide())

Class,Number of images,Number of labels,Number of defect-free images,Percentage of images with defects
1,575,79,496,13.74%
2,575,66,509,11.48%
3,575,66,509,11.48%
4,575,82,493,14.26%
5,575,70,505,12.17%
6,575,83,492,14.43%
7,1150,150,1000,13.04%
8,1150,150,1000,13.04%
9,1150,150,1000,13.04%
10,1150,150,1000,13.04%
