In [2]:
%load_ext autoreload
%autoreload 2

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

from src.deep_ad.config import Config
from src.deep_ad.data.dagm_dataset import DAGMDataset, DAGM_dataset_type
from src.deep_ad.data.dagm_utils import dagm_get_class, dagm_get_image_name
from src.deep_ad.image import show_image_with_label

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Load the project configuration; later cells read the dataset location from it (`cfg.DAGM_dir`).
cfg = Config()

# DAGM 2007 Dataset

Display five randomly chosen images, each next to its label, from the defect-only subset of the dataset.

In [4]:
dagm_dataset = DAGMDataset(img_dir=cfg.DAGM_dir, type="Defect-only")
limit = 5  # number of random samples to display


def to_rgb(tensor: torch.Tensor) -> np.ndarray:
    """Convert an image or label tensor from the dataset into an RGB array for display.

    Args:
        tensor: Tensor returned by indexing the dataset; singleton dimensions are squeezed away.

    Returns:
        The RGB array produced by `cv.cvtColor`.
    """
    array = tensor.squeeze().numpy()
    # A 2-D array is single-channel and must go through GRAY2RGB (BGR2RGB rejects it).
    # Anything else is assumed to be a BGR image, as the original conversion implied.
    conversion = cv.COLOR_GRAY2RGB if array.ndim == 2 else cv.COLOR_BGR2RGB
    return cv.cvtColor(array, conversion)


# A bounded loop: unlike a manual countdown inside `while True`, it cannot spin forever when limit <= 0.
for _ in range(limit):
    i = torch.randint(0, len(dagm_dataset), (1,)).item()
    image, label = dagm_dataset[i]
    image_path = dagm_dataset.image_paths[i]
    # Kept on a single line: an f-string literal broken across lines is a syntax error before Python 3.12.
    title = f"Image {i} - class {dagm_get_class(image_path)} - {dagm_get_image_name(image_path)}.png"
    show_image_with_label(to_rgb(image), to_rgb(label), title)

### Dataset statistics

Number of images and defect labels per class, followed by the totals for the whole dataset

In [5]:
STATISTICS_COLUMNS = ["Class", "Images", "Labels", "Defect-free images", "Percentage of images with defects"]
# Named `dataset_type` instead of `type` so the builtin `type()` is not shadowed for the rest of the session.
dataset_type: DAGM_dataset_type = "Original"


def dataset_statistics(name, dataset: DAGMDataset) -> dict:
    """Build one row of the statistics table for `dataset`.

    Args:
        name: Value for the "Class" column (a class identifier or "Total").
        dataset: Dataset whose images (`len(dataset)`) and labels (`dataset.label_paths`) are counted.

    Returns:
        A dict keyed by the names in `STATISTICS_COLUMNS`.
    """
    num_images = len(dataset)
    num_labels = len(dataset.label_paths)
    # An empty dataset has no meaningful percentage; report "n/a" instead of raising ZeroDivisionError.
    percentage = f"{100 * num_labels / num_images:.2f}%" if num_images else "n/a"
    return {
        "Class": name,
        "Images": num_images,
        "Labels": num_labels,
        "Defect-free images": num_images - num_labels,
        "Percentage of images with defects": percentage,
    }


# Build the full dataset first so this cell does not rely on whichever `dagm_dataset` an earlier cell left behind.
dagm_dataset = DAGMDataset(img_dir=cfg.DAGM_dir, type=dataset_type)

# One row per class ...
statistics_rows = [
    dataset_statistics(cls, DAGMDataset(img_dir=cfg.DAGM_dir, classes=[cls], type=dataset_type))
    for cls in dagm_dataset.classes
]
# ... followed by one row for the whole dataset.
statistics_rows.append(dataset_statistics("Total", dagm_dataset))

# Create the frame in one go instead of growing it row by row. The index matches the
# row-by-row version: positional labels for the classes and "Total" for the summary row.
class_statistics_df = pd.DataFrame(
    statistics_rows,
    columns=STATISTICS_COLUMNS,
    index=[*range(len(statistics_rows) - 1), "Total"],
)

# Display statistics without the index column
display(class_statistics_df.style.hide())

Class,Images,Labels,Defect-free images,Percentage of images with defects
1,575,79,496,13.74%
2,575,66,509,11.48%
3,575,66,509,11.48%
4,575,82,493,14.26%
5,575,70,505,12.17%
6,575,83,492,14.43%
7,1150,150,1000,13.04%
8,1150,150,1000,13.04%
9,1150,150,1000,13.04%
10,1150,150,1000,13.04%


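Statistics on the sizes of defects, expressed as numbers of pixels. Each "Root" column is the square root of the corresponding size, i.e. the side length of a square with the same area.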

In [6]:
DEFECT_SIZE_COLUMNS = ["Avg size", "Root avg", "Min size", "Root min", "Max size", "Root max"]
# Named `dataset_type` instead of `type` so the builtin `type()` is not shadowed for the rest of the session.
dataset_type: DAGM_dataset_type = "Original"
dagm_dataset = DAGMDataset(img_dir=cfg.DAGM_dir, type=dataset_type)


def measure_defect_surfaces(paths: list) -> np.ndarray:
    """Measure the defect surface of every label image in `paths`.

    Each label is read as a grayscale image and its surface is the sum of its pixel
    values divided by 255, which is the number of defect pixels when the mask only
    contains the values 0 and 255.

    Args:
        paths: Paths of the label images to read.

    Returns:
        One surface value per path, in the same order as `paths`.

    Raises:
        FileNotFoundError: If OpenCV cannot read one of the label images.
    """
    surfaces = []
    for path in paths:
        label = cv.imread(path, cv.IMREAD_GRAYSCALE)
        # cv.imread signals failure by returning None rather than raising.
        if label is None:
            raise FileNotFoundError(f"Could not read label image: {path}")
        # Only the surface is kept, so the decoded images are not held in memory.
        surfaces.append(label.sum() / 255)
    return np.array(surfaces)


def summarize_surfaces(surfaces: np.ndarray) -> list:
    """Format the average, minimum and maximum surface, each followed by its square root.

    Args:
        surfaces: Defect surfaces as returned by `measure_defect_surfaces`.

    Returns:
        Six strings ordered like `DEFECT_SIZE_COLUMNS`, or "n/a" placeholders when `surfaces` is empty.
    """
    if surfaces.size == 0:
        # min()/max() of an empty array raise; report the absence of labels instead.
        return ["n/a"] * len(DEFECT_SIZE_COLUMNS)
    row = []
    for size in (surfaces.mean(), surfaces.min(), surfaces.max()):
        row += [f"{size:.0f}", f"{np.sqrt(size):.0f}"]
    return row


# Add statistics for each class
statistics_by_row = {}
for cls in dagm_dataset.classes:
    class_dagm_dataset = DAGMDataset(img_dir=cfg.DAGM_dir, classes=[cls], type=dataset_type)
    class_surfaces = measure_defect_surfaces(list(class_dagm_dataset.label_paths.values()))
    statistics_by_row[cls] = summarize_surfaces(class_surfaces)

# Add statistics for whole dataset.
# `label_paths` and `defect_surfaces` stay module-level and index-aligned: the next cell
# uses them to locate the label with the smallest defect.
label_paths = list(dagm_dataset.label_paths.values())
defect_surfaces = measure_defect_surfaces(label_paths)
statistics_by_row["Total"] = summarize_surfaces(defect_surfaces)

defects_statistics_df = pd.DataFrame(
    list(statistics_by_row.values()),
    index=list(statistics_by_row.keys()),
    columns=DEFECT_SIZE_COLUMNS,
)

display(defects_statistics_df)

Unnamed: 0,Avg size,Root avg,Min size,Root min,Max size,Root max
1,9448,97,3886,62,22926,151
2,3030,55,645,25,16421,128
3,3558,60,1103,33,8571,93
4,7185,85,2260,48,13721,117
5,4826,69,1741,42,8961,95
6,21458,146,13820,118,24523,157
7,9683,98,3853,62,22443,150
8,1437,38,2,1,18743,137
9,972,31,736,27,1241,35
10,3484,59,959,31,12669,113


In [7]:
# TODO - fix this label as it contains only 2 pixels
# Position of the smallest defect surface; `label_paths` is index-aligned with `defect_surfaces`.
smallest_defect_position = defect_surfaces.argmin()
print(label_paths[smallest_defect_position])
# Class 8 / image 1951 are read off the path printed above.
print(dagm_dataset.get_index_of_image(8, 1951))

C:\Stefan\Facultate\Licenta\Datasets\DAGM\Class8\Train\Label\1951_label.PNG
6550


In [9]:
# Inspect the suspiciously small label of class 8, image 1951.
# The dataset index is looked up instead of being copied by hand from an earlier
# output, so the cell keeps working if the dataset ordering changes.
image_index = dagm_dataset.get_index_of_image(8, 1951)
image, label = dagm_dataset[image_index]
# Both tensors are converted with GRAY2RGB, i.e. treated as single-channel after squeezing.
image_np: np.ndarray = cv.cvtColor(image.numpy().squeeze(), cv.COLOR_GRAY2RGB)
label_np: np.ndarray = cv.cvtColor(label.numpy().squeeze(), cv.COLOR_GRAY2RGB)
print(label.shape)
print(np.unique(label))
# For a mask holding only 0 and 255, the sum divided by 255 is the number of defect pixels.
print(label.sum())
show_image_with_label(image_np, label_np, "Label")

torch.Size([1, 512, 512])
[  0 255]
tensor(510)
