In [7]:
import numpy as np
import pandas as pd

import torch

from sklearn.cluster import KMeans

from transformers import AutoImageProcessor
from datasets import Dataset, DatasetDict, Features, Image, Sequence, Value, concatenate_datasets
import albumentations as A
from albumentations.pytorch import ToTensorV2
import PIL
import cv2

from tqdm import tqdm

import os

In [2]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [2]:
data_path = "dataset_segmented/"

In [3]:
train_df = pd.read_csv(data_path + "train/_classes.csv")
valid_df = pd.read_csv(data_path + "valid/_classes.csv")
test_df = pd.read_csv(data_path + "test/_classes.csv")

In [4]:
train_df

Unnamed: 0,filename,labels
0,image_00000.png,"[0.0, 0.0, 1.0]"
1,image_00001.png,"[1.0, 1.0, 1.0]"
2,image_00002.png,"[0.0, 1.0, 0.0]"
3,image_00003.png,"[0.0, 1.0, 0.0]"
4,image_00004.png,"[0.0, 1.0, 1.0]"
...,...,...
614,image_00614.png,"[1.0, 1.0, 1.0]"
615,image_00615.png,"[1.0, 1.0, 0.0]"
616,image_00616.png,"[1.0, 0.0, 1.0]"
617,image_00617.png,"[1.0, 1.0, 1.0]"


In [5]:
def extract_lab_features(image):
    # Identify black pixels in the original image (RGB values are all zero)
    black_mask = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) == 0
    # Create an inverse mask for non-black pixels
    non_black_mask = ~black_mask

    lab_image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    L_channel, A_channel, B_channel = cv2.split(lab_image)

    # Apply the non-black mask to each channel
    L_non_black = L_channel[non_black_mask]
    A_non_black = A_channel[non_black_mask]
    B_non_black = B_channel[non_black_mask]

    if L_non_black.size == 0:
        l_mean = 0
        a_mean = 0
        b_mean = 0
    else:
        l_mean = np.mean(L_non_black)
        a_mean = np.mean(A_non_black)
        b_mean = np.mean(B_non_black)

    return l_mean, a_mean, b_mean

In [9]:
splits = ['train', 'valid', 'test']

In [None]:
for split in splits:
    base_image_path = f'dataset_segmented/{split}/'
    images = os.listdir(base_image_path)
    filenames = []
    l_values = []
    a_values = []
    b_values = []

    for image_file in tqdm(images, desc="Parsing images"):
        if image_file.lower().endswith(('.png', '.jpg', '.jpeg')):
            full_image_path = os.path.join(base_image_path, image_file)
            image = cv2.imread(full_image_path)
            if image is None:
                print(f"Error: Unable to read image at {full_image_path}")
                continue
            
            l_mean, a_mean, b_mean = extract_lab_features(image)
            
            filenames.append(full_image_path)
            l_values.append(l_mean)
            a_values.append(a_mean)
            b_values.append(b_mean)

    df = pd.DataFrame({
        'image': filenames,
        'L': l_values,
        'A': a_values,
        'B': b_values
    })

    X = df[['A', 'B']]
    kmeans = KMeans(n_clusters=6, random_state=0)
    kmeans.fit(X)
    df['cluster'] = kmeans.labels_

    # Save to .csv (optional)
    df.to_csv(f'6_image_clusters_{split}.csv', index=False)
    

Parsing images: 100%|██████████| 620/620 [00:13<00:00, 45.53it/s]
Parsing images: 100%|██████████| 182/182 [00:03<00:00, 48.21it/s]
Parsing images: 100%|██████████| 95/95 [00:01<00:00, 50.91it/s]


In [14]:
# Append image path to filename
def add_image_path(df, split):
    df['image'] = data_path + split + "/" + split + '_' + df['filename']
    df = df[df['image'].apply(os.path.exists)]
    return df[['image', 'labels']]

train_dataset_df = add_image_path(train_df, "train")
valid_dataset_df = add_image_path(valid_df, "valid")
test_dataset_df = add_image_path(test_df, "test")

In [15]:
train_dataset_df

Unnamed: 0,image,labels
0,dataset_segmented/train/train_image_00000.png,"[0.0, 0.0, 1.0]"
1,dataset_segmented/train/train_image_00001.png,"[1.0, 1.0, 1.0]"
2,dataset_segmented/train/train_image_00002.png,"[0.0, 1.0, 0.0]"
3,dataset_segmented/train/train_image_00003.png,"[0.0, 1.0, 0.0]"
4,dataset_segmented/train/train_image_00004.png,"[0.0, 1.0, 1.0]"
...,...,...
614,dataset_segmented/train/train_image_00614.png,"[1.0, 1.0, 1.0]"
615,dataset_segmented/train/train_image_00615.png,"[1.0, 1.0, 0.0]"
616,dataset_segmented/train/train_image_00616.png,"[1.0, 0.0, 1.0]"
617,dataset_segmented/train/train_image_00617.png,"[1.0, 1.0, 1.0]"


In [32]:
train_clusters = pd.read_csv('6_image_clusters_train.csv')
valid_clusters = pd.read_csv('6_image_clusters_valid.csv')
test_clusters = pd.read_csv('6_image_clusters_test.csv')

In [33]:
merged_df_train = train_dataset_df.merge(train_clusters, on='image')
merged_df_valid = valid_dataset_df.merge(valid_clusters, on='image')
merged_df_test = test_dataset_df.merge(test_clusters, on='image')

In [34]:
import ast

merged_df_train['labels'] = merged_df_train['labels'].apply(ast.literal_eval)
merged_df_valid['labels'] = merged_df_valid['labels'].apply(ast.literal_eval)
merged_df_test['labels'] = merged_df_test['labels'].apply(ast.literal_eval)

In [35]:
merged_df_test.loc[0]['labels']

[0.0, 1.0, 0.0]

In [36]:
merged_df_train['labels'] = merged_df_train.apply(lambda row: row['labels'] + [float(row['cluster'])], axis=1)
merged_df_valid['labels'] = merged_df_valid.apply(lambda row: row['labels'] + [float(row['cluster'])], axis=1)
merged_df_test['labels'] = merged_df_test.apply(lambda row: row['labels'] + [float(row['cluster'])], axis=1)

In [42]:
merged_df_train.drop(columns=['L', 'A', 'B', 'cluster'], inplace=True)
merged_df_valid.drop(columns=['L', 'A', 'B', 'cluster'], inplace=True)
merged_df_test.drop(columns=['L', 'A', 'B', 'cluster'], inplace=True)

In [43]:
merged_df_train

Unnamed: 0,image,labels
0,dataset_segmented/train/train_image_00000.png,"[0.0, 0.0, 1.0, 2.0]"
1,dataset_segmented/train/train_image_00001.png,"[1.0, 1.0, 1.0, 2.0]"
2,dataset_segmented/train/train_image_00002.png,"[0.0, 1.0, 0.0, 4.0]"
3,dataset_segmented/train/train_image_00003.png,"[0.0, 1.0, 0.0, 4.0]"
4,dataset_segmented/train/train_image_00004.png,"[0.0, 1.0, 1.0, 0.0]"
...,...,...
614,dataset_segmented/train/train_image_00614.png,"[1.0, 1.0, 1.0, 3.0]"
615,dataset_segmented/train/train_image_00615.png,"[1.0, 1.0, 0.0, 1.0]"
616,dataset_segmented/train/train_image_00616.png,"[1.0, 0.0, 1.0, 2.0]"
617,dataset_segmented/train/train_image_00617.png,"[1.0, 1.0, 1.0, 1.0]"


In [46]:
merged_df_train.loc[0]['labels']

[0.0, 0.0, 1.0, 2.0]

In [44]:
num_classes = 4
class_names = ['Crack', 'Red-Dots', 'Toothmark', 'Color']

# Labels is an array of floats
features = Features({
    'image': Image(),
    'labels': Sequence(feature=Value('float32'), length=num_classes)
})

In [47]:
train_dataset = Dataset.from_pandas(merged_df_train, features=features, preserve_index=False)
valid_dataset = Dataset.from_pandas(merged_df_valid, features=features, preserve_index=False)
test_dataset = Dataset.from_pandas(merged_df_test, features=features, preserve_index=False)

In [51]:
dataset = DatasetDict({'train': train_dataset, 'validation': valid_dataset, 'test': test_dataset})

In [52]:
dataset['train'][0]

{'image': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x640>,
 'labels': [0.0, 0.0, 1.0, 2.0]}