In [1]:
import os
import pandas as pd
import numpy as np
from cleanlab import Datalab

In [2]:
# Arrays that are later handed to Datalab.find_issues as `features` / `pred_probs`.
features = np.load(file="features.npy")
pred_probs = np.load(file="pred_probs.npy")

# Only the first column of labels.npy is used as the label vector.
true_labels = np.load(file="labels.npy")[:, 0]

# NOTE(review): allow_pickle=True unpickles objects stored in the file, which can
# execute arbitrary code -- only load image_paths.npy from a trusted source.
image_paths = np.load(file="image_paths.npy", allow_pickle=True)

In [3]:
# One row per entry of image_paths, in the same order.
data = pd.DataFrame(
    dict(
        id=np.arange(len(image_paths)),  # positional ID: 0 .. len(image_paths) - 1
        label=true_labels,  # label column consumed by Datalab (label_name="label")
        image_path=image_paths,  # kept so flagged rows can be mapped back to files
    )
)

In [None]:
# Initialize Cleanlab on the metadata frame; "label" is the column holding the given labels
lab = Datalab(data=data, label_name="label")

# Run the issue checks using the precomputed features and predicted probabilities
lab.find_issues(features=features, pred_probs=pred_probs)

# Generate report
lab.report()

# get_issues() returns ONE ROW PER EXAMPLE (flagged or not), with an
# `is_<type>_issue` boolean and a `<type>_score` column per issue type.
# Using its index directly would therefore select every image in the dataset.
all_issues = lab.get_issues()

# Keep only the examples flagged as label issues, most severe first
# (cleanlab scores are in [0, 1]; lower means more likely to be an issue).
problematic_samples = all_issues[all_issues["is_label_issue"]].sort_values("label_score")

# Get image file paths of the flagged samples, preserving the severity ordering
problematic_images = data.loc[problematic_samples.index, "image_path"]

print("Problematic images:")
print(problematic_images.head())

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

# Display the first five entries of problematic_images, one figure per image
for img_path in problematic_images.head(5):
    # The context manager closes the underlying file handle once the image has
    # been drawn; imshow is called while the file is still open.
    with Image.open(img_path) as img:
        plt.imshow(img)
    plt.title(f"Problematic Image: {os.path.basename(img_path)}")
    plt.axis("off")
    plt.show()