In [None]:
# Set up the environment and imports
%pip install pyyaml tqdm termcolor

# Import module
from handlers.dataset_cleanup import DatasetCleanupHandler
from utils.logger import SmartCashLogger

# Create the logger used throughout this demo.
logger = SmartCashLogger("cleanup_demo")

# Build the DatasetCleanupHandler from the base configuration file, giving it
# the data directory, the backup directory, and the logger created above.
cleanup_handler = DatasetCleanupHandler(
    config_path="configs/base_config.yaml",
    data_dir="data",
    backup_dir="backup",
    logger=logger,
)

# Show the augmentation file patterns listed in the handler's configuration.
print("Pola file yang akan dibersihkan:")
augmentation_patterns = cleanup_handler.config['cleanup']['augmentation_patterns']
for pattern in augmentation_patterns:
    print(f"- {pattern}")

# Run the cleanup and keep the returned statistics for the plot further down.
# augmented_only=True -> only remove augmentation files
# create_backup=True  -> create a backup before deleting
stats = cleanup_handler.cleanup(augmented_only=True, create_backup=True)

# Visualize the results with matplotlib
import matplotlib.pyplot as plt

# Collect the image and label counts for each stage of the cleanup.
# The category captions line up one-to-one with the keys read from `stats`.
categories = ['Sebelum', 'Dihapus', 'Setelah']
stages = ('before', 'removed', 'after')
images = [stats[stage]['images'] for stage in stages]
labels = [stats[stage]['labels'] for stage in stages]

# Grouped bar chart: one pair of bars (images, labels) per category.
x = range(len(categories))
width = 0.35
half_width = width / 2

# Shift each series half a bar width left/right of the tick so the two bars
# of a pair sit side by side.
image_positions = [i - half_width for i in x]
label_positions = [i + half_width for i in x]

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(image_positions, images, width, label='Gambar', color='skyblue')
ax.bar(label_positions, labels, width, label='Label', color='lightgreen')

# Axis labelling, category ticks, and legend.
ax.set_ylabel('Jumlah File')
ax.set_title('Statistik Pembersihan Dataset')
ax.set_xticks(x)
ax.set_xticklabels(categories)
ax.legend()

plt.show()