# In [None]:
import os
import numpy as np
from pathlib import Path

def count_classes_in_masks(masks_dir, max_classes=6):
    """
    Report one-hot encoded masks whose class axis is larger than ``max_classes``.

    Every ``*.npy`` file directly inside ``masks_dir`` is loaded and the size of
    its last axis is taken as the number of classes. Note that this is the
    number of channels stored in the file, not the number of classes that
    actually occur in the mask. Each file exceeding ``max_classes`` is printed,
    followed by a summary line. Files that cannot be loaded, or that have fewer
    than 3 dimensions, are reported and skipped.

    Args:
        masks_dir (str): Path to directory containing mask files
        max_classes (int): Largest class count that is NOT reported.
            Defaults to 6.

    Returns:
        int: Number of masks with more than ``max_classes`` classes.

    Raises:
        ValueError: If ``masks_dir`` does not exist or is not a directory.
    """
    masks_path = Path(masks_dir)

    # is_dir() also rejects a path that exists but is a regular file; exists()
    # alone would let such a path through and silently report zero masks.
    if not masks_path.is_dir():
        raise ValueError(f"Directory not found: {masks_dir}")

    print(f"Analyzing masks in {masks_dir}...")
    print("-" * 50)

    count = 0
    # glob() yields entries in arbitrary order; sort so the report is reproducible.
    for mask_file in sorted(masks_path.glob("*.npy")):
        try:
            # Only the shape is needed. Keeping the attribute access inside the
            # try means a file that loads as something other than an array is
            # reported like any other unreadable file.
            shape = np.load(mask_file).shape
        except Exception as e:
            # Best-effort scan: report the bad file and keep going.
            print(f"Error processing {mask_file.name}: {e}")
            continue

        # If mask is in one-hot format, last dimension should be number of classes
        if len(shape) < 3:
            print(f"Warning: Unexpected mask format in {mask_file.name}")
            continue

        num_classes = shape[-1]
        if num_classes > max_classes:
            count += 1
            print(f"File: {mask_file.name}")
            print(f"Number of classes: {num_classes}")
            print("-" * 30)

    print("-" * 50)
    print(f"Found {count} masks with more than {max_classes} classes")
    return count

if __name__ == "__main__":
    # Entry point: runs only when this code is executed directly, not when it
    # is imported as a module.
    # Update this path to match your directory structure. It is a relative
    # path, so it is resolved against the current working directory.
    masks_dir = "../data/processed/train/masks"
    count_classes_in_masks(masks_dir)

# In [None]:
import os
from pathlib import Path
from PIL import Image
import numpy as np

def analyze_green_channel(image_dir, max_values=6):
    """
    Report images whose green channel holds more than ``max_values`` unique values.

    Every file directly inside ``image_dir`` with a .png, .jpg, .jpeg, .tiff or
    .bmp suffix (case-insensitive) is opened, converted to RGB when it is in
    another mode, and the distinct values of its green channel are counted.
    Each image exceeding ``max_values`` is printed together with its values,
    followed by a summary line. Files that cannot be read are reported and
    skipped.

    Args:
        image_dir (str): Path to directory containing image files
        max_values (int): Largest number of unique green values that is NOT
            reported. Defaults to 6.

    Returns:
        int: Number of images with more than ``max_values`` unique green values.

    Raises:
        ValueError: If ``image_dir`` does not exist or is not a directory.
    """
    image_path = Path(image_dir)

    # is_dir() also rejects a path that exists but is a regular file; exists()
    # alone would let such a path through and silently report zero images.
    if not image_path.is_dir():
        raise ValueError(f"Directory not found: {image_dir}")

    print(f"Analyzing green channel in images from {image_dir}...")
    print("-" * 50)

    # Counter for images with more than max_values green values
    count = 0

    # Common image extensions
    extensions = ('.png', '.jpg', '.jpeg', '.tiff', '.bmp')

    # glob() yields entries in arbitrary order; sort so the report is reproducible.
    for img_file in sorted(image_path.glob("*")):
        if img_file.suffix.lower() not in extensions:
            continue

        try:
            with Image.open(img_file) as img:
                # Convert to RGB if not already, so index 1 is always green
                rgb = img if img.mode == 'RGB' else img.convert('RGB')
                # Materialize the pixels while the file is still open
                green_channel = np.array(rgb)[:, :, 1]
        except Exception as e:
            # Best-effort scan: report the bad file and keep going.
            print(f"Error processing {img_file.name}: {e}")
            continue

        # np.unique returns the distinct values already sorted ascending
        unique_values = np.unique(green_channel)
        num_values = len(unique_values)

        if num_values > max_values:
            count += 1
            print(f"File: {img_file.name}")
            print(f"Number of unique green values: {num_values}")
            print(f"Unique green values: {unique_values.tolist()}")
            print("-" * 30)

    print("-" * 50)
    print(f"Found {count} images with more than {max_values} unique green values")
    return count

if __name__ == "__main__":
    # Entry point: runs only when this code is executed directly, not when it
    # is imported as a module.
    # Update this path to match your directory structure. It is a relative
    # path, so it is resolved against the current working directory.
    # NOTE(review): this is the same "masks" directory that the .npy mask scan
    # is pointed at, while analyze_green_channel only reads .png/.jpg/.jpeg/
    # .tiff/.bmp files - confirm this directory actually holds image files.
    image_dir = "../data/processed/train/masks"
    analyze_green_channel(image_dir)