# Count Images and CSV Rows in Mask+Jaw Dataset

This notebook provides an interactive interface to count:
- Images in `images/` folders
- Images in `labels/tongue/` folders
- Rows in `labels/jaw/jaw.csv` files (excluding headers)

You can adjust the parameters below and run the cells to get the counts.

In [1]:
from pathlib import Path
import csv
import pandas as pd

## Configuration Parameters

Set your parameters here:

In [2]:
# Top-level dataset folder; its immediate subfolders (e.g., 1_bottom, 1_side) are scanned
ROOT_DIR = r'C:\Users\wanglab\Desktop\Mask+Jaw'

# File suffixes treated as images; the leading dot may be omitted
IMAGE_EXTENSIONS = [
    '.png',
    '.jpg',
    '.jpeg',
    '.tif',
    '.tiff',
    '.bmp',
    '.gif',
    '.webp',
]

# Destination for the per-folder summary table (e.g., 'results.csv'); None skips saving
OUTPUT_CSV = 'results.csv'

## Helper Functions

In [3]:
def is_image(p: Path, suffixes):
    """Tell whether ``p`` is a regular file whose extension is an image suffix.

    The extension is lower-cased before the lookup, so ``suffixes`` is expected
    to hold lower-case entries that include the leading dot (e.g. ``'.png'``).
    """
    if not p.is_file():
        return False
    extension = p.suffix.lower()
    return extension in suffixes


def count_images_in_dir(dirpath: Path, suffixes):
    """Return how many direct children of ``dirpath`` are image files.

    Only the top level of the directory is inspected (no recursion). A path
    that is missing or is not a directory yields 0.
    """
    if dirpath.exists() and dirpath.is_dir():
        image_entries = [entry for entry in dirpath.iterdir() if is_image(entry, suffixes)]
        return len(image_entries)
    return 0


def count_csv_rows(csv_path: Path):
    """Count the data rows of a CSV file: non-empty rows minus one header row.

    The delimiter is guessed with ``csv.Sniffer``; if that fails, whichever of
    ``,``, ``;``, tab or ``|`` occurs most often in the first line is used.
    Rows whose cells are all blank are ignored, and the first remaining row is
    treated as the header.

    The file is read as UTF-8 with an optional BOM. Bytes that are not valid
    UTF-8 (e.g. a CSV saved in a Windows code page) are replaced rather than
    rejected, because only the row structure matters for counting. The file is
    therefore assumed to be ASCII-compatible; UTF-16/32 files are not supported.

    Args:
        csv_path: Location of the CSV file.

    Returns:
        The number of non-empty rows after the header. 0 is returned when the
        path is missing or not a regular file, when the file is empty or
        header-only, or when reading fails (a warning is printed in that case).
    """
    if not csv_path.is_file():
        return 0
    try:
        # newline='' passes raw line endings to the csv module so that it can
        # handle \r\n and newlines embedded in quoted fields itself.
        # utf-8-sig strips a leading BOM; errors='replace' keeps files with
        # stray non-UTF-8 bytes countable instead of silently reporting 0.
        with open(csv_path, 'r', encoding='utf-8-sig', errors='replace', newline='') as f:
            sample = f.read(8192)  # up to 8KB is enough for delimiter detection
            f.seek(0)

            try:
                dialect = csv.Sniffer().sniff(sample, delimiters=',;\t|')
                reader = csv.reader(f, dialect=dialect)
            except csv.Error:
                # Sniffer could not decide: pick the candidate seen most often
                # in the first line (ties resolve to the earliest candidate).
                first_line = f.readline()
                f.seek(0)
                candidates = [',', ';', '\t', '|']
                best_delimiter = max(candidates, key=first_line.count)
                reader = csv.reader(f, delimiter=best_delimiter)

            # Count rows that contain at least one non-blank cell
            row_count = sum(1 for row in reader if any(cell.strip() for cell in row))

            return max(0, row_count - 1)  # subtract header
    except Exception as e:
        # Best effort: one unreadable CSV must not abort the whole scan
        print(f'Warning: Could not read {csv_path}: {e}')
        return 0


## Run the Counting Process

In [4]:
# Resolve the configured root and fail fast when it cannot be scanned
root = Path(ROOT_DIR)
if not root.is_dir():
    if root.exists():
        raise NotADirectoryError(f'Root is not a directory: {root}')
    raise FileNotFoundError(f'Root path does not exist: {root}')

# Lower-case every configured extension and make sure it carries a leading dot
suffixes = {
    (ext if ext.startswith('.') else '.' + ext).lower()
    for ext in IMAGE_EXTENSIONS
}

extension_listing = ', '.join(sorted(suffixes))
print(f"Scanning directory: {root}")
print(f"Looking for extensions: {extension_listing}")
print()

Scanning directory: C:\Users\wanglab\Desktop\Mask+Jaw
Looking for extensions: .bmp, .gif, .jpeg, .jpg, .png, .tif, .tiff, .webp



In [5]:
# One record per parent folder, in sorted folder order
data = []

# Parent folders are the immediate subdirectories of root
children = sorted(entry for entry in root.iterdir() if entry.is_dir())

for folder in children:
    labels_dir = folder / 'labels'
    data.append({
        'Folder': folder.name,
        'Images': count_images_in_dir(folder / 'images', suffixes),
        'Tongue': count_images_in_dir(labels_dir / 'tongue', suffixes),
        'Jaw CSV Rows': count_csv_rows(labels_dir / 'jaw' / 'jaw.csv'),
    })

# Grand totals across all parent folders (all zero when there are none)
total_images = sum(record['Images'] for record in data)
total_tongue = sum(record['Tongue'] for record in data)
total_jaw_rows = sum(record['Jaw CSV Rows'] for record in data)

if children:
    print(f"Found {len(children)} parent folders.")
else:
    print("No subdirectories found in the root directory.")

Found 69 parent folders.


## Display Results

In [6]:
# Per-folder table built from the collected records
df = pd.DataFrame(data)

# Single-row frame holding the grand totals, appended below the per-folder rows
totals = pd.DataFrame({
    'Folder': ['TOTAL'],
    'Images': [total_images],
    'Tongue': [total_tongue],
    'Jaw CSV Rows': [total_jaw_rows],
})
df_with_totals = pd.concat([df, totals], ignore_index=True)

# Banner followed by the rich table rendering
banner = "=" * 70
print("\n" + banner)
print("Results:")
print(banner)
display(df_with_totals)

# Plain-text recap of the totals
summary_lines = [
    "\nSummary:",
    f"  Total parent folders: {len(data)}",
    f"  Total images: {total_images}",
    f"  Total tongue labels: {total_tongue}",
    f"  Total jaw CSV rows: {total_jaw_rows}",
]
for line in summary_lines:
    print(line)


Results:


Unnamed: 0,Folder,Images,Tongue,Jaw CSV Rows
0,10_bottom,29,29,0
1,10_side,38,38,0
2,11_bottom,20,20,0
3,11_side,46,46,0
4,12_bottom,21,21,0
...,...,...,...,...
65,TL0042 corrections 20250917,108,108,108
66,TL0042 corrections 20250918,245,245,245
67,TL0125_20251008 Corrections,311,311,311
68,TL0128_20251008 Correction,110,110,110



Summary:
  Total parent folders: 69
  Total images: 7030
  Total tongue labels: 7030
  Total jaw CSV rows: 3304


## Optional: Save to CSV

In [7]:
# Status glyphs are spelled as escapes so the messages stay intact even if the
# notebook file is re-encoded (the literal characters had turned into mojibake).
SAVED_MARK = "\u2713"   # check mark
FAILED_MARK = "\u2717"  # ballot cross

# Write the per-folder table (without the TOTAL row) when an output path is set
if OUTPUT_CSV:
    try:
        df.to_csv(OUTPUT_CSV, index=False)
        print(f"{SAVED_MARK} Results saved to: {OUTPUT_CSV}")
    except Exception as e:
        # Saving is optional: report the failure instead of aborting the notebook
        print(f"{FAILED_MARK} Failed to save CSV: {e}")
else:
    print("No output CSV specified. Set OUTPUT_CSV in the configuration cell to save results.")

✓ Results saved to: results.csv


## Optional: Detailed View of Each Folder

Run this cell to see details about each folder's structure:

In [None]:
# Glyphs are spelled as escapes so the report stays intact even if the notebook
# file is re-encoded (the literal characters had turned into mojibake).
PRESENT_MARK = "\u2713"     # check mark
ABSENT_MARK = "\u2717"      # ballot cross
FOLDER_ICON = "\U0001F4C1"  # file-folder emoji


def _presence_mark(path):
    """Return the check mark when ``path`` exists, otherwise the cross."""
    return PRESENT_MARK if path.exists() else ABSENT_MARK


print("Folder Details:")
print("="*80)

# Report, for every parent folder, which of the expected entries are present
for child in children:
    images_dir = child / 'images'
    labels_tongue_dir = child / 'labels' / 'tongue'
    jaw_csv_path = child / 'labels' / 'jaw' / 'jaw.csv'

    print(f"\n{FOLDER_ICON} {child.name}")
    print(f"   images/: {_presence_mark(images_dir)} exists")
    print(f"   labels/tongue/: {_presence_mark(labels_tongue_dir)} exists")
    print(f"   labels/jaw/jaw.csv: {_presence_mark(jaw_csv_path)} exists")