In [19]:
import os

def count_files_in_directory(directory):
    """Return the number of files found anywhere beneath ``directory``.

    The tree is traversed with ``os.walk``; only the file entries reported for
    each visited folder are counted, the folders themselves are not.
    """
    return sum(len(filenames) for _root, _dirs, filenames in os.walk(directory))

# Directory whose files are counted (recursively) by count_files_in_directory.
directory_path = "../data/processed/"

In [20]:
# Bare expression: the notebook shows the returned file count as the cell output.
count_files_in_directory(directory_path)

12679

In [7]:
import os
import numpy as np
import tifffile as tiff
import matplotlib.pyplot as plt
from pathlib import Path


def load_tif_image(filepath):
    """Read a TIFF file and return its pixel data as a ``uint16`` NumPy array.

    The dtype stored in the file is printed before the cast so that a
    non-uint16 source is visible in the cell output.

    Args:
        filepath: Location of the TIFF file, passed straight to ``tiff.imread``.

    Returns:
        The image converted with ``astype(np.uint16)``. The rest of this
        notebook expects the layout (H, W, 4); that is not validated here.
    """
    pixels = tiff.imread(filepath)
    source_dtype = pixels.dtype
    print(source_dtype)

    as_uint16 = pixels.astype(np.uint16)
    return as_uint16

def plot_images(image, title_prefix, save_path):
    """Render a six-panel overview of a 4-band image and save it as one figure.

    Panel layout (2 rows x 3 columns):
        [0, 0] GRE      [0, 1] RED      [0, 2] false-color composite
        [1, 0] REDEDGE  [1, 1] NIR      [1, 2] central crop of the composite

    Args:
        image: 3-D array indexed as ``image[row, col, band]``; bands 0-3 are
            labelled GRE, RED, REDEDGE, NIR in that order.
        title_prefix: Text placed in front of every panel title.
        save_path: Destination handed to ``Figure.savefig`` (written at 300 dpi).
    """
    bands = ['GRE', 'RED', 'REDEDGE', 'NIR']

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    try:
        # Individual bands fill the left 2x2 part of the grid.
        for i, band_name in enumerate(bands):
            ax = axes[i // 2, i % 2]
            ax.imshow(image[:, :, i], cmap='gray')
            ax.set_title(f"{title_prefix} - {band_name}")
            ax.axis("off")

        # False Color Composite (NIR, RED, GREEN)
        false_color = np.stack([image[:, :, 3], image[:, :, 1], image[:, :, 0]], axis=-1)  # (H, W, 3)
        print(image.shape)

        # Scale to [0, 1] for display. An image with no positive signal would
        # otherwise divide by zero and hand NaNs to imshow, so show it as black.
        peak = np.max(false_color)
        if peak > 0:
            false_color = false_color / peak
        else:
            false_color = np.zeros(false_color.shape, dtype=float)

        ax_false = axes[0, 2]
        ax_false.imshow(false_color)
        ax_false.set_title(f"{title_prefix} - False Color (NIR, RED, GREEN)")
        ax_false.axis("off")

        # Zoomed-in False Color (20x): a centered window 1/20 of each dimension.
        zoom_factor = 20
        h, w, _ = image.shape
        center_x, center_y = w // 2, h // 2
        # Keep at least one pixel on each side of the center so images smaller
        # than 2 * zoom_factor pixels do not produce an empty crop.
        zoom_size_x = max(1, w // (2 * zoom_factor))
        zoom_size_y = max(1, h // (2 * zoom_factor))

        # Clamp the start index at 0; a negative start would wrap around.
        zoomed = false_color[
            max(center_y - zoom_size_y, 0):center_y + zoom_size_y,
            max(center_x - zoom_size_x, 0):center_x + zoom_size_x,
            :
        ]

        ax_zoom = axes[1, 2]
        ax_zoom.imshow(zoomed)
        ax_zoom.set_title(f"{title_prefix} - False Color Zoomed (20x)")
        ax_zoom.axis("off")

        # Save the visualization
        fig.tight_layout()
        fig.savefig(save_path, dpi=300)
    finally:
        # Always release this figure, even if plotting or saving failed, so
        # batch runs over many folders do not accumulate open figures.
        plt.close(fig)

def plot_histogram(image, title_prefix, save_path):
    """Overlay the intensity histograms of the four bands and save the figure.

    Each band ``image[:, :, k]`` (k = 0..3, labelled GRE, RED, REDEDGE, NIR) is
    flattened and drawn as a 256-bin step histogram on a shared axes.

    Args:
        image: 3-D array indexed as ``image[row, col, band]``.
        title_prefix: Text placed in front of the plot title.
        save_path: Destination of the saved figure (written at 300 dpi).
    """
    band_labels = ('GRE', 'RED', 'REDEDGE', 'NIR')
    fig, ax = plt.subplots(figsize=(8, 6))

    for band_idx, band_label in enumerate(band_labels):
        pixel_values = image[:, :, band_idx].ravel()
        ax.hist(pixel_values, bins=256, alpha=0.6, label=band_label, histtype='step')

    ax.set_title(f"{title_prefix} - Histogram of Bands")
    ax.set_xlabel("Pixel Intensity")
    ax.set_ylabel("Frequency")
    ax.legend()

    # The figure created above is the current one, so address it directly.
    fig.savefig(save_path, dpi=300)
    plt.close(fig)


def process_directory(root_dir, output_dir):
    """Run ``process_single_subfolder`` for every immediate subfolder of ``root_dir``.

    ``output_dir`` is created first (including missing parents). Entries of
    ``root_dir`` that are not directories are ignored. Subfolders are visited
    in whatever order ``Path.iterdir`` yields them.
    """
    Path(output_dir).mkdir(parents=True, exist_ok=True)  # make sure the output root exists

    subfolders = (entry for entry in Path(root_dir).iterdir() if entry.is_dir())
    for subfolder in subfolders:
        process_single_subfolder(root_dir, subfolder.name, output_dir)

def process_single_subfolder(root_dir, subfolder_name, output_dir):
    """Plot the first and, when there is more than one, the last TIFF of a subfolder.

    ``*.tif`` files directly inside ``root_dir/subfolder_name`` are sorted by
    path; the first one is always plotted and the last one only if the folder
    holds more than one file. For each plotted image a band overview and a
    histogram PNG are written to ``output_dir/subfolder_name``.

    Nothing is written when the subfolder is missing or contains no ``*.tif``
    file; a message is printed and the function returns.
    """
    source_dir = Path(root_dir) / subfolder_name
    if not source_dir.is_dir():
        print(f"Subfolder {subfolder_name} not found in {root_dir}")
        return

    # Sorted order decides which files count as "first" and "last".
    tif_paths = sorted(source_dir.glob("*.tif"))
    if not tif_paths:
        print(f"Skipping {subfolder_name}: No TIFF files found.")
        return

    first_tif = tif_paths[0]
    last_tif = None
    if len(tif_paths) > 1:
        last_tif = tif_paths[-1]

    if last_tif:
        trailer = f", {last_tif.name}"
    else:
        trailer = " (Only 1 image)"
    print(f"Processing {subfolder_name}: {first_tif.name}" + trailer)

    def render(pixels, title, overview_png, histogram_png):
        # Band overview first, then the histogram, both under the same title.
        plot_images(pixels, title, overview_png)
        plot_histogram(pixels, title, histogram_png)

    # The image is loaded before the output folder is created, so a failed
    # read leaves no empty folder behind.
    first_image = load_tif_image(first_tif)

    sub_output_dir = Path(output_dir) / subfolder_name
    sub_output_dir.mkdir(parents=True, exist_ok=True)

    render(
        first_image,
        f"{subfolder_name} - First",
        sub_output_dir / f"{subfolder_name}_first.png",
        sub_output_dir / f"{subfolder_name}_hist_first.png",
    )
    print(f"Saved plots for {subfolder_name} in {sub_output_dir}")

    # Only one image in the folder: nothing more to do.
    if last_tif is None or last_tif == first_tif:
        return

    last_image = load_tif_image(last_tif)
    render(
        last_image,
        f"{subfolder_name} - Last",
        sub_output_dir / f"{subfolder_name}_last.png",
        sub_output_dir / f"{subfolder_name}_hist_last.png",
    )
    print(f"Saved additional plots for last image in {sub_output_dir}")


In [12]:

# Set your paths here
directory_path = "../data/processed"  # Change to your actual input directory
output_directory = "../data/outputs"  # Change to your desired output directory
# Create the output root up front (no error if it already exists).
os.makedirs(output_directory, exist_ok=True)
# Batch run: generate plots for every immediate subfolder of directory_path.
process_directory(directory_path, output_directory)

In [11]:

# Set your paths here
directory_path = "../data/processed"  # Change to your actual input directory
output_directory = "../data/outputs"  # Change to your desired output directory
# Create the output root up front (no error if it already exists).
os.makedirs(output_directory, exist_ok=True)
# Single-folder run: generate plots only for the "rededge_botrytis_b" subfolder.
process_single_subfolder(directory_path, "rededge_botrytis_b", output_directory)

Processing rededge_botrytis_b: REDEDGE_SUNCAL_rededge_botrytis_b_IMG_0144_2.tif.tif, REDEDGE_SUNCAL_rededge_botrytis_b_IMG_0669_2.tif.tif
uint16
(960, 1280, 4)
Saved plots for rededge_botrytis_b in ..\data\outputs\rededge_botrytis_b
uint16
(960, 1280, 4)
Saved additional plots for last image in ..\data\outputs\rededge_botrytis_b
