In [None]:
import arcpy
import os
import shutil
import time
from datetime import timedelta
from arcpy.sa import Raster, SetNull, CellStatistics, Con

# -----------------------------
# Geoprocessing environment and run configuration
# -----------------------------
arcpy.env.overwriteOutput = True
arcpy.env.workspace = r"C:\Users\ss2596\Documents\SENTINEL2 Mufunta\Mufunta_Sentinel2_6_2025"

# The workspace doubles as the folder that holds the *.SAFE products
parent_folder = arcpy.env.workspace

# Scratch location for intermediate GeoTIFFs
temp_folder = os.path.join(parent_folder, "temp_tiffs")
os.makedirs(temp_folder, exist_ok=True)

# -----------------------------
# Sentinel-2 RGB bands (band code -> display name)
# -----------------------------
bands = dict(B02="Blue", B03="Green", B04="Red")

threshold = 1000  # pixels at or below this value are masked out as invalid
mosaicked_band_files = []  # filled with one mosaic path per completed band

# -----------------------------
# Collect SAFE folders; alphabetical order defines processing order
# -----------------------------
safe_folders = [entry for entry in os.listdir(parent_folder) if entry.endswith(".SAFE")]
safe_folders.sort()

print(f"Found {len(safe_folders)} SAFE folders to process")
print(f"Will process {len(bands)} bands: {', '.join(bands.values())}")
print(f"Expected total rasters to process: ~{len(safe_folders) * len(bands)}")

# Progress bookkeeping used for the ETA display
rasters_processed = 0
total_rasters_expected = len(safe_folders) * len(bands)
start_time = time.time()

def format_time(seconds):
    """Convert a duration in seconds to a short human-readable string.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        ``"<s>s"`` below one minute, ``"<m>m <s>s"`` below one hour,
        otherwise ``"<h>h <m>m"``.
    """
    # Round to whole seconds once, up front. Rounding the seconds part
    # separately from the floored minutes part could print "1m 60s".
    total = int(round(seconds))
    if total < 60:
        return f"{total}s"
    if total < 3600:
        minutes, secs = divmod(total, 60)
        return f"{minutes}m {secs}s"
    hours, remainder = divmod(total, 3600)
    return f"{hours}h {remainder // 60}m"

def get_eta_info(processed, total, elapsed_time):
    """Estimate processing speed and remaining time.

    Args:
        processed: Number of rasters handled so far.
        total: Expected number of rasters overall.
        elapsed_time: Seconds elapsed since processing started.

    Returns:
        A ``(speed_info, eta_info)`` tuple of display strings. Both are
        ``"calculating..."`` while no rate can be derived; ``eta_info`` is
        also ``"calculating..."`` when nothing remains to be processed.
    """
    placeholder = "calculating..."

    # Without progress or a measurable elapsed time there is no rate.
    # The elapsed_time check also prevents a ZeroDivisionError.
    if processed <= 0 or elapsed_time <= 0:
        return placeholder, placeholder

    rate = processed / elapsed_time  # rasters per second, > 0 from here on
    eta_seconds = (total - processed) / rate

    speed_info = f"{rate*60:.1f}/min"
    eta_info = format_time(eta_seconds) if eta_seconds > 0 else placeholder

    return speed_info, eta_info

print(f"\n⏱️  Processing started at {time.strftime('%H:%M:%S')}")

# -----------------------------
# Process each band
# -----------------------------
# Per band: copy every matching 10 m JP2 to GeoTIFF, null out invalid pixels,
# then average the masked tiles into a single mosaic inside temp_folder.
for band_idx, (band_code, band_name) in enumerate(bands.items(), 1):
    print(f"\n[{band_idx}/{len(bands)}] Processing {band_name} ({band_code})...")

    tiff_list = []  # masked GeoTIFFs produced for this band
    counter = 0  # per-band raster count; also keeps temp filenames unique

    # Process all SAFE folders
    for safe_idx, safe in enumerate(safe_folders, 1):
        granule_path = os.path.join(parent_folder, safe, "GRANULE")
        if not os.path.exists(granule_path):
            continue

        for granule_id in os.listdir(granule_path):
            img_data_path = os.path.join(granule_path, granule_id, "IMG_DATA", "R10m")
            if not os.path.exists(img_data_path):
                continue

            # Find band JP2 files
            for f in os.listdir(img_data_path):
                if not (band_code in f and f.endswith(".jp2") and not f.startswith("MSK_")):
                    continue

                counter += 1
                jp2_path = os.path.join(img_data_path, f)

                # Create unique filenames
                unique_id = f"{counter:03d}_{granule_id[:8]}"
                tiff_path = os.path.join(temp_folder, f"{unique_id}_{band_code}.tif")
                masked_tiff = os.path.join(temp_folder, f"{unique_id}_{band_code}_masked.tif")

                # Remove leftovers from an earlier run. Best effort only:
                # overwriteOutput is enabled, so a locked file is not fatal here.
                for file_path in (tiff_path, masked_tiff):
                    if os.path.exists(file_path):
                        try:
                            os.remove(file_path)
                        except OSError:
                            pass

                # Update timing and show progress with ETA
                rasters_processed += 1
                elapsed_time = time.time() - start_time
                progress_pct = (safe_idx / len(safe_folders)) * 100
                speed, eta = get_eta_info(rasters_processed, total_rasters_expected, elapsed_time)

                print(f"  Processing raster {counter} ({progress_pct:.0f}% complete) - "
                      f"Elapsed: {format_time(elapsed_time)} | Speed: {speed} | ETA: {eta}")

                # Convert the JP2 to GeoTIFF before doing raster algebra on it
                arcpy.management.CopyRaster(jp2_path, tiff_path)
                band_r = Raster(tiff_path)

                # More comprehensive masking: exclude invalid pixels (0, very low values, and NoData)
                # Also check for typical Sentinel-2 fill values
                masked_r = SetNull((band_r <= threshold) | (band_r == 0) | (band_r >= 65535), band_r)
                arcpy.management.CopyRaster(masked_r, masked_tiff)
                tiff_list.append(masked_tiff)

                # Drop the raster objects, then delete the unmasked intermediate.
                # If deletion fails the file is removed with temp_folder later.
                del band_r, masked_r
                try:
                    os.remove(tiff_path)
                except OSError:
                    pass

    if not tiff_list:
        print(f"  No valid rasters found for {band_name}")
        continue

    # -----------------------------
    # Mosaic using MEAN (average pixel values, ignoring masked)
    # -----------------------------
    out_raster = os.path.join(temp_folder, f"mosaic_{band_code}.tif")

    # Delete existing mosaic if it exists
    if os.path.exists(out_raster):
        try:
            os.remove(out_raster)
        except OSError:
            pass

    print(f"  Found {len(tiff_list)} rasters for {band_name}")
    print(f"  Creating averaged mosaic with strict threshold filtering...")

    if len(tiff_list) > 1:
        try:
            print(f"    Setting up environment for {len(tiff_list)} rasters...")
            arcpy.env.extent = "MAXOF"
            arcpy.env.cellSize = "MINOF"

            # Clear any existing workspace cache to prevent memory issues
            arcpy.ClearWorkspaceCache_management()

            print(f"    Computing mean statistics...")
            # Use MEAN with DATA option to ignore NoData pixels in averaging
            # This ensures proper pixel-wise averaging across all valid pixels
            mean_raster = CellStatistics(tiff_list, "MEAN", "DATA")

            print(f"    Applying threshold mask...")
            # Apply strict threshold filtering to ensure no invalid pixels are included
            # This removes any pixels that fall outside our valid range after averaging
            final_masked = SetNull((mean_raster <= threshold) |
                                   (mean_raster == 0) |
                                   (mean_raster >= 65535), mean_raster)

            print(f"    Saving final mosaic...")
            arcpy.management.CopyRaster(final_masked, out_raster)

            # Clean up memory immediately
            del mean_raster, final_masked

            print(f"    ✓ Mosaic creation completed successfully")

            # Clean up individual files (best effort)
            for tiff_file in tiff_list:
                try:
                    os.remove(tiff_file)
                except OSError:
                    pass

        except Exception as stats_error:
            # Report why the primary method failed instead of hiding it
            print(f"    ⚠️ CellStatistics mosaic failed: {stats_error}")
            print(f"  Using fallback mosaic method...")
            arcpy.management.CopyRaster(tiff_list[0], out_raster)
            for tiff in tiff_list[1:]:
                arcpy.management.Mosaic(
                    inputs=[tiff],
                    target=out_raster,
                    mosaic_type="MEAN"
                )
        finally:
            # Always restore the environment so a failure does not leak
            # MAXOF/MINOF into later bands or the final composite
            arcpy.env.extent = None
            arcpy.env.cellSize = None
    else:
        arcpy.management.CopyRaster(tiff_list[0], out_raster)

    mosaicked_band_files.append(out_raster)
    print(f"  ✓ {band_name} mosaic completed")

# -----------------------------
# Combine bands into final RGB raster
# -----------------------------
if mosaicked_band_files:
    rgb_order = [
        os.path.join(temp_folder, "mosaic_B04.tif"),  # Red
        os.path.join(temp_folder, "mosaic_B03.tif"),  # Green
        os.path.join(temp_folder, "mosaic_B02.tif")   # Blue
    ]

    # Only composite mosaics built in this run. A band skipped above would
    # otherwise make CompositeBands fail or pick up a stale file.
    missing_mosaics = [path for path in rgb_order if path not in mosaicked_band_files]

    if missing_mosaics:
        missing_names = ", ".join(os.path.basename(path) for path in missing_mosaics)
        print(f"\n❌ Cannot create RGB composite - missing band mosaic(s): {missing_names}")
    else:
        # Use parent folder name for final output
        folder_name = os.path.basename(parent_folder.rstrip("\\/"))
        final_rgb = os.path.join(parent_folder, f"{folder_name}.tif")

        print(f"\nCreating final RGB composite...")
        arcpy.management.CompositeBands(rgb_order, final_rgb)

        print("Calculating statistics...")
        arcpy.management.CalculateStatistics(final_rgb)
        print(f"✅ RGB mosaic saved: {final_rgb}")

    # Final timing summary
    total_elapsed = time.time() - start_time
    print(f"\n⏱️  Processing completed at {time.strftime('%H:%M:%S')}")
    print(f"⏱️  Total processing time: {format_time(total_elapsed)}")
    if rasters_processed > 0 and total_elapsed > 0:
        avg_rate = rasters_processed / total_elapsed
        print(f"⏱️  Average processing rate: {avg_rate*60:.1f} rasters/minute")

# -----------------------------
# Delete Temporary Files
# -----------------------------
print(f"\nCleaning up temporary files...")

# Discard raster variables that may still be bound; names that were never
# created (or were already deleted) are simply skipped.
for _leftover_name in ("band_r", "masked_r", "mean_raster"):
    globals().pop(_leftover_name, None)

# Clear ArcPy cache and reset the workspace environment setting
arcpy.ClearWorkspaceCache_management()
arcpy.ClearEnvironment("workspace")

import gc
import time

# Collect garbage and pause briefly before removing the folder
# (presumably to let file locks be released - TODO confirm the delay is needed)
gc.collect()
time.sleep(3)

try:
    shutil.rmtree(temp_folder)
    print("✅ Cleanup completed")
except Exception as cleanup_error:
    print(f"⚠️ Could not remove temp folder: {cleanup_error}")


In [3]:
import os
import xml.etree.ElementTree as ET

# -----------------------------
# Simplified Cloud Coverage Analysis and Folder Renaming Tool
# -----------------------------
# For every SAFE folder found, reads CLOUDY_PIXEL_PERCENTAGE from each granule's
# QI_DATA/L2A_QUALITY.xml, averages the values, and renames the SAFE folder with
# a "cloudXXX_XXpct_" prefix.

# Configuration - Edit this path as needed
base_directory = r"C:\Users\ss2596\Documents\SENTINEL2 Mufunta\Mufunta_Sentinel2_8_2025"

# Get all directories that might contain SAFE folders
if not os.path.exists(base_directory):
    print(f"❌ Directory not found: {base_directory}")
else:
    # Look for directories containing SAFE folders
    folders_to_process = []

    # Check if current directory has SAFE folders
    safe_folders = [f for f in os.listdir(base_directory) if f.endswith(".SAFE")]
    if safe_folders:
        folders_to_process.append(base_directory)

    # Check subdirectories for SAFE folders
    for item in os.listdir(base_directory):
        item_path = os.path.join(base_directory, item)
        if os.path.isdir(item_path):
            sub_safe_folders = [f for f in os.listdir(item_path) if f.endswith(".SAFE")]
            if sub_safe_folders:
                folders_to_process.append(item_path)

    if not folders_to_process:
        print(f"❌ No folders containing SAFE files found in: {base_directory}")
    else:
        print(f"Found {len(folders_to_process)} folders to analyze:")
        for folder in folders_to_process:
            print(f"  {folder}")

        # Process each folder
        for folder_path in folders_to_process:
            folder_name = os.path.basename(folder_path.rstrip("\\/"))

            # Skip if already processed (has cloud percentage in name)
            if "_cloud" in folder_name and "pct" in folder_name:
                print(f"⏭️  Skipping {folder_name} (already processed)")
                continue

            print(f"\n📊 Analyzing cloud coverage for: {folder_name}")

            # Get SAFE folders in this directory
            safe_folders = sorted([f for f in os.listdir(folder_path) if f.endswith(".SAFE")])
            print(f"  Found {len(safe_folders)} SAFE folders")

            # Analyze each SAFE folder individually
            for safe in safe_folders:
                safe_folder_path = os.path.join(folder_path, safe)

                # Skip if already processed (has cloud percentage prefix)
                if safe.startswith("cloud") and "pct_" in safe:
                    print(f"  ⏭️  Skipping {safe} (already processed)")
                    continue

                print(f"  📊 Analyzing SAFE folder: {safe}")

                granule_path = os.path.join(safe_folder_path, "GRANULE")
                if not os.path.exists(granule_path):
                    print(f"    ❌ No GRANULE folder found")
                    continue

                # Collect cloud percentages from all tiles in this SAFE folder
                tile_cloud_percentages = []

                for granule_id in os.listdir(granule_path):
                    qi_data_path = os.path.join(granule_path, granule_id, "QI_DATA")
                    if not os.path.exists(qi_data_path):
                        continue

                    # Look for L2A_QUALITY.xml file (fall back to lowercase name)
                    quality_xml_path = os.path.join(qi_data_path, "L2A_QUALITY.xml")
                    if not os.path.exists(quality_xml_path):
                        quality_xml_path = os.path.join(qi_data_path, "l2a_quality.xml")

                    if not os.path.exists(quality_xml_path):
                        print(f"    {granule_id[:15]}: No L2A_QUALITY.xml found")
                        continue

                    try:
                        # Parse XML and extract CLOUDY_PIXEL_PERCENTAGE
                        root = ET.parse(quality_xml_path).getroot()

                        # The first element whose name attribute matches wins
                        for elem in root.iter():
                            if elem.get('name') == 'CLOUDY_PIXEL_PERCENTAGE':
                                tile_cloud_pct = float(elem.text)
                                tile_cloud_percentages.append(tile_cloud_pct)
                                print(f"    {granule_id[:15]}: {tile_cloud_pct:.1f}% cloud coverage")
                                break
                        else:
                            # File parsed fine but carries no cloud metric
                            print(f"    {granule_id[:15]}: CLOUDY_PIXEL_PERCENTAGE not found in XML")

                    except (ET.ParseError, ValueError, TypeError, OSError) as e:
                        # TypeError: matching element has no text (float(None));
                        # OSError: file could not be read
                        print(f"    {granule_id[:15]}: Error reading XML - {str(e)[:30]}")
                        continue

                # Calculate average cloud coverage for this SAFE folder
                if tile_cloud_percentages:
                    safe_cloud_coverage = sum(tile_cloud_percentages) / len(tile_cloud_percentages)
                    print(f"    📈 Average cloud coverage: {safe_cloud_coverage:.1f}% ({len(tile_cloud_percentages)} tiles)")

                    # Create new SAFE folder name with cloud coverage prepended.
                    # Zero-padded to width 6, e.g. 3.5 -> "003_50" (format XXX_XX)
                    cloud_pct_str = f"{safe_cloud_coverage:06.2f}".replace(".", "_")
                    new_safe_name = f"cloud{cloud_pct_str}pct_{safe}"
                    new_safe_path = os.path.join(folder_path, new_safe_name)

                    # Rename folder
                    try:
                        os.rename(safe_folder_path, new_safe_path)
                        print(f"    ✅ Renamed to: {new_safe_name}")

                        # Report cloud status
                        if safe_cloud_coverage == 0:
                            print(f"    🟢 Clean imagery (0% clouds)")
                        elif safe_cloud_coverage <= 5:
                            print(f"    🟢 Low cloud coverage ({safe_cloud_coverage:.1f}%)")
                        elif safe_cloud_coverage <= 20:
                            print(f"    🟡 Moderate cloud coverage ({safe_cloud_coverage:.1f}%)")
                        else:
                            print(f"    🔴 High cloud coverage ({safe_cloud_coverage:.1f}%)")

                    except Exception as e:
                        print(f"    ❌ Rename failed: {str(e)[:50]}")

                else:
                    print(f"    ⚠️  No valid cloud data found - folder unchanged")

print("✅ Cloud coverage analysis complete!")

Found 1 folders to analyze:
  C:\Users\ss2596\Documents\SENTINEL2 Mufunta\Mufunta_Sentinel2_8_2025

📊 Analyzing cloud coverage for: Mufunta_Sentinel2_8_2025
  Found 104 SAFE folders
  📊 Analyzing SAFE folder: S2A_MSIL2A_20250806T082511_N0511_R078_T35KKB_20250806T112013.SAFE
    L2A_T35KKB_A052: 0.0% cloud coverage
    📈 Average cloud coverage: 0.0% (1 tiles)
    ✅ Renamed to: cloud000_00pct_S2A_MSIL2A_20250806T082511_N0511_R078_T35KKB_20250806T112013.SAFE
    🟢 Low cloud coverage (0.0%)
  📊 Analyzing SAFE folder: S2A_MSIL2A_20250806T082511_N0511_R078_T35KLB_20250806T112013.SAFE
    L2A_T35KLB_A052: 0.0% cloud coverage
    📈 Average cloud coverage: 0.0% (1 tiles)
    ✅ Renamed to: cloud000_00pct_S2A_MSIL2A_20250806T082511_N0511_R078_T35KLB_20250806T112013.SAFE
    🟢 Low cloud coverage (0.0%)
  📊 Analyzing SAFE folder: S2A_MSIL2A_20250806T082511_N0511_R078_T35LKC_20250806T112013.SAFE
    L2A_T35LKC_A052: 0.0% cloud coverage
    📈 Average cloud coverage: 0.0% (1 tiles)
    ✅ Renamed to: c

In [None]:
import os
import re

# -----------------------------
# Remove Cloud Percentage Prefixes from SAFE Folder Names
# -----------------------------

# Configuration - Edit this path as needed
# Folder whose immediate *.SAFE children are renamed; it is the argument
# passed to remove_cloud_prefixes() at the end of this cell.
base_directory = r"C:\Users\ss2596\Documents\SENTINEL2 Mufunta\Mufunta_Sentinel2_8_2025"

def remove_cloud_prefixes(directory_path):
    """
    Strip the cloud-percentage prefix from SAFE folder names in a directory.

    Folders named ``cloud<digits>_<digits>pct_<original>.SAFE`` are renamed
    back to ``<original>.SAFE``. Any number of decimal digits is accepted, so
    both ``cloud000_0pct_`` and ``cloud000_00pct_`` prefixes are handled.

    Args:
        directory_path: Folder whose immediate ``.SAFE`` entries are examined.

    Returns:
        None. Progress and a summary are printed. A missing directory or the
        absence of prefixed folders is reported and the function returns early.
    """
    import gc  # local import, as in the original; only needed by this function

    # One or more digits after the underscore covers both prefix formats
    prefix_pattern = re.compile(r'^cloud\d+_\d+pct_')

    if not os.path.exists(directory_path):
        print(f"❌ Directory not found: {directory_path}")
        return

    # Get all SAFE folders with cloud prefixes
    safe_folders = [f for f in os.listdir(directory_path) if f.endswith(".SAFE")]
    cloud_folders = [f for f in safe_folders if f.startswith("cloud") and "pct_" in f]

    if not cloud_folders:
        print(f"No SAFE folders with cloud prefixes found in: {directory_path}")
        return

    print(f"Found {len(cloud_folders)} SAFE folders with cloud prefixes to rename:")

    renamed_count = 0
    failed_count = 0

    # Clear any potential file locks; once before the loop is enough because
    # the loop body creates nothing that could hold a handle
    gc.collect()

    for folder in cloud_folders:
        original_name = prefix_pattern.sub('', folder)

        # Candidate passed the cheap filter but does not carry a real prefix
        if original_name == folder:
            print(f"  ⚠️  Skipping {folder} - no cloud prefix/suffix pattern found")
            continue

        current_path = os.path.join(directory_path, folder)
        new_path = os.path.join(directory_path, original_name)

        # Check if target name already exists
        if os.path.exists(new_path):
            print(f"  ❌ Cannot rename {folder} -> {original_name} (target already exists)")
            failed_count += 1
            continue

        try:
            print(f"  🔄 Renaming: {folder} -> {original_name}")
            os.rename(current_path, new_path)
            print(f"  ✅ Successfully renamed")
            renamed_count += 1

        except PermissionError:
            print(f"  ❌ Permission denied - folder may be in use")
            failed_count += 1
        except Exception as e:
            print(f"  ❌ Error renaming folder: {str(e)[:50]}")
            failed_count += 1

    print(f"\n📊 Summary:")
    print(f"  ✅ Successfully renamed: {renamed_count} folders")
    print(f"  ❌ Failed to rename: {failed_count} folders")
    print(f"  📁 Total processed: {len(cloud_folders)} folders")

# Run the cleanup
# NOTE: the completion message below is printed unconditionally; per-folder
# failures are only visible in the summary printed by remove_cloud_prefixes().
print("🧹 Starting cloud prefix removal...")
remove_cloud_prefixes(base_directory)
print("✅ Cloud prefix removal complete!")

🧹 Starting cloud prefix removal...
Found 100 SAFE folders with cloud prefixes to rename:
  🔄 Renaming: cloud000_0pct_S2A_MSIL2A_20250806T082511_N0511_R078_T35KLB_20250806T112013.SAFE -> S2A_MSIL2A_20250806T082511_N0511_R078_T35KLB_20250806T112013.SAFE
  ✅ Successfully renamed
  🔄 Renaming: cloud000_0pct_S2A_MSIL2A_20250806T082511_N0511_R078_T35LKC_20250806T112013.SAFE -> S2A_MSIL2A_20250806T082511_N0511_R078_T35LKC_20250806T112013.SAFE
  ✅ Successfully renamed
  🔄 Renaming: cloud000_0pct_S2A_MSIL2A_20250806T082511_N0511_R078_T35LLC_20250806T112013.SAFE -> S2A_MSIL2A_20250806T082511_N0511_R078_T35LLC_20250806T112013.SAFE
  ✅ Successfully renamed
  🔄 Renaming: cloud000_0pct_S2A_MSIL2A_20250809T083521_N0511_R121_T35KKB_20250809T120015.SAFE -> S2A_MSIL2A_20250809T083521_N0511_R121_T35KKB_20250809T120015.SAFE
  ✅ Successfully renamed
  🔄 Renaming: cloud000_0pct_S2A_MSIL2A_20250809T083521_N0511_R121_T35KLB_20250809T120015.SAFE -> S2A_MSIL2A_20250809T083521_N0511_R121_T35KLB_20250809T120015.S