In [None]:
import os
import gc
import numpy as np
import rasterio
from pathlib import Path
import warnings
from concurrent.futures import ThreadPoolExecutor, as_completed
import multiprocessing as mp
from typing import List, Dict, Tuple, Optional
import time
import psutil
from scipy import ndimage
import cv2
warnings.filterwarnings('ignore')

In [None]:
# Configuration
# Module-level settings dictionary. The helper functions and SatelliteProcessor
# methods look these keys up at call time, so changing a value here affects the
# next call without redefining anything else.
CONFIG = {
    # NOTE(review): not read by any code in the part of the notebook shown here.
    'cloud_threshold': 0.3,
    # Capped at 8, or the CPU count when that is smaller.
    # NOTE(review): not read by any code in the part of the notebook shown here.
    'io_workers': min(8, mp.cpu_count()),
    # NOTE(review): not read by any code in the part of the notebook shown here.
    'memory_threshold_gb': 2,
    # Gates compute_vegetation_indices() and the VEGETATION_INDICES list below.
    'enable_vegetation_indices': True, 
    # Gates apply_super_resolution() (2x upscale + sharpening).
    'enable_super_resolution': False,  
    # Gates the spectral/morphological branch of detect_clouds().
    'enable_advanced_cloud_detection': False,  
    # Selects the per-pixel IDW branch of gap_fill() instead of the 3x3 fill.
    'enable_idw_interpolation': False,  
    # Gates normalize_radiometry().
    'enable_radiometric_normalization': False,
    # read_raster() divides raster height/width by this factor when it is > 1.
    'downsample_factor': 1,
    # load_s2_item() trims its band dict to a priority list above this count.
    'max_bands_per_composite': 12,
    # When True: the SCL layer is not loaded, cloud masks are not applied,
    # gap_fill() returns its input untouched, and save_composite() writes no
    # band descriptions or tags.
    'skip_quality_checks': True,
}

# Essential bands only
# Band-name substrings searched for in Sentinel-2 file names by load_s2_item().
ESSENTIAL_BANDS = ['B02', 'B03', 'B04', 'B08', 'B11']
# Index names added to the median composite; empty when indices are disabled.
# Evaluated once at definition time, so later edits to CONFIG do not update it.
VEGETATION_INDICES = ['EVI', 'GNDVI', 'BSI', 'MSAVI2'] if CONFIG['enable_vegetation_indices'] else []
# Band names produced by load_s1_item() and stacked by create_median_composite().
S1_BANDS = ['VV', 'VH']
# SCL class values that detect_clouds() treats as unusable pixels.
# Presumably the Sen2Cor scene-classification codes — TODO confirm.
SCL_INVALID = [0, 1, 8, 9, 10, 11]

In [None]:
def get_system_info():
    """Return the memory currently reported as available by psutil, in GiB."""
    bytes_per_gib = 1024**3
    available_bytes = psutil.virtual_memory().available
    return available_bytes / bytes_per_gib

def read_raster(file_path: Path) -> Optional[np.ndarray]:
    """Read band 1 of a raster as float32 with nodata pixels set to NaN.

    The raster is decimated with nearest-neighbour resampling when
    ``CONFIG['downsample_factor']`` is greater than 1; otherwise rasters wider
    or taller than 4000 pixels are automatically read at half resolution.

    Args:
        file_path: Path of the raster file, passed to ``rasterio.open``.

    Returns:
        A 2-D float32 array, or ``None`` if opening or reading fails for any
        reason (best-effort loading: errors are swallowed deliberately).
    """
    try:
        with rasterio.open(file_path) as src:
            # Decide the read resolution once, then issue a single read call.
            downsample = CONFIG['downsample_factor']
            if downsample > 1:
                out_shape = (src.height // downsample, src.width // downsample)
            elif src.width > 4000 or src.height > 4000:
                # For very large files, auto-downsample by 2
                out_shape = (src.height // 2, src.width // 2)
            else:
                out_shape = None

            if out_shape is not None:
                data = src.read(1, out_shape=out_shape, resampling=rasterio.enums.Resampling.nearest)
            else:
                data = src.read(1)

            # Build the nodata mask on the native dtype. Comparing after the
            # float32 cast is lossy: distinct source values can round onto the
            # nodata value and would then be blanked out as well.
            nodata = src.nodata
            nodata_mask = (data == nodata) if nodata is not None else None

            if data.dtype != np.float32:
                data = data.astype(np.float32)

            if nodata_mask is not None:
                data[nodata_mask] = np.nan

            return data
    except Exception:
        return None

def detect_clouds(scl_data: np.ndarray, bands: Dict[str, np.ndarray] = None) -> Tuple[np.ndarray, float]:
    """Flag unusable pixels from an SCL layer and report the clear fraction.

    With ``CONFIG['enable_advanced_cloud_detection']`` on and ``bands`` given,
    the SCL mask is widened by percentile tests on B02, an NDVI built from
    B08/B04 and a B02/B11 combination, then cleaned with a 3x3 binary closing
    followed by an opening. Otherwise only SCL class membership is used
    (a reduced class list when ``CONFIG['skip_quality_checks']`` is set).

    Returns:
        ``(mask, score)`` where ``mask`` is True on flagged pixels and
        ``score`` is the share of unflagged pixels. Any exception yields an
        all-False mask and a score of 0.0.
    """
    try:
        advanced = CONFIG['enable_advanced_cloud_detection'] and bands is not None

        if not advanced:
            # Class-membership test only; the short list is the "fast" variant.
            flagged_classes = [0, 1, 8, 9] if CONFIG['skip_quality_checks'] else SCL_INVALID
            cloud_mask = np.isin(scl_data, flagged_classes)
        else:
            cloud_mask = np.isin(scl_data, SCL_INVALID)

            blue, red, nir, swir = (bands.get(key) for key in ('B02', 'B04', 'B08', 'B11'))

            if blue is not None:
                # Brightest 15% of blue pixels
                cloud_mask |= (blue > np.nanpercentile(blue, 85))

            if nir is not None and red is not None:
                # Lowest 15% of NDVI values
                ndvi = (nir - red) / (nir + red + 1e-8)
                cloud_mask |= (ndvi < np.nanpercentile(ndvi, 15))

            if swir is not None and blue is not None:
                bright_blue = blue > np.nanpercentile(blue, 75)
                dark_swir = swir < np.nanpercentile(swir, 50)
                cloud_mask |= bright_blue & dark_swir

            # Close small holes first, then remove isolated specks.
            structure = np.ones((3, 3), np.uint8)
            closed = ndimage.binary_closing(cloud_mask, structure=structure)
            cloud_mask = ndimage.binary_opening(closed, structure=structure)

        clear_fraction = np.sum(~cloud_mask) / cloud_mask.size
        return cloud_mask, clear_fraction

    except Exception:
        return np.zeros_like(scl_data, dtype=bool), 0.0

def compute_vegetation_indices(bands: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
    """Derive EVI, GNDVI, BSI and MSAVI2 from whichever input bands exist.

    Nothing is computed unless ``CONFIG['enable_vegetation_indices']`` is set
    and both B08 and B04 are present. Each index is clipped to [-1, 1]. If an
    exception occurs part-way, the indices finished so far are returned.
    """
    if not CONFIG['enable_vegetation_indices']:
        return {}

    blue, green, red, nir, swir = (bands.get(key) for key in ('B02', 'B03', 'B04', 'B08', 'B11'))
    tiny = 1e-8  # keeps denominators away from an exact zero
    computed = {}

    try:
        if nir is None or red is None:
            return computed

        if blue is not None:
            # EVI - Enhanced Vegetation Index
            computed['EVI'] = np.clip(
                2.5 * (nir - red) / (nir + 6 * red - 7.5 * blue + 1 + tiny), -1, 1
            )

        if green is not None:
            # GNDVI - Green Normalized Difference Vegetation Index
            computed['GNDVI'] = np.clip((nir - green) / (nir + green + tiny), -1, 1)

        if swir is not None and blue is not None:
            # BSI - Bare Soil Index
            computed['BSI'] = np.clip(
                ((swir + red) - (nir + blue)) / ((swir + red) + (nir + blue) + tiny), -1, 1
            )

        # MSAVI2 - Modified Soil Adjusted Vegetation Index
        # The radicand is floored at 0 so the square root never sees a negative.
        root = np.sqrt(np.maximum((2 * nir + 1)**2 - 8 * (nir - red), 0))
        computed['MSAVI2'] = np.clip((2 * nir + 1 - root) / 2, -1, 1)
    except Exception:
        pass

    return computed

def gap_fill(data: np.ndarray, mask: np.ndarray) -> np.ndarray:
    """Fill the pixels selected by ``mask`` from their valid neighbours.

    Two methods are available:

    * ``CONFIG['enable_idw_interpolation']`` on: inverse-distance-squared
      weighting of the valid pixels within a 12-pixel window around each gap
      pixel (pure-Python loop over gap pixels).
    * otherwise: mean of the valid pixels in the 3x3 neighbourhood. Masked and
      NaN pixels are excluded from that mean; a plain convolution would let a
      single NaN (including the gap pixel itself) turn the result into NaN,
      leaving every gap unfilled.

    Args:
        data: 2-D array to fill; it is never modified in place.
        mask: Boolean array of the same shape, True where a value is missing.

    Returns:
        A filled copy of ``data``. ``data`` itself is returned unchanged when
        the mask is empty, when ``CONFIG['skip_quality_checks']`` is set, or
        when any exception occurs. Gap pixels with no valid neighbour keep
        their original value in the 3x3 method.
    """
    if not np.any(mask) or CONFIG['skip_quality_checks']:
        return data

    try:
        if CONFIG['enable_idw_interpolation']:
            # IDW interpolation (higher quality)
            result = data.copy()
            missing_coords = np.where(mask)
            h, w = data.shape
            radius = 12  # IDW radius

            for y, x in zip(missing_coords[0], missing_coords[1]):
                # Clamp the search window to the array bounds
                y_min, y_max = max(0, y - radius), min(h, y + radius + 1)
                x_min, x_max = max(0, x - radius), min(w, x + radius + 1)

                local_data = data[y_min:y_max, x_min:x_max]
                local_mask = mask[y_min:y_max, x_min:x_max]
                valid_mask = ~local_mask & ~np.isnan(local_data)

                if np.any(valid_mask):
                    local_y, local_x = y - y_min, x - x_min
                    valid_coords = np.where(valid_mask)
                    distances = np.sqrt((valid_coords[0] - local_y)**2 + (valid_coords[1] - local_x)**2)
                    # Floor the distance so the weights stay finite
                    distances = np.maximum(distances, 0.1)

                    weights = 1.0 / distances**2
                    weights /= np.sum(weights)

                    valid_values = local_data[valid_mask]
                    result[y, x] = np.sum(weights * valid_values)
                else:
                    result[y, x] = np.nanmean(local_data)

            return result

        # NaN-aware 3x3 mean: convolve the valid values (invalid ones zeroed)
        # and the validity flags separately, then divide sum by count.
        result = data.copy()
        valid = ~mask & ~np.isnan(data)
        kernel = np.ones((3, 3), dtype=np.float32)
        value_sum = ndimage.convolve(
            np.where(valid, data, 0).astype(np.float32), kernel, mode='nearest'
        )
        valid_count = ndimage.convolve(valid.astype(np.float32), kernel, mode='nearest')

        # Only touch gap pixels that have at least one valid neighbour.
        fillable = mask & (valid_count > 0.5)
        result[fillable] = value_sum[fillable] / valid_count[fillable]
        return result

    except Exception:
        return data

def normalize_radiometry(target_bands: Dict, reference_bands: Dict) -> Dict:
    """Match each target band's mean and spread to the same-named reference band.

    When ``CONFIG['enable_radiometric_normalization']`` is off, ``target_bands``
    is returned as-is. Otherwise a new dict is built: a band is rescaled with
    ``(x - mean_t) * (std_r / std_t) + mean_r`` when a reference band exists,
    both bands have at least one non-NaN pixel and both standard deviations
    are positive; in every other case the band is passed through unchanged.
    """
    if not CONFIG['enable_radiometric_normalization']:
        return target_bands

    normalized_bands = {}

    for band_name, target_data in target_bands.items():
        # Default outcome: pass the band through untouched.
        outcome = target_data

        if band_name in reference_bands:
            reference_data = reference_bands[band_name]
            target_ok = ~np.isnan(target_data)
            reference_ok = ~np.isnan(reference_data)

            if np.any(target_ok) and np.any(reference_ok):
                target_values = target_data[target_ok]
                target_mean = np.nanmean(target_values)
                target_std = np.nanstd(target_values)

                reference_values = reference_data[reference_ok]
                ref_mean = np.nanmean(reference_values)
                ref_std = np.nanstd(reference_values)

                # A zero spread on either side makes the gain undefined.
                if target_std > 0 and ref_std > 0:
                    outcome = (target_data - target_mean) * (ref_std / target_std) + ref_mean

        normalized_bands[band_name] = outcome

    return normalized_bands

def apply_super_resolution(data: np.ndarray) -> np.ndarray:
    """Upscale a 2-D array by 2x and blend in a sharpened copy.

    Does nothing unless ``CONFIG['enable_super_resolution']`` is set. The
    array is resized with Lanczos interpolation, filtered with a 3x3
    sharpening kernel, and the two are mixed 80/20. The result is float32;
    on any exception the input is returned unchanged.
    """
    if not CONFIG['enable_super_resolution']:
        return data

    try:
        scale = 2
        rows, cols = data.shape
        # cv2.resize takes the target size as (width, height)
        target_size = (cols * scale, rows * scale)
        upscaled = cv2.resize(data, target_size, interpolation=cv2.INTER_LANCZOS4)

        sharpen_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
        sharpened = cv2.filter2D(upscaled, -1, sharpen_kernel)

        blended = 0.8 * upscaled + 0.2 * sharpened
        return blended.astype(np.float32)

    except Exception:
        return data

In [None]:
class SatelliteProcessor:
    def __init__(self, input_dir: str):
        self.input_dir = Path(input_dir)
        self.output_dir = self.input_dir / "processed"
        self.cloud_free_dir = self.output_dir / "monthly_cloud_free"
        self.median_dir = self.output_dir / "monthly_median"
        
        # Create output directories
        self.cloud_free_dir.mkdir(parents=True, exist_ok=True)
        self.median_dir.mkdir(parents=True, exist_ok=True)
    
    def discover_data(self) -> Dict[str, Dict]:
        """Discover satellite data with optimized scanning"""
        print("🔍 Discovering satellite data...")
        
        monthly_data = {}
        
        for sat_type in ['sentinel1', 'sentinel2']:
            base_path = self.input_dir / "raw" / "images" / sat_type
            if not base_path.exists():
                continue
            
            # Use os.scandir for better performance
            try:
                with os.scandir(base_path) as entries:
                    for entry in entries:
                        if not entry.is_dir():
                            continue
                        
                        name_parts = entry.name.split('_')
                        try:
                            if sat_type == 'sentinel1' and len(name_parts) >= 5:
                                date_part = name_parts[4][:8]
                            elif sat_type == 'sentinel2' and len(name_parts) >= 3:
                                date_part = name_parts[2][:8]
                            else:
                                continue
                            
                            if len(date_part) >= 6 and date_part[:6].isdigit():
                                year_month = f"{date_part[:4]}_{date_part[4:6]}"
                                
                                if year_month not in monthly_data:
                                    monthly_data[year_month] = {'s1_items': [], 's2_items': []}
                                
                                key = 's1_items' if sat_type == 'sentinel1' else 's2_items'
                                monthly_data[year_month][key].append(Path(entry.path))
                        except (IndexError, ValueError):
                            continue
            except OSError:
                continue
        
        total_items = sum(len(data['s1_items']) + len(data['s2_items']) for data in monthly_data.values())
        print(f"   📊 Found {total_items} items, {len(monthly_data)} months")
        return monthly_data
    
    def load_s2_item(self, item_dir: Path) -> Tuple[str, Optional[Dict]]:
        """Load S2 item with configurable processing"""
        try:
            bands = {}
            
            # Efficient file discovery
            band_files = {}
            essential_plus_scl = ESSENTIAL_BANDS + (['SCL'] if not CONFIG['skip_quality_checks'] else [])
            
            # Use os.scandir for better performance
            try:
                with os.scandir(item_dir) as entries:
                    for entry in entries:
                        if entry.name.endswith(('.tif', '.tiff', '.jp2')):
                            stem_upper = entry.name.upper()
                            for band_name in essential_plus_scl:
                                if band_name in stem_upper:
                                    band_files[band_name] = Path(entry.path)
                                    break
                            # Early termination for speed
                            if len(band_files) >= len(ESSENTIAL_BANDS):
                                break
            except OSError:
                return item_dir.name, None
            
            # Load essential bands
            reference_shape = None
            for band_name in ESSENTIAL_BANDS:
                if band_name in band_files:
                    data = read_raster(band_files[band_name])
                    if data is not None:
                        if reference_shape is None:
                            reference_shape = data.shape
                        elif data.shape != reference_shape:
                            data = cv2.resize(data, (reference_shape[1], reference_shape[0]), 
                                            interpolation=cv2.INTER_NEAREST)
                        bands[band_name] = data
            
            if len(bands) < 3:
                return item_dir.name, None
            
            # Vegetation indices (if enabled)
            if CONFIG['enable_vegetation_indices']:
                indices = compute_vegetation_indices(bands)
                bands.update(indices)
                
                # Limit total bands for performance
                if len(bands) > CONFIG['max_bands_per_composite']:
                    priority_bands = ['B04', 'B08', 'B11', 'EVI', 'GNDVI', 'B02', 'B03']
                    kept_bands = {}
                    for band_name in priority_bands:
                        if band_name in bands:
                            kept_bands[band_name] = bands[band_name]
                        if len(kept_bands) >= CONFIG['max_bands_per_composite']:
                            break
                    bands = kept_bands
            
            # Cloud detection
            cloud_mask = None
            cloud_cover_percent = 0
            
            if not CONFIG['skip_quality_checks'] and 'SCL' in band_files:
                scl_data = read_raster(band_files['SCL'])
                if scl_data is not None:
                    if scl_data.shape != reference_shape:
                        scl_data = cv2.resize(scl_data, (reference_shape[1], reference_shape[0]), 
                                            interpolation=cv2.INTER_NEAREST)
                    
                    cloud_mask, _ = detect_clouds(scl_data, bands if CONFIG['enable_advanced_cloud_detection'] else None)
                    cloud_cover_percent = np.sum(cloud_mask) / cloud_mask.size * 100
            
            return item_dir.name, {
                'bands': bands,
                'cloud_mask': cloud_mask,
                'cloud_cover_percent': cloud_cover_percent
            }
            
        except Exception:
            return item_dir.name, None
    
    def load_s1_item(self, item_dir: Path) -> Tuple[str, Optional[Dict]]:
        """Load S1 item"""
        try:
            # Scan for VV/VH files
            vv_file = vh_file = None
            
            try:
                with os.scandir(item_dir) as entries:
                    for entry in entries:
                        if entry.name.endswith(('.tif', '.tiff')):
                            stem_upper = entry.name.upper()
                            if 'VV' in stem_upper and vv_file is None:
                                vv_file = Path(entry.path)
                            elif 'VH' in stem_upper and vh_file is None:
                                vh_file = Path(entry.path)
                            
                            if vv_file and vh_file:
                                break
            except OSError:
                return item_dir.name, None
            
            if not vv_file or not vh_file:
                return item_dir.name, None
            
            # Load data
            vv_data = read_raster(vv_file)
            vh_data = read_raster(vh_file)
            
            if vv_data is None or vh_data is None:
                return item_dir.name, None
            
            # Shape matching
            if vv_data.shape != vh_data.shape:
                min_h = min(vv_data.shape[0], vh_data.shape[0])
                min_w = min(vv_data.shape[1], vh_data.shape[1])
                vv_data = vv_data[:min_h, :min_w]
                vh_data = vh_data[:min_h, :min_w]
            
            # dB conversion with clipping
            vv_db = np.clip(10 * np.log10(np.maximum(vv_data, 1e-10)), -30, 5)
            vh_db = np.clip(10 * np.log10(np.maximum(vh_data, 1e-10)), -30, 5)
            
            return item_dir.name, {
                'bands': {'VV': vv_db, 'VH': vh_db}
            }
            
        except Exception:
            return item_dir.name, None
    
    def create_cloud_free_composite(self, s2_items: List[Tuple]) -> Optional[Dict]:
        """Create cloud-free composite with configurable processing"""
        valid_items = [(item_id, data) for item_id, data in s2_items if data is not None]
        
        if not valid_items:
            return None
        
        # Reference selection
        if CONFIG['skip_quality_checks']:
            reference_item = valid_items[0]
        else:
            reference_item = min(valid_items, 
                               key=lambda x: x[1].get('cloud_cover_percent', 100))
        
        ref_id, ref_data = reference_item
        ref_bands = ref_data['bands'].copy()
        
        # Apply cloud mask
        if not CONFIG['skip_quality_checks'] and ref_data.get('cloud_mask') is not None:
            cloud_mask = ref_data['cloud_mask']
            for band_name in ref_bands:
                ref_bands[band_name] = np.where(cloud_mask, np.nan, ref_bands[band_name])
        
        composite_bands = {}
        
        for band_name, ref_band in ref_bands.items():
            result = ref_band.copy()
            
            # Gap filling
            if not CONFIG['skip_quality_checks']:
                missing_mask = np.isnan(result)
                if np.any(missing_mask):
                    # Create median composite from all valid images
                    valid_stack = []
                    for item_id, data in valid_items:
                        if (item_id != ref_id and 'bands' in data and 
                            band_name in data['bands'] and
                            data['bands'][band_name].shape == result.shape):
                            
                            fill_data = data['bands'][band_name].copy()
                            
                            # Apply cloud mask to fill data
                            if data.get('cloud_mask') is not None:
                                fill_data = np.where(data['cloud_mask'], np.nan, fill_data)
                            
                            if np.any(~np.isnan(fill_data)):
                                valid_stack.append(fill_data)
                    
                    if valid_stack:
                        # Ensure all arrays have the same shape before creating median
                        ref_shape = result.shape
                        aligned_stack = []
                        for fill_data in valid_stack:
                            if fill_data.shape == ref_shape:
                                aligned_stack.append(fill_data)
                            else:
                                # Resize to match reference shape
                                resized = cv2.resize(fill_data, (ref_shape[1], ref_shape[0]), 
                                                   interpolation=cv2.INTER_LINEAR)
                                aligned_stack.append(resized)
                        
                        if aligned_stack:
                            median_composite = np.nanmedian(aligned_stack, axis=0)
                        
                        # Apply radiometric normalization if enabled
                        if CONFIG['enable_radiometric_normalization']:
                            normalized_composite = normalize_radiometry(
                                {band_name: median_composite}, 
                                {band_name: ref_band}
                            )[band_name]
                        else:
                            normalized_composite = median_composite
                        
                        # Fill gaps
                        fill_mask = missing_mask & ~np.isnan(normalized_composite)
                        result[fill_mask] = normalized_composite[fill_mask]
                        missing_mask = np.isnan(result)
                    
                    # Apply gap filling method based on configuration
                    if np.any(missing_mask):
                        result = gap_fill(result, missing_mask)
            
            composite_bands[band_name] = result
        
        if len(composite_bands) >= 3:
            return {
                'bands': composite_bands,
                'metadata': {
                    'reference_image': ref_id,
                    'num_source_images': len(valid_items),
                    'processing_method': 'configurable_cloud_free'
                }
            }
        
        return None
    
    def create_median_composite(self, s2_items: List[Tuple], s1_items: List[Tuple]) -> Optional[Dict]:
        """Create median composite with configurable processing"""
        all_bands = {}
        
        # Process S2 data
        if s2_items:
            valid_s2 = [(item_id, data) for item_id, data in s2_items if data is not None]
            
            if valid_s2:
                # Get reference shape
                reference_shape = None
                for _, data in valid_s2:
                    if 'bands' in data and data['bands']:
                        reference_shape = next(iter(data['bands'].values())).shape
                        break
                
                if reference_shape:
                    target_bands = ESSENTIAL_BANDS + VEGETATION_INDICES
                    
                    for band_name in target_bands:
                        band_stack = []
                        
                        for _, data in valid_s2:
                            if ('bands' in data and band_name in data['bands']):
                                band_data = data['bands'][band_name]
                                
                                # Apply cloud mask
                                if not CONFIG['skip_quality_checks'] and data.get('cloud_mask') is not None:
                                    band_data = np.where(data['cloud_mask'], np.nan, band_data)
                                
                                if (band_data.shape == reference_shape and 
                                    (CONFIG['skip_quality_checks'] or np.any(~np.isnan(band_data)))):
                                    band_stack.append(band_data)
                        
                        if len(band_stack) >= 1:
                            if len(band_stack) == 1:
                                result = band_stack[0].astype(np.float32)
                            else:
                                # Ensure all arrays have the same shape before stacking
                                ref_shape = band_stack[0].shape
                                aligned_stack = []
                                for band_data in band_stack:
                                    if band_data.shape == ref_shape:
                                        aligned_stack.append(band_data)
                                    else:
                                        # Resize to match reference shape
                                        resized = cv2.resize(band_data, (ref_shape[1], ref_shape[0]), 
                                                           interpolation=cv2.INTER_LINEAR)
                                        aligned_stack.append(resized)
                                
                                if len(aligned_stack) >= 1:
                                    result = np.nanmedian(aligned_stack, axis=0).astype(np.float32)
                                else:
                                    continue
                            
                            # Apply super-resolution if enabled
                            if CONFIG['enable_super_resolution']:
                                result = apply_super_resolution(result)
                            
                            all_bands[band_name] = result
        
        # Process S1 data
        if s1_items:
            valid_s1 = [(item_id, data) for item_id, data in s1_items if data is not None]
            
            for band_name in S1_BANDS:
                band_stack = []
                
                for _, data in valid_s1:
                    if ('bands' in data and band_name in data['bands']):
                        band_data = data['bands'][band_name]
                        if CONFIG['skip_quality_checks'] or np.any(~np.isnan(band_data)):
                            band_stack.append(band_data)
                
                if len(band_stack) >= 1:
                    if len(band_stack) == 1:
                        result = band_stack[0].astype(np.float32)
                    else:
                        # Ensure all S1 arrays have the same shape
                        ref_shape = band_stack[0].shape
                        aligned_stack = []
                        for band_data in band_stack:
                            if band_data.shape == ref_shape:
                                aligned_stack.append(band_data)
                            else:
                                # Resize S1 data to match reference shape
                                resized = cv2.resize(band_data, (ref_shape[1], ref_shape[0]), 
                                                   interpolation=cv2.INTER_LINEAR)
                                aligned_stack.append(resized)
                        
                        if len(aligned_stack) >= 1:
                            result = np.nanmedian(aligned_stack, axis=0).astype(np.float32)
                        else:
                            continue
                    
                    # Apply super-resolution if enabled
                    if CONFIG['enable_super_resolution']:
                        result = apply_super_resolution(result)
                    
                    all_bands[band_name] = result
        
        if len(all_bands) >= 3:
            return {
                'bands': all_bands,
                'metadata': {
                    'num_s2_images': len([x for x in s2_items if x[1] is not None]),
                    'num_s1_images': len([x for x in s1_items if x[1] is not None]),
                    'processing_method': 'configurable_median',
                    'super_resolution_applied': CONFIG['enable_super_resolution']
                }
            }
        
        return None
    
    def save_composite(self, data: Dict, output_path: Path) -> bool:
        """Save composite with configurable compression"""
        try:
            bands = data['bands']
            band_names = sorted(bands.keys())
            first_band = list(bands.values())[0]
            height, width = first_band.shape
            
            # Profile based on configuration
            if CONFIG['enable_super_resolution'] or len(band_names) > 6:
                # Use compression for larger files
                profile = {
                    'driver': 'GTiff',
                    'count': len(band_names),
                    'height': height,
                    'width': width,
                    'dtype': 'float32',
                    'compress': 'lzw',
                    'predictor': 2,
                    'tiled': True,
                    'blockxsize': 512,
                    'blockysize': 512
                }
            else:
                # Optimized profile for speed
                profile = {
                    'driver': 'GTiff',
                    'count': len(band_names),
                    'height': height,
                    'width': width,
                    'dtype': 'float32',
                    'compress': 'none',
                    'tiled': False,
                    'interleave': 'pixel'
                }
            
            output_path.parent.mkdir(parents=True, exist_ok=True)
            
            with rasterio.open(output_path, 'w', **profile) as dst:
                # Write all bands efficiently
                if CONFIG['skip_quality_checks']:
                    # Batch write for speed - ensure all bands have same shape
                    first_shape = list(bands.values())[0].shape
                    aligned_bands = []
                    for name in band_names:
                        band_data = bands[name]
                        if band_data.shape == first_shape:
                            aligned_bands.append(band_data)
                        else:
                            # Resize to match first band shape
                            resized = cv2.resize(band_data, (first_shape[1], first_shape[0]), 
                                               interpolation=cv2.INTER_LINEAR)
                            aligned_bands.append(resized)
                    
                    band_array = np.stack(aligned_bands)
                    dst.write(band_array)
                else:
                    # Individual band writing with descriptions
                    first_shape = list(bands.values())[0].shape
                    for i, band_name in enumerate(band_names, 1):
                        band_data = bands[band_name]
                        if band_data.shape != first_shape:
                            # Resize to match first band shape
                            band_data = cv2.resize(band_data, (first_shape[1], first_shape[0]), 
                                                 interpolation=cv2.INTER_LINEAR)
                        dst.write(band_data.astype(np.float32), i)
                        dst.set_band_description(i, band_name)
                
                # Add metadata
                if not CONFIG['skip_quality_checks']:
                    dst.update_tags(**data['metadata'])
            
            return True
            
        except Exception:
            return False
    
    def check_existing_outputs(self, year_month: str) -> Tuple[bool, bool]:
        """Check if outputs already exist"""
        cloud_free_path = self.cloud_free_dir / f"{year_month}_cloud_free.tif"
        median_path = self.median_dir / f"{year_month}_median.tif"
        return cloud_free_path.exists(), median_path.exists()
    
    def process(self):
        """Main processing pipeline with configurable features.

        Prints the active configuration, discovers the available months through
        ``self.discover_data()`` and, for every month in sorted order, builds
        whichever of the cloud-free and median composites is not on disk yet.
        Progress lines and a final timing summary are printed to stdout.

        Returns:
            None. Returns early, after printing a message, when
            ``discover_data()`` yields nothing.
        """
        print("🚀 Satellite Processor")
        self._report_run_configuration()

        start_time = time.time()
        monthly_data = self.discover_data()

        if not monthly_data:
            print("❌ No data found")
            return

        processed_count = 0
        total_months = len(monthly_data)

        for month_idx, year_month in enumerate(sorted(monthly_data.keys()), 1):
            # The helper finishes this line with the month's status.
            print(f"📅 {month_idx}/{total_months}: {year_month}", end=' ')

            if not self._build_month_composites(year_month, monthly_data[year_month]):
                continue  # month skipped; the helper already printed the reason

            processed_count += 1
            if processed_count % 5 == 0:  # Less frequent GC for speed
                gc.collect()

        total_time = time.time() - start_time
        print("\n🎉 Processing complete!")
        print(f"   ⏱️  Total time: {total_time//60:.0f}m {total_time%60:.1f}s")
        print(f"   📊 Processed: {processed_count} months")

    def _report_run_configuration(self):
        """Print which speed optimizations and optional features CONFIG enables."""
        optimizations = []
        if CONFIG['downsample_factor'] > 1:
            optimizations.append(f"Downsampling: {CONFIG['downsample_factor']}x")
        if CONFIG['skip_quality_checks']:
            optimizations.append("Quality checks disabled")
        if CONFIG['max_bands_per_composite'] < 10:
            optimizations.append(f"Max bands: {CONFIG['max_bands_per_composite']}")

        # (CONFIG flag, label) pairs, listed in the order they are reported.
        feature_labels = (
            ('enable_vegetation_indices', "Vegetation Indices"),
            ('enable_advanced_cloud_detection', "Advanced Cloud Detection"),
            ('enable_idw_interpolation', "IDW Interpolation"),
            ('enable_radiometric_normalization', "Radiometric Normalization"),
            ('enable_super_resolution', "Super Resolution"),
        )
        enabled_features = [label for flag, label in feature_labels if CONFIG[flag]]

        if optimizations:
            print(f"   ⚡ Optimizations: {', '.join(optimizations)}")
        if enabled_features:
            print(f"   ✅ Enabled: {', '.join(enabled_features)}")
        else:
            print("   ⚡ Speed mode: All advanced features disabled")

    def _load_items_in_parallel(self, loader, item_dirs):
        """Call ``loader(item_dir)`` for every entry on a thread pool.

        Args:
            loader: Callable taking one item directory.
            item_dirs: Item directories to load.

        Returns:
            List of loader results in the same order as ``item_dirs``. An
            exception raised by a loader propagates to the caller.
        """
        with ThreadPoolExecutor(max_workers=CONFIG['io_workers']) as executor:
            futures = [executor.submit(loader, item_dir) for item_dir in item_dirs]
            return [future.result() for future in futures]

    def _build_month_composites(self, year_month, month_data) -> bool:
        """Create and save the composites still missing for one month.

        Runs in its own frame so the loaded items and the composites become
        unreachable as soon as it returns, instead of staying referenced while
        the next month is being loaded.

        Args:
            year_month: Month key, used as the output file-name prefix.
            month_data: Mapping with ``'s2_items'`` and ``'s1_items'`` sequences
                of item directories.

        Returns:
            True when the month reached the compositing stage (it then counts as
            processed), False when it was skipped. In both cases the status text
            completing the progress line is printed here.
        """
        cloud_free_path = self.cloud_free_dir / f"{year_month}_cloud_free.tif"
        median_path = self.median_dir / f"{year_month}_median.tif"

        # Check existing outputs first - before any data loading
        cloud_free_exists, median_exists = self.check_existing_outputs(year_month)
        if cloud_free_exists and median_exists:
            print("(both exist)")
            return False

        s2_dirs = month_data['s2_items']
        s1_dirs = month_data['s1_items']
        if len(s2_dirs) == 0:
            print("(no S2)")
            return False

        if CONFIG['skip_quality_checks']:
            # Fast mode caps the number of scenes read per month.
            s2_dirs = s2_dirs[:10]
            s1_dirs = s1_dirs[:5]

        # S2 data is needed for both composites.
        s2_items = self._load_items_in_parallel(self.load_s2_item, s2_dirs)
        if all(data is None for _, data in s2_items):
            print("(no valid S2)")
            return False

        # S1 data is only passed to the median composite.
        s1_items = []
        if not median_exists:
            s1_items = self._load_items_in_parallel(self.load_s1_item, s1_dirs)

        # An output that already exists counts towards the "saved" total.
        success_count = 0

        if cloud_free_exists:
            success_count += 1
        else:
            cf_composite = self.create_cloud_free_composite(s2_items)
            if cf_composite and self.save_composite(cf_composite, cloud_free_path):
                success_count += 1

        if median_exists:
            success_count += 1
        else:
            med_composite = self.create_median_composite(s2_items, s1_items)
            if med_composite and self.save_composite(med_composite, median_path):
                success_count += 1

        print(f"({success_count}/2 saved)")
        return True

In [None]:
def main(input_dir: str = "satellite_data"):
    """Main execution function.

    Builds a ``SatelliteProcessor`` for ``input_dir`` and runs its ``process()``
    pipeline. A keyboard interrupt or any other exception is reported on stdout
    instead of propagating to the caller.

    Args:
        input_dir: Directory handed to ``SatelliteProcessor``. Defaults to
            ``"satellite_data"``, so calling ``main()`` with no argument behaves
            as before.
    """
    try:
        processor = SatelliteProcessor(input_dir)
        processor.process()

    except KeyboardInterrupt:
        print("\n⚠️  Interrupted")
    except Exception as e:
        print(f"\n❌ Error: {e}")

In [None]:
# Entry point: run the pipeline only when this module is executed directly,
# not when it is imported from other code.
if __name__ == "__main__":
    main()