In [0]:
import os
import tempfile
from osgeo import gdal, gdalconst
import boto3
from botocore.exceptions import ClientError
import logging

In [0]:
# Export AWS credentials and region as process environment variables so that
# later code reading them via os.getenv can pick them up.
# The credential values are fetched from the "aws_geospatial_s3" secret scope
# through `dbutils`, which is not defined in this file — presumably the
# notebook-provided Databricks utilities object; confirm.
os.environ['AWS_ACCESS_KEY_ID'] = dbutils.secrets.get(scope="aws_geospatial_s3", key="access_key")
os.environ['AWS_SECRET_ACCESS_KEY'] = dbutils.secrets.get(scope="aws_geospatial_s3", key="secret_key")
os.environ['AWS_DEFAULT_REGION'] = 'eu-west-2'      # Match your bucket region

In [0]:
from osgeo import gdal
import logging
import tempfile
import os
import boto3
from botocore.exceptions import ClientError
from pyspark.sql.functions import col

# gdal.UseExceptions() is intentionally NOT enabled here (it was deferred).
# With exceptions off, GDAL calls signal failure by returning None, which is
# what merge_tiffs checks for after BuildVRT and Translate.
# gdal.UseExceptions()  # Comment this out

# Set the GDAL_DISABLE_READDIR_ON_OPEN config option to YES.
# NOTE(review): presumably this stops GDAL from listing sibling files when a
# raster is opened — confirm against the GDAL configuration options docs.
gdal.SetConfigOption('GDAL_DISABLE_READDIR_ON_OPEN', 'YES')

# Configure logging at INFO level and create the module-level logger used by
# the functions below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def get_s3_client():
    """Build a boto3 S3 client configured from the AWS_* environment variables.

    The access key and secret are read from ``AWS_ACCESS_KEY_ID`` and
    ``AWS_SECRET_ACCESS_KEY``; the region comes from ``AWS_DEFAULT_REGION`` and
    falls back to ``eu-west-2`` when that variable is unset.
    """
    client_kwargs = {
        'aws_access_key_id': os.getenv('AWS_ACCESS_KEY_ID'),
        'aws_secret_access_key': os.getenv('AWS_SECRET_ACCESS_KEY'),
        'region_name': os.getenv('AWS_DEFAULT_REGION', 'eu-west-2'),
    }
    return boto3.client('s3', **client_kwargs)

def upload_to_s3(local_path, s3_path):
    """
    Upload a local file to S3 using boto3.

    Args:
        local_path: Path of the local file to upload.
        s3_path: Destination in the form ``s3://<bucket>/<key>`` (the
            ``s3://`` scheme is optional).

    Returns:
        True if the upload succeeded; False if boto3 reported an upload
        failure (the error is logged).

    Raises:
        ValueError: If ``s3_path`` does not contain both a bucket and a key.
    """
    # Local import keeps this fix self-contained; boto3 is already a
    # dependency of this file.
    from boto3.exceptions import S3UploadFailedError

    scheme = "s3://"
    # Strip only a leading scheme: str.replace would also remove any later
    # "s3://" occurrence embedded in the key.
    location = s3_path[len(scheme):] if s3_path.startswith(scheme) else s3_path
    bucket, _, key = location.partition("/")
    if not bucket or not key:
        raise ValueError(
            f"Invalid S3 path (expected s3://<bucket>/<key>): {s3_path}"
        )

    s3 = get_s3_client()
    try:
        s3.upload_file(local_path, bucket, key)
    except (ClientError, S3UploadFailedError) as e:
        # upload_file re-raises service-side ClientErrors as
        # S3UploadFailedError, which does not derive from ClientError, so both
        # must be caught for the False return path to be reachable.
        logger.error(f"Failed to upload to S3: {e}")
        return False

    logger.info(f"Successfully uploaded to {s3_path}")
    return True

def _write_tiff_rows(rows, directory):
    """Write each row's ``raster_binary`` bytes to ``directory``; return the file paths."""
    paths = []
    for i, row in enumerate(rows):
        path = os.path.join(directory, f"temp_{i}.tif")
        with open(path, 'wb') as f:
            f.write(row['raster_binary'])
        paths.append(path)
    return paths


def merge_tiffs(binary_df, output_path):
    """
    Merges multiple TIFF binary data from a Spark DataFrame into a single TIFF using GDAL

    Every ``raster_binary`` value is collected to the driver and written to a
    temporary directory. The files are mosaicked with ``gdal.BuildVRT`` and
    written out with ``gdal.Translate`` (DEFLATE-compressed, tiled). The
    result is then uploaded to S3 or moved to a local path. If the DataFrame
    yields no rows, a warning is logged and nothing is written.

    Args:
        binary_df: PySpark DataFrame containing binary TIFF data in a
            ``raster_binary`` column (one TIFF per row).
        output_path: Path to save the merged TIFF file. Paths starting with
            ``s3://`` are uploaded via ``upload_to_s3``; anything else is
            treated as a local filesystem path.

    Raises:
        RuntimeError: If GDAL fails to build the VRT or the merged TIFF, or
            if the S3 upload reports failure.
        OSError: If the merged file cannot be moved to a local ``output_path``.
    """
    # Local import: shutil is only needed for the local-output branch and is
    # not among this file's top-level imports.
    import shutil

    with tempfile.TemporaryDirectory() as temp_dir:
        # The collected rows are passed straight to the helper so no reference
        # to the raw binaries outlives this call; they are then free to be
        # reclaimed before the memory-hungry GDAL steps run.
        temp_files = _write_tiff_rows(
            binary_df.select(col("raster_binary")).collect(), temp_dir
        )

        if not temp_files:
            logger.warning("No TIFF files found to merge")
            return

        try:
            # Enable exceptions here if needed, after imports are done
            # gdal.UseExceptions()

            # Create VRT mosaic
            vrt_file = os.path.join(temp_dir, "mosaic.vrt")
            vrt_ds = gdal.BuildVRT(vrt_file, temp_files)
            if vrt_ds is None:
                raise RuntimeError(f"Failed to create VRT: {gdal.GetLastErrorMsg()}")
            vrt_ds = None  # Dropping the reference closes/flushes the dataset

            # Create local output file
            local_output = os.path.join(temp_dir, "merged_output.tif")
            translate_options = gdal.TranslateOptions(
                creationOptions=['COMPRESS=DEFLATE', 'TILED=YES', 'BIGTIFF=IF_SAFER']
            )
            ds = gdal.Translate(local_output, vrt_file, options=translate_options)
            if ds is None:
                raise RuntimeError(f"Failed to translate: {gdal.GetLastErrorMsg()}")
            ds = None  # Dropping the reference closes/flushes the dataset

            # Handle output destination
            if output_path.startswith('s3://'):
                if not upload_to_s3(local_output, output_path):
                    raise RuntimeError("Failed to upload to S3")
            else:
                # shutil.move falls back to copy+delete when the temp dir and
                # the destination are on different filesystems, where
                # os.rename would fail with a cross-device link error.
                shutil.move(local_output, output_path)
            logger.info(f"Merged {len(temp_files)} TIFFs to {output_path}")

        except Exception as e:
            logger.error(f"Error during merge: {e}")
            raise