# **Chipping Images**

In [17]:
import os

import numpy as np
import rasterio
import torch
from rasterio.merge import merge
from rasterio.windows import Window


In [None]:

# Edge length, in pixels, of the square windows that chip_image() cuts from
# the input raster; windows on the right/bottom edges may be smaller.
CHIP_SIZE = 512

def chip_image(input_filepath, output_directory):
    """Cut band 1 of a large GeoTIFF into smaller georeferenced chips.

    The raster is walked row by row in windows of ``CHIP_SIZE`` x ``CHIP_SIZE``
    pixels; windows on the right and bottom edges are clipped to the raster
    bounds. Every window in which at most 95% of the pixels are "no data" is
    written as ``<input stem>_chip_<n>.tif``, where ``n`` counts the chips
    written so far (skipped windows do not consume a number).

    Args:
        input_filepath: Path of the source GeoTIFF.
        output_directory: Directory that receives the chip files. It is not
            created by this function.
    """
    # Derive the chip stem from the input path with os.path so that Windows
    # paths (backslashes) and other extensions are handled; splitting
    # ``src.name`` on '/' kept the whole path on Windows.
    file_stem = os.path.splitext(os.path.basename(input_filepath))[0]

    with rasterio.open(input_filepath) as src:
        width = src.width
        height = src.height
        nodata = src.nodata
        count = 0

        # Loop through the image in chunks of CHIP_SIZE
        for i in range(0, height, CHIP_SIZE):
            for j in range(0, width, CHIP_SIZE):
                # Clip the window at the right/bottom edges of the raster.
                window = Window(j, i, min(CHIP_SIZE, width - j), min(CHIP_SIZE, height - i))
                transform = src.window_transform(window)

                chip_data = src.read(1, window=window)

                # Count "no data" pixels. NaN never compares equal to itself,
                # so a NaN nodata value needs an explicit isnan test.
                if nodata is None:
                    nodata_pixels = 0
                elif np.isnan(nodata):
                    nodata_pixels = np.count_nonzero(np.isnan(chip_data))
                else:
                    nodata_pixels = np.count_nonzero(chip_data == nodata)

                # Skip chips that are mostly "no data" (e.g., beyond the AOI)
                if nodata_pixels / chip_data.size > 0.95:
                    continue

                # Start from the source metadata and describe the chip itself.
                profile = src.profile
                profile.update({
                    'height': window.height,
                    'width': window.width,
                    'transform': transform,
                    # Only band 1 is written, so the chip must declare one band.
                    'count': 1,
                })

                output_path = os.path.join(output_directory, f"{file_stem}_chip_{count}.tif")
                with rasterio.open(output_path, 'w', **profile) as dst:
                    dst.write(chip_data, 1)

                count += 1

    print(f"Successfully chipped {input_filepath} into {count} tiles.")


In [None]:
# =================================================================
# GLOBAL CONFIGURATION
# =================================================================

# IMPORTANT: Use 'r' strings for Windows paths to avoid SyntaxWarnings/Errors
# Directory searched for '<district>_PreFlood_Image.tif' and
# '<district>_PostFlood_Image.tif'.
TIF_DIR = r'C:\Kaam_Dhanda\Minor_Project\Old Images'
# Root under which '<district>/pre_flood' and '<district>/post_flood' chip
# folders are created.
OUTPUT_CHIPS_DIR = r'C:\Kaam_Dhanda\Minor_Project\Output_chips'

# List of districts to process
DISTRICTS = ['Barpeta', 'Dhemaji', 'Lakhimpur', 'Nalbari', 'Sonitpur']
# Edge length in pixels of each chip window; read by chip_image().
CHIP_SIZE = 512 # Standard size for deep learning input (e.g., 512x512 pixels)


# =================================================================
# CHIPPING FUNCTION (Core Logic)
# =================================================================

def chip_image(input_filepath, output_directory):
    """Cut band 1 of a large GeoTIFF into smaller, non-overlapping chips.

    The raster is walked row by row in windows of ``CHIP_SIZE`` x ``CHIP_SIZE``
    pixels; windows on the right and bottom edges are clipped to the raster
    bounds. Every window in which at most 95% of the pixels are "no data" is
    written, LZW-compressed and single-band, to
    ``<output_directory>/<input stem>_chip_<n>.tif``.

    ``n`` counts successfully written chips, so skipped windows do not
    consume a number.
    NOTE(review): two rasters that skip different windows will therefore give
    the same window different chip numbers -- confirm how pre/post chips are
    paired downstream.

    Args:
        input_filepath: Path of the source GeoTIFF.
        output_directory: Directory that receives the chip files. It is not
            created by this function.

    Returns:
        None. Open and write failures are reported with ``print`` rather than
        raised.
    """
    # 1. Safely open the input image
    try:
        src = rasterio.open(input_filepath)
    except rasterio.RasterioIOError as e:
        print(f"Error opening input file {input_filepath}: {e}")
        return

    # The chip name stem does not change per window, so compute it once.
    # splitext removes only the final extension, unlike replace('.tif', '').
    file_stem = os.path.splitext(os.path.basename(input_filepath))[0]

    # The context manager closes the dataset even if a read or write raises.
    with src:
        width = src.width
        height = src.height
        nodata = src.nodata
        count = 0

        # Loop through the image in chunks of CHIP_SIZE
        for i in range(0, height, CHIP_SIZE):
            for j in range(0, width, CHIP_SIZE):
                # Clip the window at the right/bottom edges of the raster.
                window = Window(j, i, min(CHIP_SIZE, width - j), min(CHIP_SIZE, height - i))
                transform = src.window_transform(window)

                # Read the data from the defined window (assuming single band: 'VV')
                chip_data = src.read(1, window=window)

                # Count "no data" pixels. NaN never compares equal to itself,
                # so a NaN nodata value needs an explicit isnan test.
                if nodata is None:
                    nodata_pixels = 0
                elif np.isnan(nodata):
                    nodata_pixels = np.count_nonzero(np.isnan(chip_data))
                else:
                    nodata_pixels = np.count_nonzero(chip_data == nodata)

                # Skip chips that are mostly 'no data' (e.g., beyond the AOI)
                if nodata_pixels / chip_data.size > 0.95:
                    continue

                # Start from the source metadata and describe the chip itself.
                profile = src.profile
                profile.update({
                    'height': window.height,
                    'width': window.width,
                    'transform': transform,
                    'count': 1,  # Only band 1 is written
                    'compress': 'LZW',  # Reduce chip size on disk
                })

                chip_filename = f'{file_stem}_chip_{count}.tif'
                output_path = os.path.join(output_directory, chip_filename)

                # A failed write is reported and the chip number is reused.
                try:
                    with rasterio.open(output_path, 'w', **profile) as dst:
                        dst.write(chip_data, 1)
                    count += 1
                except rasterio.RasterioIOError as e:
                    print(f"Failed to write chip {output_path}: {e}")

    print(f"✅ Successfully chipped {input_filepath} into {count} tiles.")


# =================================================================
# MAIN EXECUTION LOGIC
# =================================================================

# Maps district name -> {'pre_flood': path, 'post_flood': path} for every
# district whose two source rasters both exist in TIF_DIR.
files_to_chip = {}

# --- Generate File Pairs ---
for district in DISTRICTS:
    candidate = {
        'pre_flood': os.path.join(TIF_DIR, f'{district}_PreFlood_Image.tif'),
        'post_flood': os.path.join(TIF_DIR, f'{district}_PostFlood_Image.tif'),
    }

    # A district is only usable when both of its rasters are present.
    if not all(os.path.exists(path) for path in candidate.values()):
        print(f"⚠️ Skipping {district}: One or both primary files were not found.")
        continue

    files_to_chip[district] = candidate

print(f"Successfully prepared {len(files_to_chip)} district pairs for chipping.")


# --- Run Chipping Process ---
for district, files in files_to_chip.items():
    print(f"\n--- Chipping files for {district} ---")

    # One output folder per phase: <OUTPUT_CHIPS_DIR>/<district>/<phase>
    phase_dirs = {
        phase: os.path.join(OUTPUT_CHIPS_DIR, district, phase)
        for phase in ('pre_flood', 'post_flood')
    }

    # Create both folders up front, before any chipping starts.
    for phase_dir in phase_dirs.values():
        os.makedirs(phase_dir, exist_ok=True)

    # Chip the pre-flood raster first, then the post-flood raster.
    for phase, phase_dir in phase_dirs.items():
        chip_image(input_filepath=files[phase], output_directory=phase_dir)

Successfully prepared 5 district pairs for chipping.

--- Chipping files for Barpeta ---
✅ Successfully chipped C:\Kaam_Dhanda\Minor_Project\Old Images\Barpeta_PreFlood_Image.tif into 96 tiles.
✅ Successfully chipped C:\Kaam_Dhanda\Minor_Project\Old Images\Barpeta_PostFlood_Image.tif into 96 tiles.

--- Chipping files for Dhemaji ---
✅ Successfully chipped C:\Kaam_Dhanda\Minor_Project\Old Images\Dhemaji_PreFlood_Image.tif into 104 tiles.
✅ Successfully chipped C:\Kaam_Dhanda\Minor_Project\Old Images\Dhemaji_PostFlood_Image.tif into 104 tiles.

--- Chipping files for Lakhimpur ---
✅ Successfully chipped C:\Kaam_Dhanda\Minor_Project\Old Images\Lakhimpur_PreFlood_Image.tif into 324 tiles.
✅ Successfully chipped C:\Kaam_Dhanda\Minor_Project\Old Images\Lakhimpur_PostFlood_Image.tif into 324 tiles.

--- Chipping files for Nalbari ---
✅ Successfully chipped C:\Kaam_Dhanda\Minor_Project\Old Images\Nalbari_PreFlood_Image.tif into 80 tiles.
✅ Successfully chipped C:\Kaam_Dhanda\Minor_Project\Old

In [3]:
import numpy as np
import rasterio
import torch
import os

# =======================================================================
# CONFIGURATION
# =======================================================================

# IMPORTANT: SET YOUR ROOT DIRECTORY HERE
# Expected layout below this root: <district>/<pre_flood|post_flood>/*.tif
ROOT_CHIPS_DIR = r'C:\Kaam_Dhanda\Minor_Project\Output_chips'

# Sentinel-1 Normalization Parameters for VV (based on common practice)
# NOTE: These are general values. For maximum accuracy, check the specific
# Prithvi-600m documentation for its exact SAR data normalization.
# Both values are passed to preprocess_sar_chip(), which computes
# (data - mean) / std.
SAR_NORM_MEAN = -15.0  # Common mean for VV dB values
SAR_NORM_STD = 5.0    # Common standard deviation for VV dB values

# Dictionary to store all processed tensors:
# district name -> {'pre_flood': [tensor, ...], 'post_flood': [tensor, ...]}
processed_tensors = {}

# =======================================================================
# CORE PROCESSING FUNCTION
# =======================================================================

def preprocess_sar_chip(file_path, sar_mean, sar_std):
    """Read band 1 of a GeoTIFF chip and return it as a standardized tensor.

    The band is read as float32, standardized as ``(data - sar_mean) / sar_std``
    and given leading channel and batch axes, so a 2-D band of shape (H, W)
    becomes a tensor of shape (1, 1, H, W).

    Args:
        file_path: Path of the chip to read.
        sar_mean: Value subtracted from every pixel.
        sar_std: Value every pixel is divided by after the subtraction.

    Returns:
        The tensor, or ``None`` when the file cannot be opened/read or when
        every pixel equals the dataset's nodata value.
    """
    try:
        with rasterio.open(file_path) as src:
            # Read the single band (VV)
            data = src.read(1).astype(np.float32)
            nodata = src.nodata
    except rasterio.RasterioIOError:
        print(f"Error: Could not open or read {file_path}. Skipping.")
        return None

    # Reject chips that contain nothing but nodata. NaN never compares equal
    # to itself, so a NaN nodata value needs an explicit isnan test; without
    # it an empty chip would come back as an all-NaN tensor.
    if nodata is not None:
        if np.isnan(nodata):
            is_empty = bool(np.isnan(data).all())
        else:
            is_empty = bool(np.all(data == nodata))
        if is_empty:
            return None

    # 1. Standardization (Z-Score Normalization): (Data - Mean) / Std Dev
    normalized_data = (data - sar_mean) / sar_std

    # 2. Convert to a PyTorch tensor: (H, W) -> (1, H, W) -> (1, 1, H, W)
    tensor = torch.from_numpy(normalized_data).unsqueeze(0).unsqueeze(0)

    return tensor

# =======================================================================
# BATCH EXECUTION
# =======================================================================

print(f"Starting batch pre-processing from: {ROOT_CHIPS_DIR}")

# os.listdir() returns entries in arbitrary order. Sorting both listings makes
# the district order and the order of tensors inside each list reproducible
# from run to run. The order is lexicographic, so e.g. 'chip_10' sorts before
# 'chip_2'.
for district_name in sorted(os.listdir(ROOT_CHIPS_DIR)):
    district_path = os.path.join(ROOT_CHIPS_DIR, district_name)

    # Ignore stray files in the root; only district folders are processed.
    if not os.path.isdir(district_path):
        continue

    processed_tensors[district_name] = {'pre_flood': [], 'post_flood': []}
    print(f"\n--- Processing District: {district_name} ---")

    # Iterate through 'pre_flood' and 'post_flood' folders
    for phase in ['pre_flood', 'post_flood']:
        phase_path = os.path.join(district_path, phase)

        # A missing phase folder simply leaves that phase's list empty.
        if not os.path.isdir(phase_path):
            continue

        # Process all .tif files (image chips) in the phase folder
        for chip_filename in sorted(os.listdir(phase_path)):
            if not chip_filename.endswith('.tif'):
                continue

            chip_file_path = os.path.join(phase_path, chip_filename)

            # Run the core pre-processing function
            tensor = preprocess_sar_chip(
                chip_file_path, SAR_NORM_MEAN, SAR_NORM_STD
            )

            # None means the chip was unreadable or entirely nodata.
            if tensor is not None:
                processed_tensors[district_name][phase].append(tensor)

# =======================================================================
# FINAL CHECK
# =======================================================================

print("\n=======================================================")
print("✅ Batch Pre-processing Complete.")
print("=======================================================")

# Summarize what was collected: one line per district and phase, reporting
# the chip count and the shape of the first tensor in that list.
for district, phases in processed_tensors.items():
    print(f"District: {district}")
    for phase, tensors in phases.items():
        if not tensors:
            print(f"  {phase}: 0 chips found.")
            continue
        print(f"  {phase}: {len(tensors)} chips, each with shape {tensors[0].shape}")

# The 'processed_tensors' dictionary now holds all your data ready for the Prithvi model.


Starting batch pre-processing from: C:\Kaam_Dhanda\Minor_Project\Output_chips

--- Processing District: Barpeta ---

--- Processing District: Dhemaji ---

--- Processing District: Lakhimpur ---

--- Processing District: Nalbari ---

--- Processing District: Sonitpur ---

✅ Batch Pre-processing Complete.
District: Barpeta
  pre_flood: 96 chips, each with shape torch.Size([1, 1, 512, 512])
  post_flood: 96 chips, each with shape torch.Size([1, 1, 512, 512])
District: Dhemaji
  pre_flood: 104 chips, each with shape torch.Size([1, 1, 512, 512])
  post_flood: 104 chips, each with shape torch.Size([1, 1, 512, 512])
District: Lakhimpur
  pre_flood: 324 chips, each with shape torch.Size([1, 1, 512, 512])
  post_flood: 324 chips, each with shape torch.Size([1, 1, 512, 512])
District: Nalbari
  pre_flood: 80 chips, each with shape torch.Size([1, 1, 512, 512])
  post_flood: 80 chips, each with shape torch.Size([1, 1, 512, 512])
District: Sonitpur
  pre_flood: 198 chips, each with shape torch.Size

## **Run the temporal AI inference and then stitch the predictions back together**

In [14]:

def prepare_chip_pair(pre_path, post_path):
    """Load a pre/post chip pair as a single temporal input tensor.

    Band 1 of each file is read, the two arrays are stacked along a new first
    axis (pre first, post second), converted to a float tensor and given a
    leading batch axis; for 2-D bands of shape (H, W) the result has shape
    (1, 2, H, W). The tensor is created from the NumPy array and is not moved
    to another device here.

    Args:
        pre_path: Path of the pre-flood chip.
        post_path: Path of the post-flood chip.

    Returns:
        A ``(tensor_input, profile)`` tuple, where ``profile`` is the rasterio
        profile of the pre-flood chip, kept so the prediction can be written
        back with the same georeferencing.
    """
    with rasterio.open(pre_path) as pre_ds:
        with rasterio.open(post_path) as post_ds:
            # Single band per file (assumed VV polarization).
            bands = [pre_ds.read(1), post_ds.read(1)]
            profile = pre_ds.profile

            # Time becomes the first axis: index 0 = pre, index 1 = post.
            stacked = np.stack(bands, axis=0)

            batched = torch.from_numpy(stacked).float()
            tensor_input = batched.unsqueeze(0)

    return tensor_input, profile

In [15]:

def run_inference_and_save(pre_path, post_path, output_mask_dir):
    """Predict a class mask for one pre/post chip pair and save it as a GeoTIFF.

    The pair is loaded with ``prepare_chip_pair``, passed through the
    module-level ``model`` under ``torch.no_grad()``, and the per-pixel argmax
    over dimension 1 of the model output is written as a single-band uint8
    raster using the pre-flood chip's profile.

    NOTE(review): ``model`` is not defined in this cell; it is presumably a
    torch module that the caller has already put in eval mode and that
    accepts the tensor on the device it is created on -- confirm.

    Args:
        pre_path: Path of the pre-flood chip.
        post_path: Path of the post-flood chip.
        output_mask_dir: Directory that receives the mask. It is not created
            by this function.

    Returns:
        Path of the written mask: the pre-flood file name with
        'PreFlood_Image' replaced by 'Flood_Mask', inside ``output_mask_dir``.
    """
    tensor_input, profile = prepare_chip_pair(pre_path, post_path)

    # 1. Run the prediction without tracking gradients.
    with torch.no_grad():
        # output is typically a logit map (e.g., shape: 1, num_classes, H, W)
        output_logits = model(tensor_input)

    # 2. Pick the highest-scoring class per pixel. argmax over dim 1 leaves
    # (batch, H, W); drop only the batch axis so that a chip whose height or
    # width is 1 keeps its two spatial dimensions.
    predicted_mask_tensor = torch.argmax(output_logits, dim=1).squeeze(0).cpu()
    predicted_mask_array = predicted_mask_tensor.numpy().astype(rasterio.uint8)

    # 3. Save the prediction mask
    chip_filename = os.path.basename(pre_path).replace('PreFlood_Image', 'Flood_Mask')
    output_path = os.path.join(output_mask_dir, chip_filename)

    # Describe the mask rather than the SAR input: one uint8 band. The
    # inherited nodata value is cleared because it may not be representable
    # as uint8, or may coincide with a real class value in the mask.
    profile.update(dtype=rasterio.uint8, count=1, nodata=None)

    with rasterio.open(output_path, 'w', **profile) as dst:
        dst.write(predicted_mask_array, 1)

    return output_path

In [None]:

def stitch_masks(mask_dir, district_name, final_output_dir):
    """Stitch all predicted flood mask chips into a single GeoTIFF.

    Every ``.tif`` file in ``mask_dir`` is opened and mosaicked with
    ``rasterio.merge.merge``; the result is written as a single-band uint8
    GeoTIFF named ``<district_name>_Final_Flood_Mask.tif``.

    Args:
        mask_dir: Directory containing the mask chips.
        district_name: Used to build the output file name.
        final_output_dir: Directory that receives the stitched mask. It is
            not created by this function.

    Raises:
        ValueError: If ``mask_dir`` contains no ``.tif`` files.
    """
    # Sort so the merge order (and the chip that supplies the base metadata)
    # does not depend on the arbitrary order returned by os.listdir().
    mask_files = sorted(
        os.path.join(mask_dir, f) for f in os.listdir(mask_dir) if f.endswith('.tif')
    )
    if not mask_files:
        raise ValueError(f"No .tif mask chips found in {mask_dir}")

    sources = []
    try:
        # Open incrementally so that a failure part-way through still closes
        # the datasets that were already opened.
        for mask_file in mask_files:
            sources.append(rasterio.open(mask_file))

        # Use rasterio.merge to create a mosaic
        stitched_array, out_transform = merge(sources)

        # Base the output metadata on the first chip's profile.
        out_meta = sources[0].profile.copy()
    finally:
        for src in sources:
            src.close()

    # stitched_array is indexed (band, row, col).
    out_meta.update({
        "driver": "GTiff",
        "height": stitched_array.shape[1],
        "width": stitched_array.shape[2],
        "transform": out_transform,
        "count": 1,
        "dtype": 'uint8'
    })

    # Write the final stitched GeoTIFF
    final_output_path = os.path.join(final_output_dir, f'{district_name}_Final_Flood_Mask.tif')
    with rasterio.open(final_output_path, "w", **out_meta) as dest:
        dest.write(stitched_array)

    print(f"✅ Final stitched mask saved to: {final_output_path}")