In [None]:
# Fetch the Sen1Floods11 GitHub archive (once), unpack it, and index the
# Bolivia SAR / mask GeoTIFFs into three parallel path lists.
import shutil
from urllib.request import urlopen

# Create directory for dataset
os.makedirs('dataset', exist_ok=True)

# Updated dataset link - using a more reliable GitHub repository
print("Using GitHub repository for Sen1Floods11 dataset...")
url = 'https://github.com/cloudtostreet/Sen1Floods11/archive/refs/heads/master.zip'
output = 'dataset/sen1floods11.zip'
# Folder the archive unpacks into; the glob patterns below depend on it.
extracted_dir = 'dataset/Sen1Floods11-master'

# Check if the dataset is already downloaded
if not os.path.exists(output):
    print("Downloading dataset...")
    # Download to a temporary name and rename only on success, so that a
    # failed or interrupted transfer is never mistaken for a finished one
    # on the next run. urlopen raises on HTTP/network errors.
    partial_output = output + '.part'
    with urlopen(url) as response, open(partial_output, 'wb') as partial_file:
        shutil.copyfileobj(response, partial_file)
    os.replace(partial_output, output)
else:
    print("Dataset already downloaded.")

# Extract whenever the unpacked folder is missing, not only right after a
# fresh download (covers a previous run that stopped before extraction).
if not os.path.isdir(extracted_dir):
    print("Extracting dataset...")
    with zipfile.ZipFile(output, 'r') as zip_ref:
        zip_ref.extractall('dataset')

# Find the SAR and mask files in the dataset
# Note: The paths might need adjustment based on the actual dataset structure
bolivia_root = extracted_dir + '/data/flood_events/bolivia'
flood_sar_paths = sorted(glob(bolivia_root + '/S1/**/*VH*.tif', recursive=True))
non_flood_sar_paths = sorted(glob(bolivia_root + '/S1_non_flood/**/*VH*.tif', recursive=True))
flood_mask_paths = sorted(glob(bolivia_root + '/flood_mask/*.tif'))

# Print counts to verify
print(f"Found {len(flood_sar_paths)} flood SAR images")
print(f"Found {len(non_flood_sar_paths)} non-flood SAR images")
print(f"Found {len(flood_mask_paths)} flood mask images")

# Ensure we have matching sets
# NOTE(review): truncating the sorted lists only equalises their lengths; it
# does not check that entry i of each list refers to the same tile. Confirm
# the file naming keeps the three lists aligned.
min_count = min(len(flood_sar_paths), len(non_flood_sar_paths), len(flood_mask_paths))
if min_count == 0:
    print("Warning: no matching SAR/mask files found; check the dataset paths above.")
flood_sar_paths = flood_sar_paths[:min_count]
non_flood_sar_paths = non_flood_sar_paths[:min_count]
flood_mask_paths = flood_mask_paths[:min_count]

In [None]:
# Alternative dataset using Hugging Face dataset
# Loads the dataset, writes up to 100 samples to disk as single-band GeoTIFFs
# (flood SAR, non-flood SAR, flood mask), then rebuilds the three path lists.
print("Using alternative Sen1Floods11 dataset...")

import numpy as np

# Install huggingface datasets if needed
try:
    from datasets import load_dataset
except ImportError:
    import subprocess
    import sys

    # Use the running interpreter's pip so the package lands in the same
    # environment this code imports from.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'datasets', '-q'])
    from datasets import load_dataset


def _write_single_band_tiff(path, band):
    """Write *band* to *path* as a one-band GeoTIFF.

    *band* is converted with ``np.asarray`` so that nested lists (or other
    array-likes) are accepted as well as ndarrays.
    Assumes the result is 2-D (height, width) - TODO confirm against the
    dataset's feature schema.
    """
    pixels = np.asarray(band)
    with rasterio.open(path, 'w',
                       driver='GTiff',
                       height=pixels.shape[0],
                       width=pixels.shape[1],
                       count=1,
                       dtype=pixels.dtype) as dst:
        dst.write(pixels, 1)


# Load dataset from Hugging Face
dataset = load_dataset("intelligent-factory/sen1floods11", split="train")
print(f"Dataset loaded with {len(dataset)} samples")

# Create directories (one per sample field that gets exported)
export_fields = ('flood_sar', 'non_flood_sar', 'flood_mask')
for field in export_fields:
    os.makedirs(f'dataset/sen1floods11/{field}', exist_ok=True)

# Save a sample of images to disk
sample_count = min(100, len(dataset))
for i, sample in enumerate(dataset.select(range(sample_count))):
    # The same index is used in all three folders so files stay paired.
    for field in export_fields:
        _write_single_band_tiff(f'dataset/sen1floods11/{field}/image_{i}.tif', sample[field])

# Get new paths
flood_sar_paths = sorted(glob('dataset/sen1floods11/flood_sar/*.tif'))
non_flood_sar_paths = sorted(glob('dataset/sen1floods11/non_flood_sar/*.tif'))
flood_mask_paths = sorted(glob('dataset/sen1floods11/flood_mask/*.tif'))

print(f"Found {len(flood_sar_paths)} flood SAR images")
print(f"Found {len(non_flood_sar_paths)} non-flood SAR images")
print(f"Found {len(flood_mask_paths)} flood mask images")