# Sen1Floods11 Dataset Setup

This notebook downloads the Sen1Floods11 archive from the official GitHub repository, extracts it, locates the SAR images and flood masks, and previews a sample.

In [None]:
# Import required libraries
import os
import zipfile
import wget
import logging
from pathlib import Path
import matplotlib.pyplot as plt
import rasterio
import numpy as np

In [None]:
# Setup logging and constants
def setup_logging():
    """Apply the basic logging configuration used by this notebook.

    Records are emitted at INFO level and above, formatted as
    ``<timestamp> - <level> - <message>``.
    """
    record_format = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=record_format)

# Zip archive of the repository's master branch; passed to download_dataset().
DATASET_URL = "https://github.com/cloudtostreet/Sen1Floods11/archive/refs/heads/master.zip"
# Local file the archive is saved to (relative to the working directory).
DATASET_PATH = "dataset/sen1floods11.zip"
# Directory that extract_dataset() checks to decide whether extraction is
# needed, and that find_images() searches for .tif files.
DATA_DIR = Path('dataset/Sen1Floods11-master/data')

# Configure logging once, before any of the helpers below emit messages.
setup_logging()

In [None]:
def download_dataset(url, save_path):
    """Download the dataset archive to ``save_path`` unless it already exists.

    The directory that will contain ``save_path`` is created when missing.
    Nothing is downloaded (and nothing is created) if ``save_path`` is
    already present.

    Args:
        url: Address of the archive to fetch.
        save_path: Destination file path for the downloaded archive.
    """
    if os.path.exists(save_path):
        logging.info("Dataset zip already exists")
        return

    # Create the directory that will actually hold the archive instead of a
    # hard-coded 'dataset' folder, so any save_path works. exist_ok avoids
    # the check-then-create race.
    Path(save_path).parent.mkdir(parents=True, exist_ok=True)

    # Only announce a download when one really happens.
    logging.info(f"Downloading dataset from {url}")
    wget.download(url, save_path)
    # Leading newline keeps the message off any progress output that may
    # still be sitting on the current console line.
    print("\nDownload complete!")

def extract_dataset(zip_path, extract_path, data_dir=None):
    """Extract the dataset archive unless the data directory already exists.

    Args:
        zip_path: Path of the zip archive to unpack.
        extract_path: Directory the archive is unpacked into.
        data_dir: Directory whose existence means the archive has already
            been extracted. Defaults to the module-level ``DATA_DIR``, which
            preserves the previous behaviour for existing callers.
    """
    # Resolve the marker at call time so a custom extract_path can be paired
    # with a matching data directory instead of always using the global.
    marker = Path(DATA_DIR if data_dir is None else data_dir)
    if marker.exists():
        logging.info("Dataset already extracted")
        return

    logging.info("Extracting dataset...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    logging.info("Extraction complete!")

def find_images(data_dir=None):
    """Locate SAR images and flood masks beneath the dataset directory.

    Files are matched by the name of their immediate parent directory:
    ``S1*.tif`` and ``flood_mask*.tif`` inside a ``*flood*`` directory, and
    ``S1*.tif`` inside a ``*non_flood*`` directory.

    Args:
        data_dir: Directory to search recursively. Defaults to the
            module-level ``DATA_DIR``.

    Returns:
        A tuple ``(flood_sar, nonflood_sar, flood_masks)`` of sorted lists of
        paths. The flood lists exclude files whose parent directory matches
        ``*non_flood*``.
    """
    root = Path(DATA_DIR if data_dir is None else data_dir)

    def in_nonflood_dir(path):
        # The glob "*flood*" also matches names such as "non_flood", so
        # flood-only results must filter those directories out explicitly.
        return path.parent.match("*non_flood*")

    # Sorting makes the result order deterministic across runs/filesystems.
    flood_sar = sorted(
        p for p in root.rglob("*flood*/S1*.tif") if not in_nonflood_dir(p)
    )
    nonflood_sar = sorted(root.rglob("*non_flood*/S1*.tif"))
    flood_masks = sorted(
        p for p in root.rglob("*flood*/flood_mask*.tif")
        if not in_nonflood_dir(p)
    )

    logging.info(f"Found {len(flood_sar)} flood SAR images")
    logging.info(f"Found {len(nonflood_sar)} non-flood SAR images")
    logging.info(f"Found {len(flood_masks)} flood mask images")

    return flood_sar, nonflood_sar, flood_masks

In [None]:
def visualize_sample(sar_path, mask_path=None):
    """Display band 1 of a SAR raster, optionally beside band 1 of a mask.

    Args:
        sar_path: Path of the SAR raster to show in the left panel.
        mask_path: Optional path of a mask raster for the right panel; the
            right panel is left undrawn when this is falsy.
    """
    def first_band(raster_path):
        # Open the raster only long enough to pull out its first band.
        with rasterio.open(raster_path) as dataset:
            return dataset.read(1)

    sar_band = first_band(sar_path)

    plt.figure(figsize=(12, 6))

    # Left panel: the SAR band in grayscale.
    plt.subplot(1, 2, 1)
    plt.title('SAR Image')
    plt.imshow(sar_band, cmap='gray')

    # Right panel: only drawn when a mask was supplied.
    if mask_path:
        mask_band = first_band(mask_path)
        plt.subplot(1, 2, 2)
        plt.title('Flood Mask')
        plt.imshow(mask_band, cmap='binary')

    plt.tight_layout()
    plt.show()

In [None]:
# Run the pipeline: fetch the archive, unpack it, then index the images.
try:
    download_dataset(url=DATASET_URL, save_path=DATASET_PATH)
    extract_dataset(zip_path=DATASET_PATH, extract_path='dataset')
    flood_sar, nonflood_sar, flood_masks = find_images()

    # Preview the first SAR image and first mask, when both lists are non-empty.
    if flood_sar and flood_masks:
        visualize_sample(
            sar_path=str(flood_sar[0]),
            mask_path=str(flood_masks[0]),
        )
except Exception as exc:
    # Record the failure, then let it propagate so the cell still fails.
    logging.error(f"Error processing dataset: {str(exc)}")
    raise