In [None]:
# Cell 2: compute 100×100 km bbox around (lat0, lon0)
from pyproj import CRS, Transformer

def make_bbox(lat0, lon0, side_km=100.0):
    """
    Build a square area of interest centred on a geographic point.

    The square is laid out in a local Azimuthal Equidistant projection
    centred on (lat0, lon0), so its sides are measured in true metres at
    the centre, and each corner is then transformed back to WGS84.

    Parameters:
    - lat0, lon0: centre of the square in decimal degrees (WGS84).
    - side_km: full side length of the square in kilometres. Defaults to
      100, the size this helper is documented to produce.

    Returns:
    A list of 5 (lon, lat) tuples forming a closed ring (first point
    repeated last) in counter-clockwise order, the winding RFC 7946
    specifies for a GeoJSON Polygon exterior ring.

    Raises:
    ValueError if side_km is not positive.
    """
    if side_km <= 0:
        raise ValueError(f"side_km must be positive, got {side_km!r}")

    # local Azimuthal Equidistant projection centred on our point
    aeqd = CRS.from_proj4(f"+proj=aeqd +lat_0={lat0} +lon_0={lon0} +units=m +datum=WGS84")
    wgs84 = CRS.from_epsg(4326)
    # always_xy=True keeps (x, y) == (lon, lat) ordering on the WGS84 side
    to_wgs84 = Transformer.from_crs(aeqd, wgs84, always_xy=True)

    # Half the side length in metres. The previous hard-coded 25_000 m
    # yielded a 50 km square despite the documented 100 km.
    half_side = side_km * 1000.0 / 2.0

    # corners in projected metres, starting lower-left and going
    # counter-clockwise: LL -> LR -> UR -> UL -> LL
    proj_corners = [
        (-half_side, -half_side),
        ( half_side, -half_side),
        ( half_side,  half_side),
        (-half_side,  half_side),
        (-half_side, -half_side),
    ]

    # transform back to lon/lat
    return [tuple(to_wgs84.transform(x, y)) for x, y in proj_corners]


In [None]:
# Cell 3: centre point of the area of interest (decimal degrees) - edit here:
lat0, lon0 = 61.170966970204304, -161.90796944935005

corners = make_bbox(lat0, lon0)

# GeoJSON-style polygon whose single ring is the closed corner list
rect = dict(type="Polygon", coordinates=[corners])

# Emit the polygon as paste-ready text, one vertex per line:
_rect_lines = ["rect = {", '    "type": "Polygon",', '    "coordinates": [[']
for lon, lat in corners:
    _rect_lines.append(f"        [{lon:.6f}, {lat:.6f}],")
_rect_lines.extend(["    ]]", "}"])
print("\n".join(_rect_lines))


In [None]:
# Cell 4: render with Folium
import folium

# folium takes (lat, lon) pairs, i.e. each (lon, lat) corner with its axes swapped
folium_points = [(corner[1], corner[0]) for corner in corners]

m = folium.Map(location=(lat0, lon0), zoom_start=8)
# Red, unfilled outline of the area of interest
folium.Polygon(
    locations=folium_points,
    color="red",
    weight=3,
    fill=False,
).add_to(m)
m  # last expression of the cell: Jupyter renders the interactive map

In [6]:
import csv
import os
import random
import re
import shutil
import threading
import time
import zipfile
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from itertools import zip_longest

import backoff
import requests
import yaml

# —– load your key & session —–
# The API key is read from a local YAML file (top-level key `api_key`)
# so that it never appears in the notebook itself.
with open(r"D:\planetscope_lake_ice\planet.yaml") as f:
    _planet_config = yaml.safe_load(f)
PLANET_API_KEY = _planet_config['api_key']

BASE_URL = "https://api.planet.com/data/v1"

# One shared session for every request; HTTP basic auth with the key as
# the user name and an empty password.
session = requests.Session()
session.auth = (PLANET_API_KEY, "")

# File extension used when saving each asset type
EXT_MAP = dict(
    ortho_analytic_4b_sr='.tif',
    ortho_analytic_4b_xml='.xml',
    ortho_udm2='.tif',
)
# Asset keys to download, in the same order as the extension table above
ASSET_KEYS = list(EXT_MAP)

# Rate limiting parameters
# 4 requests/second keeps us safely under the 5 req/sec limit
MAX_REQUESTS_PER_SECOND = 4
# Minimum spacing, in seconds, between two consecutive requests
REQUEST_INTERVAL = 1.0 / MAX_REQUESTS_PER_SECOND
# Timestamp of the most recent request; updated by rate_limited_request()
last_request_time = time.time()

# Custom exception for rate limit errors
class RateLimitException(Exception):
    """Signals that the API answered with HTTP 429 (too many requests)."""

# Rate limited session wrapper
# Guards `last_request_time`: this function is also called from worker
# threads, and without the lock two threads could read the same timestamp
# and send their requests at the same instant.
_RATE_LIMIT_LOCK = threading.Lock()


def rate_limited_request(method, url, **kwargs):
    """
    Make a rate-limited request and return the response.

    Each call reserves the next free send slot, spaced REQUEST_INTERVAL
    seconds after the previously reserved one, sleeps until that slot and
    then performs the request.

    Parameters:
    - method: callable performing the request, e.g. `session.get`.
    - url: URL passed to `method` as its first argument.
    - **kwargs: forwarded unchanged to `method`.

    Returns:
    The response object returned by `method`.

    Raises:
    RateLimitException if the response status is 429. The delay advised by
    the Retry-After header (1 second when absent or not an integer) is
    included in the message and attached as the `retry_after` attribute.
    """
    global last_request_time

    # Reserve a slot while holding the lock, but sleep outside it so that
    # waiting threads do not block each other's bookkeeping.
    with _RATE_LIMIT_LOCK:
        now = time.time()
        send_at = max(now, last_request_time + REQUEST_INTERVAL)
        last_request_time = send_at

    wait_time = send_at - now
    if wait_time > 0:
        time.sleep(wait_time)

    # Make the request
    response = method(url, **kwargs)

    # Check for rate limit errors
    if response.status_code == 429:
        # Retry-After may also be an HTTP-date; int() would then raise a
        # ValueError that no retry logic handles, so fall back to 1 second.
        try:
            retry_after = int(response.headers.get('Retry-After', 1))
        except (TypeError, ValueError):
            retry_after = 1
        error = RateLimitException(f"Rate limit exceeded. Retry after {retry_after}s")
        error.retry_after = retry_after
        raise error

    return response

# Backoff decorator for handling rate limits
@backoff.on_exception(
    backoff.expo,
    (RateLimitException, requests.exceptions.RequestException),
    max_tries=10,  # Maximum number of retries
    max_time=300,  # Maximum time to retry (5 minutes)
    jitter=backoff.full_jitter,  # Add jitter to prevent thundering herd
    # .get(): tolerate handler details that carry no 'exception' entry
    on_backoff=lambda details: print(
        f"Backing off {details['wait']:.1f}s after {details['tries']} tries. "
        f"Error: {details.get('exception')}"
    )
)
def make_request_with_retry(method, url, **kwargs):
    """
    Make a rate-limited request, retrying on rate-limit and transport errors.

    Delegates to rate_limited_request(); the decorator above retries with
    exponential backoff and jitter when that call raises RateLimitException
    or any requests RequestException (up to 10 tries / 300 seconds).

    Parameters:
    - method: callable performing the request, e.g. `session.get`.
    - url: URL to request.
    - **kwargs: forwarded unchanged to `method`.

    Returns:
    The response object. HTTP error statuses other than 429 are NOT raised
    here; callers are expected to call `raise_for_status()` themselves.

    Raises:
    RateLimitException or requests.exceptions.RequestException once the
    retry budget is exhausted.
    """
    try:
        return rate_limited_request(method, url, **kwargs)
    except RateLimitException as exc:
        # A 429 surfaces as RateLimitException (the request callable itself
        # does not raise on status codes), so this is the only place where
        # the server-advised delay can be honoured before backoff retries.
        retry_after = getattr(exc, 'retry_after', None)
        if retry_after is None:
            # Fall back to the delay embedded in the exception message.
            match = re.search(r"Retry after (\d+)s", str(exc))
            retry_after = int(match.group(1)) if match else 0
        if retry_after > 0:
            print(f"Rate limit hit. Waiting {retry_after}s before retry...")
            time.sleep(retry_after)
        raise

def list_images(AOI, start, end, cloud_threshold):
    """
    Search PSScene items over an AOI and date window.

    Posts a quick-search request and follows the pagination links until
    the last page, keeping only features whose cloud cover is at or below
    the threshold.

    Parameters:
    - AOI: GeoJSON geometry dict used as the GeometryFilter config.
    - start, end: bounds of the `acquired` DateRangeFilter (gte / lte).
    - cloud_threshold: maximum accepted `cloud_cover` property value.
      Features with a missing or null cloud cover are treated as 1.0.

    Returns:
    List of feature dicts from all result pages that pass the cloud filter.

    Raises:
    Re-raises any exception from the requests after printing it.
    """
    print(f"Searching for images from {start} to {end}...")
    features = []
    body = {
        "item_types": ["PSScene"],
        "filter": {
            "type": "AndFilter",
            "config": [
                {"type": "GeometryFilter", "field_name": "geometry", "config": AOI},
                {"type": "DateRangeFilter", "field_name": "acquired",
                 "config": {"gte": start, "lte": end}}
            ]
        }
    }

    try:
        resp = make_request_with_retry(session.post, f"{BASE_URL}/quick-search", json=body)
        resp.raise_for_status()
        data = resp.json()

        page_count = 1
        while True:
            print(f"Processing page {page_count} of search results...")
            page_count += 1

            for feat in data.get('features', []):
                cloud_cover = feat['properties'].get('cloud_cover')
                if cloud_cover is None:
                    # Unknown cloud cover: assume fully cloudy
                    cloud_cover = 1.0
                if cloud_cover <= cloud_threshold:
                    features.append(feat)

            # The Data API names its pagination link `_next`. Looking up
            # only `next` always returned None, so the search stopped
            # after the first page. `next` is kept as a fallback.
            links = data.get('_links') or {}
            next_link = links.get('_next') or links.get('next')
            if not next_link:
                print("No more pages to process.")
                break

            print(f"Fetching next page from {next_link}")
            resp = make_request_with_retry(session.get, next_link)
            resp.raise_for_status()
            data = resp.json()
    except Exception as e:
        print(f"Error during image search: {str(e)}")
        raise

    print(f"Total images found meeting criteria: {len(features)}")
    return features

def batch_activate_assets(features, asset_keys, test_n_images=0,
                          poll_interval_s=5, max_wait_s=None):
    """
    Request activation of the wanted assets and wait until they are active.

    Steps:
    1. Fetch the asset listing of every feature and note which of
       `asset_keys` exist but are not yet 'active'.
    2. POST the activation link of each such asset (batches of 5, at most
       2 requests in flight).
    3. Poll the listings of the items that still have pending assets until
       none is left.

    Parameters:
    - features: feature dicts with 'id' and properties['item_type'].
    - asset_keys: asset type names to activate; keys missing from an
      item's listing are ignored.
    - test_n_images: if > 0, only the first n features are processed.
    - poll_interval_s: seconds to sleep before each polling pass.
    - max_wait_s: optional upper bound, in seconds, on the polling phase.
      None (default) waits indefinitely.

    Returns:
    Dict mapping item id -> assets listing URL for every processed feature.

    Raises:
    TimeoutError if `max_wait_s` is set and assets are still pending after
    a polling pass that ends past the deadline. Errors from the initial
    status check propagate to the caller.
    """
    if test_n_images > 0:
        print(f"TEST MODE: Limiting activation to first {test_n_images} images (out of {len(features)})")
        features = features[:test_n_images]
    else:
        print(f"Activating assets for all {len(features)} images")

    asset_mapping = {}        # item_id -> assets listing URL
    assets_to_activate = {}   # asset key -> [(item_id, activation URL), ...]
    pending = {}              # item_id -> set of asset keys not yet active

    print("Checking asset statuses...")
    for feat in features:
        item_id = feat['id']
        item_type = feat['properties']['item_type']
        assets_url = f"{BASE_URL}/item-types/{item_type}/items/{item_id}/assets/"

        # Store the assets URL for later use
        asset_mapping[item_id] = assets_url

        resp = make_request_with_retry(session.get, assets_url)
        resp.raise_for_status()
        assets_data = resp.json()

        for key in asset_keys:
            if key not in assets_data:
                continue
            asset = assets_data[key]
            if asset.get('status') == 'active':
                continue
            assets_to_activate.setdefault(key, []).append(
                (item_id, asset['_links']['activate'])
            )
            pending.setdefault(item_id, set()).add(key)

    # Send the activation requests, one asset type at a time
    for key, activation_list in assets_to_activate.items():
        print(f"Activating {len(activation_list)} assets of type '{key}'...")
        total_batches = (len(activation_list) + 4) // 5

        # Process activations with controlled concurrency
        with ThreadPoolExecutor(max_workers=2) as executor:
            for batch_start in range(0, len(activation_list), 5):
                batch = activation_list[batch_start:batch_start + 5]
                print(f"Processing activation batch {batch_start//5 + 1}/{total_batches}")

                futures = []
                for item_id, activate_url in batch:
                    print(f"Submitting activation request for {item_id} - {key}")
                    future = executor.submit(make_request_with_retry, session.post, activate_url)
                    futures.append((item_id, future))

                # Wait for this batch to complete before starting next batch
                for item_id, future in futures:
                    try:
                        resp = future.result()
                        resp.raise_for_status()
                        print(f"Activation request sent for {item_id} - {key}")
                    except Exception as e:
                        # The request is not re-sent; the asset simply
                        # stays in `pending` and keeps being polled.
                        print(f"Error activating {item_id} - {key}: {str(e)} - "
                              f"activation was not re-sent; status will still be polled")

    print("All activation requests submitted, waiting for assets to become active...")

    deadline = None if max_wait_s is None else time.monotonic() + max_wait_s
    poll_count = 0

    # Only items that still have pending assets are re-queried, so nothing
    # is fetched (and no sleep happens) once everything is active.
    while pending:
        poll_count += 1
        print(f"\nPolling attempt #{poll_count} - Checking activation status...")
        time.sleep(poll_interval_s)

        for item_id in list(pending):
            try:
                resp = make_request_with_retry(session.get, asset_mapping[item_id])
                resp.raise_for_status()
                assets_data = resp.json()
            except Exception as e:
                # Keep the item pending and try again on the next pass
                print(f"Error checking status for {item_id}: {str(e)} - Will continue polling")
                continue

            still_waiting = set()
            for key in sorted(pending[item_id]):
                if key not in assets_data:
                    # Asset no longer listed: nothing left to wait for
                    continue
                status = assets_data[key].get('status')
                if status != 'active':
                    still_waiting.add(key)
                    if poll_count % 4 == 0:  # Only print detailed status every 4th poll
                        print(f"Asset {key} for {item_id} is not yet active (status: {status})")

            if still_waiting:
                pending[item_id] = still_waiting
            else:
                del pending[item_id]

        inactive_count = sum(len(keys) for keys in pending.values())
        if inactive_count > 0:
            print(f"Still waiting for {inactive_count} assets to become active...")
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    f"{inactive_count} assets still not active after {max_wait_s}s"
                )

    print("All assets are now active!")
    return asset_mapping

def _fetch_asset_bytes(location_url):
    """Download one activated asset and return its content as bytes."""
    resp = make_request_with_retry(session.get, location_url)
    resp.raise_for_status()
    return resp.content


def download_assets_batch(features, asset_mapping, base_folder, order_name, test_n_images=0):
    """
    Download the ASSET_KEYS assets of the given features into one zip file.

    The archive is written to {base_folder}/{order_name}.zip (the folder is
    created if needed, an existing archive is overwritten). Each asset is
    stored as "{item_id}_{asset_key}{extension}".

    Parameters:
    - features: feature dicts with an 'id'.
    - asset_mapping: dict item id -> assets listing URL.
    - base_folder: destination folder for the zip file.
    - order_name: archive name without extension.
    - test_n_images: if > 0, only the first n features are downloaded.

    Returns:
    Number of assets written to the archive.

    Raises:
    Re-raises, after printing, any error from opening the archive or from
    fetching an item's asset listing. Failures of a single asset download
    are retried up to 3 times and then skipped.
    """
    print(f"\nPreparing to download assets for order: {order_name}")
    os.makedirs(base_folder, exist_ok=True)
    zip_path = os.path.join(base_folder, f"{order_name}.zip")
    print(f"Assets will be saved to: {zip_path}")

    # Apply test limit if specified
    if test_n_images > 0:
        print(f"TEST MODE: Limiting download to first {test_n_images} images (out of {len(features)})")
        features = features[:test_n_images]

    retry_attempts = 3
    count = 0
    try:
        with zipfile.ZipFile(zip_path, 'w') as z:
            for i, feat in enumerate(features, 1):
                item_id = feat['id']
                assets_url = asset_mapping[item_id]
                print(f"\nDownloading assets for image {i}/{len(features)}: {item_id}")

                # Get all assets for this item
                resp = make_request_with_retry(session.get, assets_url)
                resp.raise_for_status()
                assets_data = resp.json()

                for key in ASSET_KEYS:
                    if key not in assets_data:
                        print(f"Asset {key} not available for {item_id}")
                        continue

                    asset = assets_data[key]
                    location = asset.get('location')
                    if not location:
                        # No download URL in the listing: retrying cannot
                        # help, so skip instead of sleeping through retries.
                        print(f"Asset {key} for {item_id} has no download location "
                              f"(status: {asset.get('status')}) - skipping")
                        continue

                    filename = f"{item_id}_{key}{EXT_MAP.get(key, '')}"
                    print(f"Downloading asset {key} for {item_id}...")

                    # Attempt 0 is the initial try; 1..retry_attempts are
                    # retries with a growing pause (2s, 4s, 6s).
                    for attempt in range(retry_attempts + 1):
                        if attempt > 0:
                            print(f"Retry attempt {attempt}/{retry_attempts} for {item_id} - {key}")
                            time.sleep(attempt * 2)  # Increasing backoff
                        try:
                            download_start = time.time()
                            data = _fetch_asset_bytes(location)
                            download_time = time.time() - download_start
                            data_size_mb = len(data) / 1024 / 1024
                            if download_time > 0:
                                speed = f"{data_size_mb / download_time:.2f} MB/s"
                            else:
                                # Clock did not advance: avoid dividing by zero
                                speed = "n/a"
                            print(f"Download complete: {data_size_mb:.2f} MB in {download_time:.2f}s ({speed})")

                            print(f"Adding {filename} to zip archive...")
                            z.writestr(filename, data)
                            count += 1
                            if attempt > 0:
                                print(f"Retry successful for {item_id} - {key}")
                            break
                        except Exception as e:
                            if attempt == 0:
                                print(f"Error downloading asset {key} for {item_id}: {str(e)} - Will retry")
                            else:
                                print(f"Retry {attempt} failed: {str(e)}")
                                if attempt == retry_attempts:
                                    print(f"All retries failed for {item_id} - {key}")
    except Exception as e:
        print(f"Error during batch download: {str(e)}")
        raise

    print(f"\nDownload complete for order {order_name}. Total assets downloaded: {count}")
    return count

def download_seasonal_orders(AOI, study_site_folder, backup_folder,
                             years, seasons, cloud_threshold=0.5, test_n_images=0):
    """
    For each year and season, list images, activate assets in batch, download assets,
    log to a single download_log.txt, and build a master image_roster.csv.

    Parameters:
    - AOI: GeoJSON geometry dict passed to the image search.
    - study_site_folder: primary output folder (created if missing); also
      holds download_log.txt and image_roster.csv.
    - backup_folder: used for a season's zip when the primary folder has
      less than 5 GB free.
    - years: iterable of years substituted into the season templates.
    - seasons: dict season name -> (start template, end template), each
      template containing a "{year}" placeholder.
    - cloud_threshold: maximum accepted cloud cover for the search.
    - test_n_images: If > 0, only process the first n images for each season/year

    Returns:
    None. A season whose activation fails is skipped (no log line); a
    season whose download fails is logged with assets=0.
    """
    print(f"\n{'=' * 60}")
    print(f"Starting download process at {datetime.now().isoformat()}")
    print(f"Study site folder: {study_site_folder}")
    print(f"Backup folder: {backup_folder}")
    print(f"Cloud threshold: {cloud_threshold}")
    print(f"Rate limit: {MAX_REQUESTS_PER_SECOND} requests per second")
    if test_n_images > 0:
        print(f"TEST MODE ACTIVE: Limited to first {test_n_images} images per season/year")
    print(f"{'=' * 60}\n")

    # The disk-usage check, the log and the roster all need this folder to
    # exist; without it a fresh study site fails with FileNotFoundError.
    os.makedirs(study_site_folder, exist_ok=True)

    # Single log file in base
    log_path = os.path.join(study_site_folder, 'download_log.txt')
    # For CSV: collect { order_name: [image_id, ...] }
    roster_dict = {}

    for year in years:
        for season, (start_tmpl, end_tmpl) in seasons.items():
            start = start_tmpl.format(year=year)
            end = end_tmpl.format(year=year)
            order_name = f"{season}_{year}"
            print(f"\n{'=' * 60}")
            print(f"Processing {order_name}: {start} to {end}")
            print(f"{'=' * 60}")

            # 1) list & filter
            feats = list_images(AOI, start, end, cloud_threshold)
            image_ids = [f['id'] for f in feats]
            roster_dict[order_name] = image_ids
            print(f"Found {len(image_ids)} images meeting criteria.")

            if not feats:
                print(f"No images found for {order_name}, skipping to next season/year.")
                continue

            # 2) choose storage
            free = shutil.disk_usage(study_site_folder).free
            if free < 5 * 1024**3:
                base_folder = backup_folder
                print(f"WARNING: Less than 5 GB free space in primary folder")
                print(f"Switching to backup folder: {backup_folder}")
            else:
                base_folder = study_site_folder
                print(f"Using primary folder: {study_site_folder}")
                print(f"Free space: {free / 1024**3:.2f} GB")

            # 3) activate assets in batch for the entire season
            print(f"\nActivating assets for {order_name}...")
            t0_activation = time.time()
            try:
                asset_mapping = batch_activate_assets(feats, ASSET_KEYS, test_n_images)
                activation_time = time.time() - t0_activation
                print(f"All assets activated for {order_name} in {activation_time:.1f} seconds")
            except Exception as e:
                print(f"ERROR during activation for {order_name}: {str(e)}")
                continue

            # 4) download activated assets
            t0_download = time.time()
            try:
                num_assets = download_assets_batch(feats, asset_mapping, base_folder, order_name, test_n_images)
                download_time = time.time() - t0_download
                total_time = time.time() - t0_activation
                print(f"Download complete for {order_name}")
                print(f"Downloaded {num_assets} assets in {download_time:.1f} seconds")
                print(f"Total processing time: {total_time:.1f} seconds")
            except Exception as e:
                print(f"ERROR during download for {order_name}: {str(e)}")
                total_time = time.time() - t0_activation
                num_assets = 0

            # 5) append to log
            print(f"Updating download log at {log_path}")
            with open(log_path, 'a') as logf:
                log_entry = (
                    f"{datetime.now().isoformat()} | {order_name} | "
                    f"images={len(image_ids)} | assets={num_assets} | "
                    f"time_s={total_time:.1f}"
                )
                if test_n_images > 0:
                    log_entry += f" | test_n_images_override={test_n_images}"
                log_entry += "\n"
                logf.write(log_entry)

    # 6) write master CSV: one column per order, padded with '' so that
    #    every column reaches the length of the longest image list
    csv_path = os.path.join(study_site_folder, 'image_roster.csv')
    print(f"\nGenerating master image roster at {csv_path}")

    with open(csv_path, 'w', newline='') as csvf:
        writer = csv.writer(csvf)
        writer.writerow(list(roster_dict))
        writer.writerows(zip_longest(*roster_dict.values(), fillvalue=''))

    print(f"\n{'=' * 60}")
    print(f"Processing complete at {datetime.now().isoformat()}")
    print(f"Log updated at: {log_path}")
    print(f"Master roster CSV at: {csv_path}")
    print(f"{'=' * 60}")

def extract_all_zips(folder_path):
    """
    Extract all zip files found in the specified folder,
    then delete the original zip files after successful extraction.

    Each archive "<name>.zip" (extension matched case-insensitively) is
    extracted into the sibling folder "<name>". An extraction counts as
    successful when every non-directory entry of the archive exists on
    disk with the size recorded in the archive; only then is the zip
    removed. Archives that fail are kept for manual inspection.

    Parameters:
    - folder_path: folder to scan (not recursive). A missing folder is
      reported and skipped.

    Returns:
    None.
    """
    print(f"\n{'=' * 60}")
    print(f"Starting zip extraction process for {folder_path} at {datetime.now().isoformat()}")
    print(f"{'=' * 60}\n")

    if not os.path.exists(folder_path):
        print(f"Folder {folder_path} does not exist, skipping.")
        return

    # Find all zip files in the folder
    zip_files = []
    for entry in sorted(os.listdir(folder_path)):
        candidate = os.path.join(folder_path, entry)
        if entry.lower().endswith('.zip') and os.path.isfile(candidate):
            zip_files.append(candidate)

    if not zip_files:
        print(f"No zip files found in {folder_path}.")
        return

    print(f"Found {len(zip_files)} zip files to extract.")

    # Process each zip file
    success_count = 0
    fail_count = 0

    for zip_path in zip_files:
        try:
            zip_name = os.path.basename(zip_path)
            extract_folder = os.path.join(folder_path, os.path.splitext(zip_name)[0])

            print(f"\nProcessing: {zip_name}")
            print(f"Extracting to: {extract_folder}")

            # Create extraction folder
            os.makedirs(extract_folder, exist_ok=True)

            # Extract member by member so that the path each entry was
            # really written to (after name sanitising) is known.
            expected_sizes = {}  # extracted file path -> size recorded in the zip
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                members = zip_ref.infolist()
                # Directory entries are not files and must not be counted
                file_count = sum(1 for member in members if not member.is_dir())
                print(f"Zip contains {file_count} files")
                for member in members:
                    target = zip_ref.extract(member, extract_folder)
                    if not member.is_dir():
                        # Duplicate names: the last entry wins on disk too
                        expected_sizes[target] = member.file_size

            # Verify extraction against the archive's own entries, so files
            # already present in the target folder do not skew the result.
            verified = sum(
                1 for path, size in expected_sizes.items()
                if os.path.isfile(path) and os.path.getsize(path) == size
            )

            if verified == len(expected_sizes):
                print(f"Extraction successful: {file_count} files extracted")
                print(f"Deleting original zip file: {zip_path}")
                os.remove(zip_path)
                success_count += 1
            else:
                print(f"WARNING: Extraction verification failed. Expected {len(expected_sizes)} files but found {verified}")
                print(f"Original zip file {zip_path} preserved for manual inspection")
                fail_count += 1

        except Exception as e:
            print(f"ERROR extracting {zip_path}: {str(e)}")
            print(f"Original zip file preserved for manual inspection")
            fail_count += 1

    print(f"\n{'=' * 60}")
    print(f"Extraction process completed for {folder_path} at {datetime.now().isoformat()}")
    print(f"Successfully processed: {success_count} zip files")
    if fail_count > 0:
        print(f"Failed: {fail_count} zip files - see above for details")
    print(f"{'=' * 60}")

In [None]:
# Area of interest: closed ring of [lon, lat] vertices
rect = dict(
    type="Polygon",
    coordinates=[[
        [-162.369230, 60.945820],
        [-162.375829, 61.394512],
        [-161.440110, 61.394512],
        [-161.446709, 60.945820],
        [-162.369230, 60.945820],
    ]],
)

# Five consecutive years, 2020 through 2024
years = list(range(2020, 2025))

# Season name -> (start, end) timestamp templates; "{year}" is filled per year
seasons = dict(
    Breakup=("{year}-04-15T00:00:00Z", "{year}-06-15T23:59:59Z"),
    Freezeup=("{year}-10-01T00:00:00Z", "{year}-11-30T23:59:59Z"),
)

# Output locations: primary study-site folder and the fallback folder
primary = r"D:\planetscope_lake_ice\Data\Input\YKD"
backup = r"C:\Users\nj142\Desktop\Fallback"

download_seasonal_orders(
    rect,
    primary,
    backup,
    years,
    seasons,
    cloud_threshold=0.5,
)

# Unpack the downloaded archives in both possible destinations
extract_all_zips(primary)
extract_all_zips(backup)



Starting download process at 2025-04-28T12:56:38.925189
Study site folder: D:\planetscope_lake_ice\Data
Backup folder: C:\Users\nj142\Desktop\Fallback
Cloud threshold: 0.5
Rate limit: 4 requests per second


Processing Breakup_2019: 2019-04-15T00:00:00Z to 2019-06-15T23:59:59Z
Searching for images from 2019-04-15T00:00:00Z to 2019-06-15T23:59:59Z...
Processing page 1 of search results...
No more pages to process.
Total images found meeting criteria: 172
Found 172 images meeting criteria.
Using primary folder: D:\planetscope_lake_ice\Data
Free space: 1483.57 GB

Activating assets for Breakup_2019...
Activating assets for all 172 images
Checking asset statuses...


KeyboardInterrupt: 

In [5]:
# Extract every zip found directly in the data folder; extract_all_zips
# deletes each archive once its extraction has been verified.
extract_all_zips(r"D:\planetscope_lake_ice\Data")


Starting zip extraction process for D:\planetscope_lake_ice\Data at 2025-04-28T12:31:13.834562

Found 1 zip files to extract.

Processing: Freezeup_2019.zip
Extracting to: D:\planetscope_lake_ice\Data\Freezeup_2019
Zip contains 367 files
Extraction successful: 367 files extracted
Deleting original zip file: D:\planetscope_lake_ice\Data\Freezeup_2019.zip

Extraction process completed for D:\planetscope_lake_ice\Data at 2025-04-28T12:54:38.366747
Successfully processed: 1 zip files
