In [1]:
"""
Planet API Satellite Image Downloader - Individual Asset Status System with 3b/4b Fallback

A robust, rate-limited downloader for Planet satellite imagery with individual
asset status tracking and queue management. Supports fallback from 4b to 3b assets.

Features:
- Individual status column for each asset: "Needs Activation 4B" (or 3B), "Downloaded", "Does Not Exist", "Failed Activation"
- Fallback from 4b to 3b assets when 4b not available
- Season-by-season processing with 90-minute timeout per batch
- Parallel activation checking and downloading
- Automatic zip extraction and verification
- Progress tracking and comprehensive logging
"""

# =============================================================================
# CELL 1: IMPORTS AND UTILITIES
# =============================================================================

import os
import csv
import time
import yaml
import requests
import threading
import backoff
import zipfile
import shutil
from datetime import datetime, timedelta
import pandas as pd
from collections import deque
import glob
import queue
from concurrent.futures import ThreadPoolExecutor
import json

# =============================================================================
# RATE LIMITING AND REQUEST HANDLING
# =============================================================================

class RateLimiter:
    """Thread-safe rate limiter for API requests"""
    
    def __init__(self, max_requests_per_second=4):
        self.max_requests_per_second = max_requests_per_second
        self.min_interval = 1.0 / max_requests_per_second
        self.last_request_time = 0
        self.lock = threading.Lock()
        
        # Create session with auth
        self.session = requests.Session()
        self.session.auth = (PLANET_API_KEY, '')
    
    def wait_if_needed(self):
        """Wait if necessary to respect rate limit"""
        with self.lock:
            now = time.time()
            elapsed = now - self.last_request_time
            if elapsed < self.min_interval:
                sleep_time = self.min_interval - elapsed
                time.sleep(sleep_time)
            self.last_request_time = time.time()

@backoff.on_exception(
    backoff.expo,
    requests.exceptions.RequestException,
    max_tries=3,
    max_time=30
)
def make_request_with_retry(request_func, *args, **kwargs):
    """Make HTTP request with rate limiting and retry logic"""
    rate_limiter.wait_if_needed()
    return request_func(*args, **kwargs)

# =============================================================================
# EXISTING DOWNLOADS CHECK
# =============================================================================

def get_downloaded_files_list(download_folders):
    """Get list of all already downloaded item IDs with asset-specific info"""
    downloaded_items = {}  # item_id -> {asset_key: True/False}
    
    print("Scanning for existing downloads...")
    for folder in download_folders:
        if not os.path.exists(folder):
            continue
            
        print(f"Scanning: {folder}")
        
        # Get all subdirectories (like Freezeup_2024, Breakup_2020, etc.)
        subfolders = []
        try:
            for item in os.listdir(folder):
                item_path = os.path.join(folder, item)
                if os.path.isdir(item_path):
                    subfolders.append(item_path)
                    print(f"  Found subfolder: {item}")
        except Exception as e:
            print(f"  Error reading folder {folder}: {e}")
            continue
        
        # Also include the root folder
        folders_to_scan = [folder] + subfolders
        
        for scan_folder in folders_to_scan:
            print(f"  Scanning: {scan_folder}")
            files_found = 0
            
            # Check for both 4b and 3b versions of each asset
            for asset_key in ASSET_KEYS:
                # Check 4b version
                pattern_4b = os.path.join(scan_folder, f"*{asset_key}*{EXT_MAP[asset_key]}")
                files_4b = glob.glob(pattern_4b)
                
                # Check 3b version
                asset_key_3b = asset_key.replace('4b', '3b')
                pattern_3b = os.path.join(scan_folder, f"*{asset_key_3b}*{EXT_MAP[asset_key]}")
                files_3b = glob.glob(pattern_3b)
                
                all_files = files_4b + files_3b
                files_found += len(all_files)
                
                for file_path in all_files:
                    filename = os.path.basename(file_path)
                    # Extract item_id from Planet filename format
                    filename_without_ext = filename.replace(EXT_MAP[asset_key], '')
                    
                    # Remove the asset key suffix (try both 4b and 3b)
                    item_id = None
                    for suffix in [f'_{asset_key}', asset_key, f'_{asset_key_3b}', asset_key_3b]:
                        if filename_without_ext.endswith(suffix):
                            item_id = filename_without_ext[:-len(suffix)]
                            break
                    
                    if not item_id:
                        # Fallback: try to extract item_id by removing known suffixes
                        parts = filename_without_ext.split('_')
                        if len(parts) >= 4:  # Planet IDs typically have at least 4 parts
                            item_id = '_'.join(parts[:4])
                        else:
                            continue
                    
                    if item_id not in downloaded_items:
                        downloaded_items[item_id] = {key: False for key in ASSET_KEYS}
                    downloaded_items[item_id][asset_key] = True
            
            if files_found > 0:
                print(f"    Found {files_found} files")
    
    print(f"Found {len(downloaded_items)} unique items with downloads")
    
    # Print some examples for verification
    if downloaded_items:
        print(f"\nExample downloaded items:")
        for i, (item_id, assets) in enumerate(list(downloaded_items.items())[:5]):
            asset_status = [key for key, status in assets.items() if status]
            print(f"  {item_id}: {asset_status}")
        if len(downloaded_items) > 5:
            print(f"  ... and {len(downloaded_items) - 5} more")
    
    return downloaded_items

# =============================================================================
# PLANET API SEARCH FUNCTIONS
# =============================================================================

def search_all_images(aoi, start_date, end_date, cloud_threshold=0.0):
    """Search for ALL Planet images in the given AOI and date range - handles full pagination"""
    
    search_request = {
        "item_types": ["PSScene"],
        "filter": {
            "type": "AndFilter",
            "config": [
                {
                    "type": "GeometryFilter",
                    "field_name": "geometry",
                    "config": aoi
                },
                {
                    "type": "DateRangeFilter", 
                    "field_name": "acquired",
                    "config": {
                        "gte": start_date,
                        "lte": end_date
                    }
                },
                {
                    "type": "RangeFilter",
                    "field_name": "cloud_cover",
                    "config": {
                        "lte": cloud_threshold
                    }
                }
            ]
        }
    }
    
    all_features = []
    search_url = f"{BASE_URL}/quick-search"
    page_count = 0
    
    try:
        print(f"Starting search from {start_date} to {end_date}")
        
        # First request
        response = make_request_with_retry(rate_limiter.session.post, search_url, json=search_request)
        response.raise_for_status()
        data = response.json()
        
        page_features = data.get('features', [])
        all_features.extend(page_features)
        page_count += 1
        
        print(f"Page {page_count}: Retrieved {len(page_features)} images (Total: {len(all_features)})")
        
        # Debug: Show what pagination links are available
        available_links = list(data.get('_links', {}).keys())
        print(f"Available _links: {available_links}")
        
        # Handle pagination - keep going until no more pages
        while data.get('_links', {}).get('_next'):
            next_url = data['_links']['_next']
            
            try:
                response = make_request_with_retry(rate_limiter.session.get, next_url)
                response.raise_for_status()
                data = response.json()
                
                page_features = data.get('features', [])
                all_features.extend(page_features)
                page_count += 1
                
                print(f"Page {page_count}: Retrieved {len(page_features)} images (Total: {len(all_features)})")
                
                # Safety check - if we're not getting new features, break
                if len(page_features) == 0:
                    print("No more features returned, stopping pagination")
                    break
                
                # Additional safety check - prevent infinite loops
                if page_count > 1000:  # Reasonable upper limit
                    print("WARNING: Reached 1000 pages, stopping to prevent infinite loop")
                    break
                    
            except Exception as e:
                print(f"Error retrieving page {page_count + 1}: {str(e)}")
                print(f"Continuing with {len(all_features)} images retrieved so far")
                break
        
        print(f"Search complete: Retrieved {len(all_features)} total images across {page_count} pages")
        
        # Additional validation
        if page_count == 1 and len(all_features) == 250:
            print("WARNING: Got exactly 250 results on 1 page - this might indicate pagination issues!")
            print("If you expected more results, check the API response format.")
        
    except Exception as e:
        print(f"Error in search: {str(e)}")
        return []
    
    return all_features

# =============================================================================
# ASSET AVAILABILITY CHECK WITH 3b/4b FALLBACK
# =============================================================================

def check_asset_availability(assets_url, asset_key):
    """
    Check asset availability with fallback from 4b to 3b
    Returns: (status, download_url, actual_asset_key_used)
    """
    try:
        resp = make_request_with_retry(rate_limiter.session.get, assets_url)
        resp.raise_for_status()
        assets_data = resp.json()
        
        # Try 4b version first
        if asset_key in assets_data:
            asset = assets_data[asset_key]
            status = asset.get('status', 'inactive')
            
            if status == 'active' and 'location' in asset:
                return "Downloaded 4B", asset['location'], asset_key
            else:
                return "Needs Activation 4B", "", asset_key
        
        # If 4b doesn't exist, try 3b version
        fallback_key = ASSET_FALLBACK_MAP.get(asset_key, asset_key.replace('4b', '3b'))
        if fallback_key in assets_data:
            asset = assets_data[fallback_key]
            status = asset.get('status', 'inactive')
            
            if status == 'active' and 'location' in asset:
                return "Downloaded 3B", asset['location'], fallback_key
            else:
                return "Needs Activation 3B", "", fallback_key
        
        # Neither 4b nor 3b exists
        return "Does Not Exist", "", asset_key
        
    except Exception as e:
        print(f"Error checking asset availability: {str(e)}")
        return "Needs Activation", "", asset_key

# =============================================================================
# CSV GENERATION WITH INDIVIDUAL ASSET STATUS
# =============================================================================

def generate_download_csv(aoi, years, seasons, download_folders, cloud_threshold=0.0):
    """
    Generate CSV with individual asset status tracking
    Each asset gets its own status column (no individual URL columns)
    """
    print(f"\n{'=' * 80}")
    print(f"GENERATING INDIVIDUAL ASSET STATUS TRACKING CSV")
    print(f"{'=' * 80}")
    
    # Get list of already downloaded files
    downloaded_items = get_downloaded_files_list(download_folders)
    
    # Generate CSV filename with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_path = os.path.join(download_folders[0], f'planet_download_queue_{timestamp}.csv')
    
    # Create headers with individual asset status columns only
    headers = [
        'year', 'season', 'item_id', 'item_type', 'acquired', 'cloud_cover', 'assets_url'
    ]
    
    # Add status columns for each asset (no URL columns)
    for asset_key in ASSET_KEYS:
        headers.append(f'{asset_key}_status')
    
    # Write headers to CSV
    with open(csv_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(headers)
    
    total_records = 0
    
    # Process each year
    for year in years:
        print(f"\n{'=' * 60}")
        print(f"PROCESSING YEAR {year}")
        print(f"{'=' * 60}")
        
        year_records = []
        
        # Process each season in the year
        for season, (start_template, end_template) in seasons.items():
            start_date = start_template.format(year=year)
            end_date = end_template.format(year=year)
            
            print(f"\nProcessing {season} {year}: {start_date} to {end_date}")
            
            try:
                # Search for all images
                features = search_all_images(aoi, start_date, end_date, cloud_threshold)
                print(f"Found {len(features)} images for {season} {year}")
                
                if not features:
                    print(f"No images found for {season} {year}, skipping")
                    continue
                
                # Process each image
                for feat in features:
                    item_id = feat['id']
                    item_type = feat['properties']['item_type']
                    acquired = feat['properties']['acquired']
                    cloud_cover = feat['properties']['cloud_cover']
                    
                    # Get assets URL
                    assets_url = f"{BASE_URL}/item-types/{item_type}/items/{item_id}/assets/"
                    
                    # Start building record
                    record = [year, season, item_id, item_type, acquired, cloud_cover, assets_url]
                    
                    # Check if already downloaded
                    existing_assets = downloaded_items.get(item_id, {})
                    
                    # Process each asset individually
                    for asset_key in ASSET_KEYS:
                        if existing_assets.get(asset_key, False):
                            # Already downloaded - but check if it's 3b or 4b from filename patterns
                            asset_status = "Downloaded"  # We'll enhance this later if needed
                        else:
                            # Check asset availability with 3b/4b fallback
                            asset_status, _, _ = check_asset_availability(assets_url, asset_key)
                        
                        # Add asset status to record
                        record.append(asset_status)
                    
                    year_records.append(record)
                
            except Exception as e:
                print(f"CRITICAL ERROR: Failed to process {season} {year}: {str(e)}")
                continue
        
        # Sort year records by season and acquired date
        year_records.sort(key=lambda x: (x[1], x[4]))  # Sort by season, then acquired
        
        # Append year records to CSV and clear from memory
        if year_records:
            with open(csv_path, 'a', newline='') as csvfile:
                writer = csv.writer(csvfile)
                writer.writerows(year_records)
            
            print(f"Year {year}: Added {len(year_records)} records to CSV")
            total_records += len(year_records)
            
            # Clear from memory
            year_records.clear()
            print(f"Year {year}: Memory cleared")
    
    print(f"\n{'=' * 80}")
    print(f"CSV GENERATION COMPLETE")
    print(f"{'=' * 80}")
    print(f"Total records: {total_records}")
    print(f"CSV saved to: {csv_path}")
    
    # Print summary
    print_csv_summary(csv_path)
    
    return csv_path

def print_csv_summary(csv_path):
    """Print summary of CSV contents with individual asset breakdown"""
    print(f"\n{'=' * 60}")
    print(f"DOWNLOAD SUMMARY")
    print(f"{'=' * 60}")
    
    df = pd.read_csv(csv_path)
    
    total_images = len(df)
    print(f"Total Images: {total_images}")
    
    # Asset-specific status breakdown
    print(f"\nAsset Status Breakdown:")
    for asset_key in ASSET_KEYS:
        status_col = f'{asset_key}_status'
        if status_col in df.columns:
            print(f"\n{asset_key}:")
            status_counts = df[status_col].value_counts()
            for status, count in status_counts.items():
                percentage = (count / total_images) * 100
                print(f"  {status}: {count} ({percentage:.1f}%)")
    
    # Summary by year and season
    print(f"\n{'=' * 40}")
    print(f"SUMMARY BY YEAR AND SEASON")
    print(f"{'=' * 40}")
    
    summary = df.groupby(['year', 'season']).agg({
        'item_id': 'count'
    }).rename(columns={'item_id': 'total_images'})
    
    # Add downloaded counts for each asset
    for asset_key in ASSET_KEYS:
        status_col = f'{asset_key}_status'
        if status_col in df.columns:
            summary[f'{asset_key}_downloaded'] = df.groupby(['year', 'season'])[status_col].apply(
                lambda x: (x == 'Downloaded').sum()
            )
    
    print(summary)

# =============================================================================
# QUEUE MANAGER - SEASON-BY-SEASON PROCESSING WITH INDIVIDUAL ASSETS
# =============================================================================

class SeasonQueueManager:
    """Manages download queue processing season by season with individual asset tracking"""
    
    def __init__(self, csv_path, download_folders, target_status="Needs Activation", print_debug=False):
        self.csv_path = csv_path
        self.download_folders = download_folders
        self.output_folder = download_folders[0]
        self.target_status = target_status
        self.print_debug = print_debug
        
        # Load CSV
        self.df = pd.read_csv(csv_path)
        
        # Processing control
        self.activation_timeout = 90 * 60  # 90 minutes
        
    def get_season_queue(self, year, season):
        """Get items for a specific season that need processing (any asset with target status)"""
        mask = (self.df['year'] == year) & (self.df['season'] == season)
        season_items = self.df[mask].copy()
        
        # Filter items that have at least one asset with target status
        items_needing_processing = []
        for idx, row in season_items.iterrows():
            has_target_status = False
            for asset_key in ASSET_KEYS:
                status_col = f'{asset_key}_status'
                if status_col in row:
                    status = row[status_col]
                    # Check for any "Needs Activation" status (3B or 4B)
                    if (self.target_status == "Needs Activation" and 
                        ("Needs Activation" in str(status))):
                        has_target_status = True
                        break
                    elif status == self.target_status:
                        has_target_status = True
                        break
            
            if has_target_status:
                items_needing_processing.append(idx)
        
        season_items = season_items.loc[items_needing_processing]
        
        print(f"Found {len(season_items)} items for {season} {year} with at least one asset in '{self.target_status}' status")
        
        if self.print_debug and len(season_items) > 0:
            print("\nItems in queue:")
            for idx, row in season_items.iterrows():
                asset_statuses = []
                for asset_key in ASSET_KEYS:
                    status_col = f'{asset_key}_status'
                    if status_col in row:
                        asset_statuses.append(f"{asset_key}:{row[status_col]}")
                print(f"  {row['item_id']} - {' | '.join(asset_statuses)}")
        
        return season_items
    
    def process_all_seasons(self):
        """Process all seasons one by one"""
        print(f"\n{'=' * 80}")
        print(f"STARTING SEASON-BY-SEASON QUEUE PROCESSING")
        print(f"{'=' * 80}")
        print(f"Target status: {self.target_status}")
        
        # Get unique year-season combinations that have items needing processing
        seasons_to_process = []
        unique_seasons = self.df[['year', 'season']].drop_duplicates()
        
        for _, row in unique_seasons.iterrows():
            year, season = row['year'], row['season']
            season_items = self.get_season_queue(year, season)
            if len(season_items) > 0:
                seasons_to_process.append((year, season))
        
        seasons_to_process.sort()
        print(f"Found {len(seasons_to_process)} seasons to process")
        
        for idx, (year, season) in enumerate(seasons_to_process):
            print(f"\n{'=' * 60}")
            print(f"PROCESSING SEASON {idx + 1}/{len(seasons_to_process)}: {season} {year}")
            print(f"{'=' * 60}")
            
            season_items = self.get_season_queue(year, season)
            
            # Process this season
            processor = SeasonProcessor(
                season_items=season_items,
                csv_path=self.csv_path,
                output_folder=self.output_folder,
                season_name=f"{season}_{year}"
            )
            
            processor.process_season()
        
        print(f"\n{'=' * 80}")
        print(f"ALL SEASONS PROCESSING COMPLETE!")
        print(f"{'=' * 80}")
        
        # Print final summary
        print_csv_summary(self.csv_path)

# =============================================================================
# SEASON PROCESSOR - HANDLES ONE SEASON WITH INDIVIDUAL ASSET TRACKING
# =============================================================================

class SeasonProcessor:
    """Processes downloads for a single season with individual asset tracking and 3b/4b fallback"""
    
    def __init__(self, season_items, csv_path, output_folder, season_name):
        self.season_items = season_items
        self.csv_path = csv_path
        self.output_folder = output_folder
        self.season_name = season_name
        
        # Processing queues
        self.activation_queue = queue.Queue()
        self.download_queue = queue.Queue()
        
        # Tracking
        self.activation_start_time = None
        self.activation_attempts = {}
        self.shutdown_event = threading.Event()
        
        # Populate activation queue with individual assets
        for idx, row in season_items.iterrows():
            for asset_key in ASSET_KEYS:
                status_col = f'{asset_key}_status'
                if status_col in row:
                    status = row[status_col]
                    # Check for any "Needs Activation" status (3B or 4B)
                    if "Needs Activation" in str(status):
                        self.activation_queue.put({
                            'df_index': idx,
                            'item_id': row['item_id'],
                            'asset_key': asset_key,
                            'assets_url': row['assets_url'],
                            'year': row['year'],
                            'season': row['season']
                        })
    
    def process_season(self):
        """Main processing function for one season"""
        print(f"Starting processing for {self.season_name}")
        print(f"Individual assets to process: {self.activation_queue.qsize()}")
        
        self.activation_start_time = time.time()
        
        # Start worker threads
        with ThreadPoolExecutor(max_workers=3) as executor:
            # Start workers
            activation_future = executor.submit(self._activation_worker)
            download_future = executor.submit(self._download_worker)
            status_future = executor.submit(self._status_monitor)
            
            # Wait for 90 minutes or until activation queue is empty
            timeout = 90 * 60  # 90 minutes
            start_time = time.time()
            
            while (time.time() - start_time < timeout and 
                   not self.activation_queue.empty()):
                time.sleep(60)  # Check every minute
                
                elapsed = (time.time() - start_time) / 60
                print(f"Season progress - {elapsed:.1f} minutes elapsed")
                print(f"  Activation queue: {self.activation_queue.qsize()}")
                print(f"  Download queue: {self.download_queue.qsize()}")
            
            # After 90 minutes, mark remaining items as failed
            if time.time() - start_time >= timeout:
                print(f"90 minute timeout reached for {self.season_name}")
                self._mark_remaining_as_failed()
            
            # Don't shut down until downloads are complete
            print("Waiting for downloads to complete...")
            while not self.download_queue.empty():
                time.sleep(30)
                print(f"  Download queue: {self.download_queue.qsize()}")
            
            # Shutdown workers
            self.shutdown_event.set()
            
            # Wait for workers to complete
            try:
                activation_future.result(timeout=30)
                download_future.result(timeout=30)
                status_future.result(timeout=30)
            except Exception as e:
                print(f"Error shutting down workers: {e}")
        
        print(f"Season {self.season_name} processing complete!")
    
    def _activation_worker(self):
        """Worker for activating and checking individual asset status with 3b/4b fallback"""
        print(f"Activation worker started for {self.season_name}")
        
        while not self.shutdown_event.is_set():
            try:
                if self.activation_queue.empty():
                    time.sleep(10)
                    continue
                
                # Process items in batches
                batch = []
                while not self.activation_queue.empty() and len(batch) < 50:
                    try:
                        item = self.activation_queue.get_nowait()
                        batch.append(item)
                    except queue.Empty:
                        break
                
                if not batch:
                    continue
                
                # Process batch
                for item in batch:
                    try:
                        self._process_activation_item(item)
                    except Exception as e:
                        print(f"Error processing {item['item_id']}:{item['asset_key']}: {e}")
                        # Put back in queue for retry
                        self.activation_queue.put(item)
                
                time.sleep(30)  # Wait before next batch
                
            except Exception as e:
                print(f"Activation worker error: {e}")
                time.sleep(60)
        
        print(f"Activation worker stopped for {self.season_name}")
    
    def _process_activation_item(self, item):
        """Process a single asset for activation with 3b/4b fallback"""
        item_id = item['item_id']
        asset_key = item['asset_key']
        assets_url = item['assets_url']
        
        try:
            # Get current asset status with 3b/4b fallback
            resp = make_request_with_retry(rate_limiter.session.get, assets_url)
            resp.raise_for_status()
            assets_data = resp.json()
            
            # Try 4b version first
            actual_asset_key = asset_key
            asset = None
            
            if asset_key in assets_data:
                asset = assets_data[asset_key]
                version_suffix = "4B"
            else:
                # Try 3b version using the mapping
                fallback_key = ASSET_FALLBACK_MAP.get(asset_key, asset_key.replace('4b', '3b'))
                if fallback_key in assets_data:
                    asset = assets_data[fallback_key]
                    actual_asset_key = fallback_key
                    version_suffix = "3B"
            
            if asset is None:
                # Asset doesn't exist (neither 4b nor 3b)
                self._update_asset_status(item['df_index'], asset_key, "Does Not Exist")
                print(f"Does not exist: {item_id}:{asset_key} (tried both 4b and 3b)")
                return
            
            status = asset.get('status', 'inactive')
            
            if status == 'active' and 'location' in asset:
                # Ready for download
                download_url = asset['location']
                self._update_asset_status(item['df_index'], asset_key, f"Downloaded {version_suffix}")
                
                # Add to download queue
                self.download_queue.put({
                    'df_index': item['df_index'],
                    'item_id': item_id,
                    'asset_key': asset_key,
                    'actual_asset_key': actual_asset_key,  # Track which version we're using
                    'download_url': download_url,
                    'year': item['year'],
                    'season': item['season']
                })
                print(f"Ready for download: {item_id}:{actual_asset_key}")
            elif status in ['inactive', 'activating']:
                # Try to activate if possible
                if 'activate' in asset.get('_links', {}):
                    activate_url = asset['_links']['activate']
                    try:
                        resp = make_request_with_retry(rate_limiter.session.post, activate_url)
                        resp.raise_for_status()
                        print(f"Activation requested for {item_id}:{actual_asset_key}")
                    except Exception as e:
                        print(f"Failed to activate {item_id}:{actual_asset_key}: {e}")
                
                # Check if we should retry or fail
                key = f"{item_id}:{asset_key}"
                self.activation_attempts[key] = self.activation_attempts.get(key, 0) + 1
                
                elapsed = time.time() - self.activation_start_time
                if elapsed > 90 * 60 or self.activation_attempts[key] > 3:
                    # Mark as failed activation
                    self._update_asset_status(item['df_index'], asset_key, f"Failed Activation {version_suffix}")
                    print(f"Failed activation: {item_id}:{actual_asset_key} (attempts: {self.activation_attempts[key]})")
                else:
                    # Put back in queue for retry
                    self.activation_queue.put(item)
            else:
                # Unknown status, mark as failed
                self._update_asset_status(item['df_index'], asset_key, f"Failed Activation {version_suffix}")
                print(f"Unknown status '{status}' for {item_id}:{actual_asset_key}")
                
        except Exception as e:
            print(f"Error processing activation for {item_id}:{asset_key}: {e}")
            # Put back in queue for retry
            key = f"{item_id}:{asset_key}"
            self.activation_attempts[key] = self.activation_attempts.get(key, 0) + 1
            if self.activation_attempts[key] <= 3:
                self.activation_queue.put(item)
            else:
                self._update_asset_status(item['df_index'], asset_key, "Failed Activation")
    
    def _download_worker(self):
        """Worker for downloading individual assets and batching them"""
        print(f"Download worker started for {self.season_name}")
        
        # Collect items for batching
        download_batch = {}  # item_id -> {assets: {asset_key: (url, actual_asset_key)}, df_index: idx, ...}
        
        while not self.shutdown_event.is_set() or not self.download_queue.empty():
            try:
                # Collect items for this batch
                batch_timeout = time.time() + 300  # 5 minute batching window
                
                while time.time() < batch_timeout and not self.download_queue.empty():
                    try:
                        item = self.download_queue.get_nowait()
                        item_id = item['item_id']
                        
                        if item_id not in download_batch:
                            download_batch[item_id] = {
                                'assets': {},
                                'df_index': item['df_index'],
                                'year': item['year'],
                                'season': item['season']
                            }

                        # New check before adding to batch
                        df = pd.read_csv(self.csv_path)
                        status_cols = [f'{k}_status' for k in ASSET_KEYS]
                        row = df.loc[df['item_id'] == item_id]

                        if not row.empty:
                            if (row[status_cols] == 'Does Not Exist').any(axis=1).iloc[0]:
                                print(f"Skipping {item_id} - one or more assets marked 'Does Not Exist'")
                                continue
                        
                        download_batch[item_id]['assets'][item['asset_key']] = (
                            item['download_url'], 
                            item['actual_asset_key']
                        )
                        
                    except queue.Empty:
                        time.sleep(1)
                        continue
                
                # Process the batch
                if download_batch:
                    self._process_download_batch(download_batch)
                    download_batch.clear()
                
                if self.download_queue.empty():
                    time.sleep(10)
                
            except Exception as e:
                print(f"Download worker error: {e}")
                time.sleep(30)
        
        # Process any remaining items
        if download_batch:
            self._process_download_batch(download_batch)
        
        print(f"Download worker stopped for {self.season_name}")
    
    def _process_download_batch(self, download_batch):
        """Process a batch of downloads, creating zip files for complete items"""
        for item_id, item_data in download_batch.items():
            assets = item_data['assets']
            
            try:
                # Create zip file for this item
                zip_filename = f"{self.season_name}_{item_id}.zip"
                zip_path = os.path.join(self.output_folder, zip_filename)
                
                with zipfile.ZipFile(zip_path, 'w') as batch_zip:
                    downloaded_assets = []
                    
                    for asset_key, (download_url, actual_asset_key) in assets.items():
                        try:
                            # Download the asset
                            resp = make_request_with_retry(rate_limiter.session.get, download_url)
                            resp.raise_for_status()
                            
                            # Use the actual asset key (3b or 4b) in filename
                            filename = f"{item_id}_{actual_asset_key}{EXT_MAP[asset_key]}"
                            batch_zip.writestr(filename, resp.content)
                            downloaded_assets.append(asset_key)
                            
                            print(f"Downloaded to zip: {filename}")
                            
                        except Exception as e:
                            print(f"Error downloading {item_id}:{actual_asset_key}: {e}")
                            # Mark this specific asset as failed
                            self._update_asset_status(item_data['df_index'], asset_key, "Failed Activation")
                
                # Extract zip file if any assets were downloaded
                if downloaded_assets:
                    # Create season folder
                    season_folder = os.path.join(self.output_folder, f"{item_data['season']}_{item_data['year']}")
                    os.makedirs(season_folder, exist_ok=True)
                    
                    # Extract files
                    with zipfile.ZipFile(zip_path, 'r') as batch_zip:
                        batch_zip.extractall(season_folder)
                    
                    # Update status for successfully downloaded assets
                    for asset_key in downloaded_assets:
                        if asset_key in assets:
                            # Determine if it was 3B or 4B based on the actual asset key used
                            _, actual_asset_key = assets[asset_key]
                            if '3b' in actual_asset_key or actual_asset_key in ASSET_FALLBACK_MAP.values():
                                version_suffix = "3B"
                            else:
                                version_suffix = "4B"
                            self._update_asset_status(item_data['df_index'], asset_key, f"Downloaded {version_suffix}")
                    
                    print(f"Extracted {len(downloaded_assets)} assets for {item_id} to {season_folder}")
                    
                    # Remove zip file after extraction
                    try:
                        os.remove(zip_path)
                    except:
                        pass
                        
            except Exception as e:
                print(f"Error processing batch for {item_id}: {e}")
                # Mark all assets in this item as failed
                for asset_key in assets.keys():
                    self._update_asset_status(item_data['df_index'], asset_key, "Failed Activation")
    
    def _status_monitor(self):
        """Monitor and report progress every 5 minutes"""
        while not self.shutdown_event.is_set():
            time.sleep(5 * 60)  # 5 minutes
            
            elapsed = (time.time() - self.activation_start_time) / 60
            print(f"\n--- Status Update ({elapsed:.1f} min) ---")
            print(f"Activation queue: {self.activation_queue.qsize()}")
            print(f"Download queue: {self.download_queue.qsize()}")
    
    def _update_asset_status(self, df_index, asset_key, new_status):
        """Update CSV with new status for individual asset"""
        try:
            df = pd.read_csv(self.csv_path)
            if df_index < len(df):
                status_col = f'{asset_key}_status'
                
                if status_col in df.columns:
                    df.loc[df_index, status_col] = new_status
                    
                df.to_csv(self.csv_path, index=False)
        except Exception as e:
            print(f"Error updating CSV: {e}")
    
    def _mark_remaining_as_failed(self):
        """Mark all remaining assets in activation queue as failed"""
        failed_count = 0
        while not self.activation_queue.empty():
            try:
                item = self.activation_queue.get_nowait()
                self._update_asset_status(item['df_index'], item['asset_key'], "Failed Activation")
                failed_count += 1
            except queue.Empty:
                break
        
        print(f"Marked {failed_count} assets as Failed Activation due to timeout")

# =============================================================================
# MAIN EXECUTION FUNCTIONS
# =============================================================================

def step1_check_existing_downloads():
    """
    STEP 1: Get list of all files already downloaded with asset-specific breakdown
    """
    print(f"\n{'=' * 80}")
    print(f"STEP 1: CHECKING EXISTING DOWNLOADS")
    print(f"{'=' * 80}")
    
    downloaded_items = get_downloaded_files_list(DOWNLOAD_FOLDERS)
    
    print(f"\nSummary:")
    print(f"Found {len(downloaded_items)} unique items with downloads")
    
    # Asset breakdown
    if downloaded_items:
        asset_totals = {key: 0 for key in ASSET_KEYS}
        for item_id, assets in downloaded_items.items():
            for asset_key in ASSET_KEYS:
                if assets.get(asset_key, False):
                    asset_totals[asset_key] += 1
        
        print(f"\nAsset breakdown:")
        for asset_key in ASSET_KEYS:
            print(f"  {asset_key}: {asset_totals[asset_key]} files")
        
        print(f"\nFirst 10 downloaded items:")
        for i, (item_id, assets) in enumerate(list(downloaded_items.items())[:10]):
            asset_status = [key for key, status in assets.items() if status]
            print(f"  {i+1}. {item_id}: {asset_status}")
        
        if len(downloaded_items) > 10:
            print(f"  ... and {len(downloaded_items) - 10} more")
    
    return downloaded_items

def step2_generate_csv():
    """
    STEP 2: Generate CSV with individual asset status columns (no URL columns)
    """
    print(f"\n{'=' * 80}")
    print(f"STEP 2: GENERATING INDIVIDUAL ASSET TRACKING CSV")
    print(f"{'=' * 80}")
    
    csv_path = generate_download_csv(
        aoi=AOI_POLYGON,
        years=YEARS,
        seasons=SEASONS,
        download_folders=DOWNLOAD_FOLDERS,
        cloud_threshold=CLOUD_THRESHOLD
    )
    
    if csv_path:
        print(f"\n{'=' * 80}")
        print(f"CSV GENERATION COMPLETE!")
        print(f"{'=' * 80}")
        print(f"CSV file saved to: {csv_path}")
        print(f"You can now proceed to Step 3 for queue management!")
        
        return csv_path
    else:
        print("ERROR: CSV generation failed!")
        return None

def step3_run_queue_manager(csv_path=None, target_status="Needs Activation", print_debug=False):
    """
    STEP 3: Run the queue manager to process downloads season by season
    Now processes individual assets with their own status tracking and 3b/4b fallback
    
    Args:
        csv_path: Path to CSV file (if None, will try to find most recent)
        target_status: Which status to process ("Needs Activation", "Failed Activation", etc.)
        print_debug: Whether to print detailed debug info
    """
    print(f"\n{'=' * 80}")
    print(f"STEP 3: QUEUE MANAGER - INDIVIDUAL ASSET PROCESSING WITH 3b/4b FALLBACK")
    print(f"{'=' * 80}")
    
    # Find CSV if not provided
    if csv_path is None or not os.path.exists(csv_path):
        csv_files = glob.glob(os.path.join(PRIMARY_FOLDER, "planet_download_queue_*.csv"))
        if csv_files:
            csv_path = max(csv_files, key=os.path.getctime)  # Most recent file
            print(f"Using most recent CSV: {csv_path}")
        else:
            print("ERROR: No CSV file found!")
            print("Please run step2_generate_csv() first or provide a valid csv_path")
            return
    
    print(f"CSV file: {csv_path}")
    print(f"Target status: {target_status}")
    print(f"Print debug info: {print_debug}")
    print(f"3b/4b fallback: Enabled")
    
    # Create and run queue manager
    queue_manager = SeasonQueueManager(
        csv_path=csv_path,
        download_folders=DOWNLOAD_FOLDERS,
        target_status=target_status,
        print_debug=print_debug
    )
    
    queue_manager.process_all_seasons()

In [None]:
# =============================================================================
# CONFIGURATION SECTION
# =============================================================================

# =============================================================================
# API CONFIGURATION
# =============================================================================

# Load API key from config file
with open(PLANET_YAML_PATH, 'r') as f:
    PLANET_API_KEY = yaml.safe_load(f)['api_key']

BASE_URL = "https://api.planet.com/data/v1"

# Asset configuration - using 4b as primary with 3b fallback
ASSET_KEYS = [
    'ortho_analytic_4b_sr',
    'ortho_analytic_4b_xml', 
    'ortho_udm2'
]

# Asset mapping for 4b to 3b fallback
ASSET_FALLBACK_MAP = {
    'ortho_analytic_4b_sr': 'ortho_analytic_3b',
    'ortho_analytic_4b_xml': 'ortho_analytic_3b_xml',
    'ortho_udm2': 'ortho_udm2'  # This one stays the same
}

EXT_MAP = {
    'ortho_analytic_4b_sr': '.tif',
    'ortho_analytic_4b_xml': '.xml',
    'ortho_udm2': '.tif'
}

# =============================================================================
# PROCESSING CONFIGURATION
# =============================================================================

# Rate limiting configuration
MAX_REQUESTS_PER_SECOND = 4

# Initialize rate limiter
rate_limiter = RateLimiter(MAX_REQUESTS_PER_SECOND)


# =============================================================================
# FILE PATHS
# =============================================================================
""" MAKE SURE TO CHANGE NS HERE FOR STUDY SITE! """
# API configuration file
PLANET_YAML_PATH = r"D:\planetscope_lake_ice\planet.yaml"

# Download folders (primary and backup)
PRIMARY_FOLDER = r"E:\planetscope_lake_ice\Data\Input\NS 50x50 km"
BACKUP_FOLDER = r"C:\Users\nj142\Desktop\Fallback"

DOWNLOAD_FOLDERS = [PRIMARY_FOLDER, BACKUP_FOLDER]


# =============================================================================
# STUDY AREA AND TIME CONFIGURATION
# =============================================================================

# Define your area of interest (AOI)

"""
NORTH SLOPE BELOW: 
AOI_POLYGON = {
    "type": "Polygon", 
    "coordinates": [[
        [-155.902745, 70.351719], 
        [-155.917670, 70.799842], 
        [-154.555964, 70.799842], 
        [-154.570890, 70.351719], 
        [-155.902745, 70.351719]
    ]]
}

# Define seasons with date templates
SEASONS = {
    "Breakup": ("{year}-05-15T00:00:00Z", "{year}-07-30T23:59:59Z"),
    "Freezeup": ("{year}-10-01T00:00:00Z", "{year}-11-30T23:59:59Z")
}
"""

""" 
YKD BELOW:
"""
AOI_POLYGON = {
    "type": "Polygon",
    "coordinates": [[
        [-162.369230, 60.945820],
        [-162.375829, 61.394512],
        [-161.440110, 61.394512],
        [-161.446709, 60.945820],
        [-162.369230, 60.945820],
    ]]
}

SEASONS = {
    "Breakup": ("{year}-04-01T00:00:00Z", "{year}-06-30T23:59:59Z"),
    "Freezeup": ("{year}-10-01T00:00:00Z", "{year}-11-30T23:59:59Z")
}


# Define years to process
YEARS = [2019, 2020, 2021, 2022, 2023, 2024, 2025]

# Cloud cover threshold (0.0 = 0% cloud cover only)
CLOUD_THRESHOLD = 0.0

print("Configuration loaded successfully!")
print(f"API Key loaded from: {PLANET_YAML_PATH}")
print(f"Primary folder: {PRIMARY_FOLDER}")
print(f"Backup folder: {BACKUP_FOLDER}")
print(f"Years: {YEARS}")
print(f"Seasons: {list(SEASONS.keys())}")
print(f"Cloud threshold: {CLOUD_THRESHOLD}")

Configuration loaded successfully!
API Key loaded from: D:\planetscope_lake_ice\planet.yaml
Primary folder: E:\planetscope_lake_ice\Data\Input\NS 50x50 km
Backup folder: C:\Users\nj142\Desktop\Fallback
Years: [2019, 2020, 2021, 2022, 2023, 2024, 2025]
Seasons: ['Breakup', 'Freezeup']
Cloud threshold: 0.0
Asset keys (4b with 3b fallback): ['ortho_analytic_4b_sr', 'ortho_analytic_4b_xml', 'ortho_udm2']


In [10]:
# Step 1: Check what you already have downloaded
step1_check_existing_downloads()


STEP 1: CHECKING EXISTING DOWNLOADS
Scanning for existing downloads...
Scanning: E:\planetscope_lake_ice\Data\Input\NS 50x50 km
  Found subfolder: Breakup_2019
  Found subfolder: Breakup_2020
  Found subfolder: Breakup_2021
  Found subfolder: Breakup_2022
  Found subfolder: Breakup_2023
  Found subfolder: Breakup_2024
  Found subfolder: Breakup_2025
  Found subfolder: Freezeup_2019
  Found subfolder: Freezeup_2020
  Found subfolder: Freezeup_2021
  Found subfolder: Freezeup_2022
  Found subfolder: Freezeup_2023
  Found subfolder: Freezeup_2024
  Found subfolder: NS Bounding Box Shapefile
  Scanning: E:\planetscope_lake_ice\Data\Input\NS 50x50 km
  Scanning: E:\planetscope_lake_ice\Data\Input\NS 50x50 km\Breakup_2019
    Found 881 files
  Scanning: E:\planetscope_lake_ice\Data\Input\NS 50x50 km\Breakup_2020
    Found 1000 files
  Scanning: E:\planetscope_lake_ice\Data\Input\NS 50x50 km\Breakup_2021
    Found 800 files
  Scanning: E:\planetscope_lake_ice\Data\Input\NS 50x50 km\Breakup_2

{'20190416_220045_00_1059': {'ortho_analytic_4b_sr': True,
  'ortho_analytic_4b_xml': True,
  'ortho_udm2': True},
 '20190416_220047_03_1059': {'ortho_analytic_4b_sr': True,
  'ortho_analytic_4b_xml': True,
  'ortho_udm2': True},
 '20190418_201749_31_106b': {'ortho_analytic_4b_sr': True,
  'ortho_analytic_4b_xml': True,
  'ortho_udm2': True},
 '20190418_201751_36_106b': {'ortho_analytic_4b_sr': True,
  'ortho_analytic_4b_xml': True,
  'ortho_udm2': True},
 '20190418_201753_41_106b': {'ortho_analytic_4b_sr': True,
  'ortho_analytic_4b_xml': True,
  'ortho_udm2': True},
 '20190418_201755_46_106b': {'ortho_analytic_4b_sr': True,
  'ortho_analytic_4b_xml': True,
  'ortho_udm2': True},
 '20190418_201757_51_106b': {'ortho_analytic_4b_sr': True,
  'ortho_analytic_4b_xml': True,
  'ortho_udm2': True},
 '20190418_201759_57_106b': {'ortho_analytic_4b_sr': True,
  'ortho_analytic_4b_xml': True,
  'ortho_udm2': True},
 '20190419_184716_1048': {'ortho_analytic_4b_sr': True,
  'ortho_analytic_4b_xml

In [11]:
# Step 2: Generate the CSV with individual asset tracking to finish downloads
csv_path = step2_generate_csv()


STEP 2: GENERATING INDIVIDUAL ASSET TRACKING CSV

GENERATING INDIVIDUAL ASSET STATUS TRACKING CSV
Scanning for existing downloads...
Scanning: E:\planetscope_lake_ice\Data\Input\NS 50x50 km
  Found subfolder: Breakup_2019
  Found subfolder: Breakup_2020
  Found subfolder: Breakup_2021
  Found subfolder: Breakup_2022
  Found subfolder: Breakup_2023
  Found subfolder: Breakup_2024
  Found subfolder: Breakup_2025
  Found subfolder: Freezeup_2019
  Found subfolder: Freezeup_2020
  Found subfolder: Freezeup_2021
  Found subfolder: Freezeup_2022
  Found subfolder: Freezeup_2023
  Found subfolder: Freezeup_2024
  Found subfolder: NS Bounding Box Shapefile
  Scanning: E:\planetscope_lake_ice\Data\Input\NS 50x50 km
  Scanning: E:\planetscope_lake_ice\Data\Input\NS 50x50 km\Breakup_2019
    Found 881 files
  Scanning: E:\planetscope_lake_ice\Data\Input\NS 50x50 km\Breakup_2020
    Found 1000 files
  Scanning: E:\planetscope_lake_ice\Data\Input\NS 50x50 km\Breakup_2021
    Found 800 files
  Scan

In [None]:
# 3. Download everything that needs activation
print("\n=== DOWNLOADING NEW ITEMS ===")
step3_run_queue_manager(csv_path)

# 4. Check results
print("\n=== FINAL STATUS ===")
get_asset_status_breakdown()

# 5. Retry any failures
print("\n=== RETRYING FAILURES ===")
retry_failed_activations()

# 6. Clean up
cleanup_empty_folders()

In [18]:
import requests
from requests.auth import HTTPBasicAuth

# URL to the specific item assets endpoint
ASSETS_URL = "https://api.planet.com/data/v1/item-types/PSScene/items/20190616_214746_1012/assets/"

def list_assets():
    response = requests.get(ASSETS_URL, auth=HTTPBasicAuth(PLANET_API_KEY, ''))
    
    if response.status_code == 200:
        assets = response.json()
        print("Available assets:")
        for asset in assets:
            print(f"- {asset}")
    else:
        print(f"Failed to retrieve assets. Status code: {response.status_code}")
        print("Response:", response.text)

if __name__ == '__main__':
    list_assets()


Available assets:
- basic_analytic_4b
- basic_analytic_4b_rpc
- basic_analytic_4b_xml
- basic_udm2
- ortho_analytic_3b
- ortho_analytic_3b_xml
- ortho_analytic_4b
- ortho_analytic_4b_sr
- ortho_analytic_4b_xml
- ortho_udm2
- ortho_visual
