In [None]:
from maya4 import list_base_files_in_repo, list_repos_by_author
import pandas as pd

# ============================================================================
# 🗺️ FEATURE 1: DATASET EXPLORATION - Browse Available Products
# ============================================================================
# Section banner: an 80-character rule, the title, then another rule followed
# by a blank line.
print('='*80)
print('🗺️ Exploring Available SAR Products on HuggingFace')
print('='*80 + '\n')

def extract_product_info(filename: str) -> tuple:
    """
    Split a SAR ``.zarr`` filename into its product name and polarization.

    The name (without the ``.zarr`` extension) is split on hyphens. The 5th
    token (index 4) is taken as the polarization and upper-cased; the
    remaining tokens are re-joined with hyphens to form the product name.

    Args:
        filename: SAR product filename (e.g., "s1a-s1-raw-s-hh-20230508t121142-....zarr")

    Returns:
        tuple: ``(product_name_without_polarization, polarization)``.
        ``(None, None)`` when ``filename`` does not end with ``.zarr``.
        ``(name_without_extension, "UNKNOWN")`` when the name has fewer than
        five hyphen-separated tokens.

    Example:
        Input:  "s1a-s6-raw-s-vh-20231004t082954-20231004t083025-050613-0618e5.zarr"
        Output: ("s1a-s6-raw-s-20231004t082954-20231004t083025-050613-0618e5", "VH")
    """
    suffix = '.zarr'
    if not filename.endswith(suffix):
        return None, None

    # Strip only the trailing extension. str.replace() would also delete any
    # '.zarr' that happens to appear earlier in the name.
    stem = filename[:-len(suffix)]
    tokens = stem.split('-')

    if len(tokens) < 5:
        # Fallback if the filename doesn't match the expected pattern
        return stem, "UNKNOWN"

    polarization = tokens[4].upper()  # e.g. VH, VV, HH, HV

    # Product name is every token except the polarization one
    product_name = '-'.join(tokens[:4] + tokens[5:])
    return product_name, polarization


# List all repositories from the Maya4 organization on HuggingFace and build
# `product_catalog`, a DataFrame with one row per recognised .zarr product.
try:
    projects = list_repos_by_author(author="Maya4")
    print(f'✓ Found {len(projects)} repositories in Maya4 organization:\n')

    # Rows are accumulated in a plain list and turned into a DataFrame once,
    # after the loops: calling pd.concat per file re-copies the whole frame
    # on every iteration.
    catalog_columns = ["Product Name", "File Name", "Polarization", "Repository"]
    catalog_rows = []

    # Iterate through each repository (PT1, PT2, etc.)
    for i, project in enumerate(projects, 1):
        print(f'   {i}. {project}', end=' ')

        # List all SAR product files in this repository
        remote_files = list_base_files_in_repo(repo_id=f"{project}")
        print(f'({len(remote_files)} products)')

        # Extract metadata from each filename; non-.zarr entries come back as
        # (None, None) and are skipped.
        for file in remote_files:
            prod_name, polarization = extract_product_info(file)

            if prod_name is not None and polarization is not None:
                catalog_rows.append({
                    "Product Name": prod_name,
                    "File Name": file,
                    "Polarization": polarization,
                    "Repository": project,
                })

    # Passing `columns` keeps the four columns present even with zero rows.
    product_catalog = pd.DataFrame(catalog_rows, columns=catalog_columns)

    print(f'\n✓ Total products in catalog: {len(product_catalog)}')

    # Display summary statistics
    print('\n📊 Catalog Statistics:')
    print(f'   • Polarizations: {product_catalog["Polarization"].unique()}')
    print('   • Products per repository:')
    for repo in product_catalog["Repository"].unique():
        count = len(product_catalog[product_catalog["Repository"] == repo])
        print(f'     - {repo}: {count} products')

    # Show first few entries
    print('\n📋 Sample entries from catalog:')
    print(product_catalog.head(10).to_string(index=False))

except Exception as e:
    # Best-effort demo cell: report the failure instead of aborting the notebook.
    print(f'⚠️ Could not fetch repository list: {e}')
    print('   (This requires internet connection and HuggingFace access)')

# ============================================================================
# 🎨 FEATURE 2: SAMPLE VISUALIZATION
# ============================================================================
print('\n' + '='*80)
print('🎨 Sample Visualization (if data available)')
print('='*80 + '\n')

# Create a simple loader for visualization.
# NOTE(review): `get_sar_dataloader` and `DATA_DIR` are not defined in this
# cell; presumably an earlier cell provides them. If they are missing, the
# resulting NameError is caught below and reported as a setup failure.
try:
    viz_loader = get_sar_dataloader(
        data_dir=DATA_DIR,
        level_from="rcmc",
        level_to="az",
        batch_size=1,
        num_workers=0,
        patch_mode="rectangular",
        patch_size=(5000, 5000),  # Large patch for visualization
        buffer=(0, 0),
        stride=(1000, 1000),
        shuffle_files=False,
        patch_order="row",
        complex_valued=True,
        save_samples=False,
        backend="zarr",
        verbose=False,
        samples_per_prod=1,
        cache_size=10,
        online=True,
        max_products=1,
        positional_encoding=False
    )

    # The dataset has a built-in visualization method
    # Usage: dataset.visualize_item([file_path, y_coord, x_coord], vminmax=(min, max))
    print('✓ Visualization loader created')
    # The hint names `viz_loader`, the object created above, so it can be
    # pasted into a following cell as-is.
    print('  Use: viz_loader.dataset.visualize_item([file_path, y, x], vminmax=(min, max))')
    print('  Example: Shows input and target side-by-side with amplitude/phase')

except Exception as e:
    # Best-effort demo cell: report the failure instead of aborting the notebook.
    print(f'⚠️ Visualization setup failed: {e}')

# ============================================================================
# 🔍 FEATURE 3: DIFFERENT PATCH SAMPLING STRATEGIES
# ============================================================================
print('\n' + '='*80)
print('🔍 Patch Sampling Strategy Comparison')
print('='*80 + '\n')

# Display-only catalogue of sampling strategies: each entry carries the
# `patch_order` value to show, an optional `block_pattern`, and explanatory
# text. Nothing here is passed to a loader in this cell.
sampling_strategies = {
    "Row-wise (Horizontal Raster Scan)": {
        "patch_order": "row",
        "description": "Samples patches left→right, top→bottom like reading text",
        "use_case": "Sequential horizontal feature learning, coherent spatial ordering"
    },
    "Column-wise (Vertical Raster Scan)": {
        "patch_order": "col",
        "description": "Samples patches top→bottom, left→right",
        "use_case": "Range direction analysis, vertical feature extraction"
    },
    "Chunk-aligned (I/O Optimized)": {
        "patch_order": "chunk",
        "description": "Follows Zarr storage chunks for efficient disk/network access",
        "use_case": "Maximum I/O performance, minimizes cache misses"
    },
    "Block Pattern Sampling": {
        "patch_order": "row",
        "block_pattern": "(32, -1)",
        "description": "Divides image into blocks, samples within each block",
        "use_case": "Stratified sampling, ensuring coverage across entire scene"
    }
}

for strategy_name, config in sampling_strategies.items():
    print(f'📍 {strategy_name}')

    # Build the whole configuration line first so it is emitted by one print.
    configuration = f'{config.get("patch_order", "row")}'
    if "block_pattern" in config:
        configuration += f' with block_pattern={config["block_pattern"]}'
    print(f'   Configuration: {configuration}')

    print(f'   Description: {config["description"]}')
    print(f'   Use Case: {config["use_case"]}\n')

# ============================================================================
# 📊 FEATURE 4: PROCESSING LEVEL COMBINATIONS
# ============================================================================
print('='*80)
print('📊 Available Processing Level Combinations')
print('='*80 + '\n')

# Display-only table keyed by "<level_from> → <level_to>"; each value holds
# the task name, a short description, and a complexity note.
processing_levels = {
    "raw → rc": {
        "task": "Range Compression",
        "description": "Learn how radar echoes are compressed in range direction",
        "complexity": "Low - 1D processing"
    },
    "rc → rcmc": {
        "task": "Range Cell Migration Correction",
        "description": "Learn to correct for target motion during acquisition",
        "complexity": "Medium - Geometric correction"
    },
    "rcmc → az": {
        "task": "Azimuth Compression (Focusing)",
        "description": "Learn final focusing step to create SAR image",
        "complexity": "High - 2D focusing"
    },
    "raw → rcmc": {
        "task": "Combined RC + RCMC",
        "description": "Multi-stage processing in one step",
        "complexity": "Medium-High - Combined operations"
    },
    "raw → az": {
        "task": "End-to-End SAR Processing",
        "description": "Complete processing chain from echoes to image",
        "complexity": "Very High - Full focusing pipeline"
    }
}

# Print one formatted entry per level pair, in insertion order.
for level_pair, info in processing_levels.items():
    print(f'🎯 {level_pair}')
    print(f'   Task: {info["task"]}')
    print(f'   Description: {info["description"]}')
    print(f'   Complexity: {info["complexity"]}\n')

# ============================================================================
# 🎓 SUMMARY
# ============================================================================
print('='*80)
print('🎓 Key Takeaways')
print('='*80)

# Closing checklist, printed as a numbered list below.
takeaways = [
    "Use filters to select specific years, polarizations, and modes",
    "Choose processing levels (level_from → level_to) based on your research goal",
    "Optimize patch_order: 'chunk' for speed, 'row'/'col' for spatial coherence",
    "Enable positional_encoding for models that need spatial awareness",
    "Use concatenate_patches for sequence learning tasks",
    "Set online=True to automatically download from HuggingFace",
    "Start with max_products=1 for testing, then scale up",
    "Use transforms for normalization - essential for neural network training",
    # Lower-case to match the keyword used in the get_sar_dataloader call
    # earlier in this cell.
    "complex_valued=True for native SAR representation, False for real/imag channels",
    "Monitor verbose=True output during development for debugging"
]

# Right-align the index in a two-character field so items 1-9 line up with 10.
for i, takeaway in enumerate(takeaways, 1):
    print(f'  {i:2d}. {takeaway}')

print('\n' + '='*80)
print('✅ Complete functionality demonstration finished!')
print('='*80)