# In [None]:
#!/usr/bin/env python
# coding: utf-8

import cdsapi
import concurrent.futures
import os
import argparse
import logging
from functools import wraps
import time

# Configure logging for the whole script: records at INFO level and above are
# written to a fixed log file. The filename is relative, so it is resolved
# against the current working directory of the process.
# NOTE: this call sits at module top level, so it also runs when the module is
# imported, not only when it is executed as a script.
logging.basicConfig(
    filename='era5_download_2mTemp_6_34.log',
    level=logging.INFO,
    format='%(asctime)s %(levelname)s:%(message)s'
)

# Retry decorator with exponential backoff
def retry(max_retries=5, initial_delay=60, backoff_factor=2):
    """Build a decorator that re-runs a callable when it raises.

    Args:
        max_retries: Total number of attempts, including the first call.
            Must be at least 1.
        initial_delay: Seconds to sleep after the first failed attempt.
        backoff_factor: Multiplier applied to the delay after each failed
            attempt.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns on its first successful attempt; if the final
        attempt fails, that attempt's exception is re-raised unchanged.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1. Without this check
            the attempt loop would never execute, so the wrapped function
            would not be called at all and the wrapper would silently
            return ``None``.
    """
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            delay = initial_delay
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    logging.warning(f"Attempt {attempt} failed for {args}: {e}")
                    if attempt == max_retries:
                        # Out of attempts: surface the last error to the caller.
                        logging.error(f"All {max_retries} attempts failed for {args}")
                        raise
                    logging.info(f"Retrying in {delay} seconds...")
                    time.sleep(delay)
                    # Grow the wait before the next attempt.
                    delay *= backoff_factor
        return wrapper
    return decorator

# Function to verify the integrity of a NetCDF file using xarray
def verify_netcdf(file_path):
    """Report whether ``file_path`` can be opened (and closed) by xarray.

    The dataset is opened with ``xr.open_dataset`` and closed again right
    away. The outcome is written to the log either way.

    Args:
        file_path: Path of the file to check.

    Returns:
        ``True`` if opening and closing raised no exception, ``False``
        otherwise. Exceptions from the open/close step are logged, not raised.
    """
    # Imported here rather than at module level, so a missing xarray only
    # surfaces when verification is actually attempted.
    import xarray as xr

    is_valid = False
    try:
        # Leaving the ``with`` block closes the dataset.
        with xr.open_dataset(file_path):
            pass
        logging.info(f"Verification successful for {file_path}.")
        is_valid = True
    except Exception as err:
        logging.error(f"Verification failed for {file_path}: {err}")
    return is_valid

# Function to download data for a specific year with retries
@retry(max_retries=5, initial_delay=60, backoff_factor=2)
def download_year(year, client, output_dir, dataset, variables, product_type, months, days, hours):
    """Fetch one year of data into ``output_dir`` unless a valid file exists.

    The target file is named ``<dataset>_<variables joined by "_">_<year>.nc``.
    If that file is already present and passes ``verify_netcdf``, nothing is
    downloaded. Otherwise ``client.retrieve`` is called and the resulting file
    is verified.

    Args:
        year: Year to request (sent to the service as a string).
        client: Object exposing ``retrieve(dataset, request, target)``;
            ``main`` passes a ``cdsapi.Client``.
        output_dir: Directory that receives the file.
        dataset: Dataset name passed to ``client.retrieve``.
        variables: Sequence of variable names; also used in the file name.
        product_type: Value for the request's ``product_type`` field.
        months: Values for the request's ``month`` field.
        days: Values for the request's ``day`` field.
        hours: Values for the request's ``time`` field.

    Raises:
        Exception: If the downloaded file fails verification. Because of the
            ``retry`` decorator this (or any other error) triggers another
            attempt until the attempts are exhausted.
    """
    target = os.path.join(output_dir, f'{dataset}_{"_".join(variables)}_{year}.nc')

    # Existence is checked first so verification only runs on a real file.
    already_valid = os.path.exists(target) and verify_netcdf(target)
    if already_valid:
        print(f"File for year {year} already exists and is valid. Skipping download.")
        return

    request = {
        'product_type': product_type,
        'variable': variables,
        'year': str(year),
        'month': months,
        'day': days,
        'time': hours,
        'format': 'netcdf',
    }
    client.retrieve(dataset, request, target)

    if verify_netcdf(target):
        return
    # Raising lets the retry decorator schedule another attempt.
    raise Exception("Incomplete download.")

def main():
    """Parse command-line options and download one file per requested year.

    Each year from ``--start_year`` through ``--end_year`` (inclusive) is
    submitted to a thread pool as a separate ``download_year`` call. A failure
    for one year is logged together with that year and does not stop the
    remaining downloads; a summary is logged once all futures have finished.

    Invalid option combinations (start year after end year, fewer than one
    worker) are rejected through ``parser.error``, which prints a usage
    message and exits.
    """
    parser = argparse.ArgumentParser(description='Download ERA5 data with enhanced options.')
    parser.add_argument('--output_dir', type=str, required=True, help='Directory to save downloaded .nc files')
    parser.add_argument('--variables', type=str, nargs='+', default=['2m_temperature'], help='Variables to download')
    parser.add_argument('--start_year', type=int, default=1993, help='Start year for download (inclusive)')
    parser.add_argument('--end_year', type=int, default=2023, help='End year for download (inclusive)')
    parser.add_argument('--max_workers', type=int, default=4, help='Maximum concurrent downloads (default: 4)')
    parser.add_argument('--dataset', type=str, default='reanalysis-era5-single-levels',
                        help='ERA5 dataset (default: reanalysis-era5-single-levels)')
    parser.add_argument('--product_type', type=str, default='reanalysis',
                        help="Product type, e.g., 'reanalysis' or 'ensemble_members'")
    parser.add_argument('--months', type=str, nargs='+', default=[str(i).zfill(2) for i in range(1, 13)],
                        help="Months to download (default: all months)")
    parser.add_argument('--days', type=str, nargs='+', default=[str(i).zfill(2) for i in range(1, 32)],
                        help="Days to download (default: all days)")
    parser.add_argument('--hours', type=str, nargs='+', default=[f'{i:02d}:00' for i in range(24)],
                        help="Hours to download (default: all hours)")

    args = parser.parse_args()

    # Reject inputs that would otherwise do nothing silently (empty year
    # range) or fail later with a less helpful error (non-positive pool size).
    if args.start_year > args.end_year:
        parser.error(
            f"--start_year ({args.start_year}) must not be greater than --end_year ({args.end_year})"
        )
    if args.max_workers < 1:
        parser.error(f"--max_workers must be at least 1, got {args.max_workers}")

    # exist_ok=True already covers the "directory is present" case.
    os.makedirs(args.output_dir, exist_ok=True)

    # NOTE(review): one client instance is shared by all worker threads —
    # confirm cdsapi.Client supports concurrent retrieve() calls.
    client = cdsapi.Client()
    years = range(args.start_year, args.end_year + 1)

    failed_years = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
        # Remember which year each future belongs to so failures can be attributed.
        future_to_year = {
            executor.submit(
                download_year, year, client, args.output_dir, args.dataset, args.variables,
                args.product_type, args.months, args.days, args.hours
            ): year
            for year in years
        }
        for future in concurrent.futures.as_completed(future_to_year):
            year = future_to_year[future]
            try:
                future.result()
            except Exception as e:
                failed_years.append(year)
                logging.error(f"Download failed for year {year}: {e}")

    if failed_years:
        logging.error(f"Finished with failures for years: {sorted(failed_years)}")
    else:
        logging.info("All requested years completed without errors.")

# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
