In [19]:
import xarray as xr
import numpy as np
import os
import pandas as pd
import cdsapi
from datetime import datetime, timedelta

In [21]:
def _era5_request_times(hourly_step):
    '''Return the 'HH:00' strings for one day, every hourly_step hours starting at 00:00.'''
    return [f'{hour:02d}:00' for hour in range(0, 24, hourly_step)]


def _with_level_dim(dataset, level):
    '''Return dataset with a length-1 pressure-level dimension so files for different levels can be combined.'''
    # NOTE(review): whether a single-level request comes back with a level
    # dimension, a scalar level coordinate, or neither depends on the CDS
    # backend that produced the file -- confirm against a real download.
    for name in ('level', 'pressure_level'):
        if name in dataset.dims:
            return dataset
        if name in dataset.coords:
            # Promote the scalar coordinate to a dimension of length 1
            return dataset.expand_dims(name)
    return dataset.expand_dims({'level': [level]})


def download_era5_data(first_year, end_year, first_month, end_month, first_day, end_day, hourly_step, 
                       variables, individual_file_dir, combined_file_dir, pressure_levels = None):
    '''Downloads ERA5 data for the specified variables and pressure levels (or single level) at the
       specified hourly interval, and combines every downloaded file into a single NetCDF file.

    One CDS request is made per (day, time, variable, level) combination. Each response is stored in
    individual_file_dir as 'era5_<variable>_<YYYY>_<MM>_<DD>_<HHMM>[_<level>].nc' (the colon of the
    request time is dropped because it is not a legal filename character on Windows). All files are
    then combined by their coordinates and written to
    combined_file_dir as 'era5_combined_<YYYYMMDD of the first day>.nc'.

    The requested area is fixed at [62, -110, 30, -60]
    (presumably North, West, South, East as in the CDS API -- TODO confirm).

    Parameters: 
    - first_year: the first year you want data from.
    - end_year: the last year you want data from.
    - first_month: the first month you want data from.
    - end_month: the last month you want data from.
    - first_day: the first day you want data from.
    - end_day: the last day you want data from.
    - hourly_step: specifies the hourly interval at which the user wants data (e.g., 3 for every 3 hours).
                   Must be an integer >= 1.
    - variables: a list of ERA5 variables you want to download, or a single variable.
    - pressure_levels: a list of pressure levels you want data taken from (or a single level),
                       or None if single level is desired.
    - individual_file_dir: the directory where the individual, pre-concatenated files will be stored.
                           Created if it does not exist.
    - combined_file_dir: The directory where the larger, concatenated file will be stored.
                         Created if it does not exist.

    Returns:
    - None. The results are the files written to the two directories.

    Raises:
    - ValueError: if hourly_step < 1, if no variables are given, if the dates are not valid calendar
                  dates, or if the end date is earlier than the start date.
    '''

    # ---- Validate everything before touching the network or the filesystem ----
    if hourly_step < 1:
        raise ValueError(f'hourly_step must be >= 1, got {hourly_step}')

    # Ensure variables is a list (a bare string is a single variable, not a sequence of characters)
    if isinstance(variables, str):
        variables = [variables]
    else:
        variables = list(variables)
    if not variables:
        raise ValueError('At least one variable must be specified')

    # If pressure_levels is None, set it to an empty list; accept a single level as well
    if pressure_levels is None:
        pressure_levels = []
    elif isinstance(pressure_levels, (int, str)):
        pressure_levels = [pressure_levels]
    else:
        pressure_levels = list(pressure_levels)

    # Define the date range you want to download (datetime raises ValueError for impossible dates)
    start_date = datetime(first_year, first_month, first_day)
    end_date = datetime(end_year, end_month, end_day)
    if end_date < start_date:
        raise ValueError(f'End date {end_date:%Y-%m-%d} is earlier than start date {start_date:%Y-%m-%d}')

    # Generate list of dates, by day, using the start and end dates (both inclusive)
    date_range = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

    # Flexible list of string times, specified by hourly_step
    times = _era5_request_times(hourly_step)

    # Make sure both output directories exist before the first download is written
    os.makedirs(individual_file_dir, exist_ok=True)
    os.makedirs(combined_file_dir, exist_ok=True)

    # Call the cds api client
    c = cdsapi.Client()

    # A single None entry stands for "no pressure level", i.e. the single-levels product
    levels_to_request = pressure_levels if pressure_levels else [None]

    # Initialize an empty list to hold all datasets
    all_data = []

    # Loop through each day, each time on that day, each variable and each level
    for date in date_range:
        year = date.year
        month = date.month
        day = date.day

        for time in times:
            # ':' is not allowed in Windows filenames, so '06:00' becomes '0600' in the file name
            time_tag = time.replace(':', '')

            for variable in variables:
                for level in levels_to_request:
                    request = {
                        'product_type': 'reanalysis',
                        'variable': variable,
                        'year': f'{year}',
                        'month': f'{month:02d}',
                        'day': f'{day:02d}',
                        'time': time,
                        'area': [62, -110, 30, -60,],
                        'format': 'netcdf'
                    }

                    # Handle both cases where pressure levels are specified and not specified
                    if level is None:
                        product = 'reanalysis-era5-single-levels'
                        file_name = f'era5_{variable}_{year}_{month:02d}_{day:02d}_{time_tag}.nc'
                    else:
                        product = 'reanalysis-era5-pressure-levels'
                        request['pressure_level'] = f'{level}'
                        file_name = f'era5_{variable}_{year}_{month:02d}_{day:02d}_{time_tag}_{level}.nc'

                    data_file = os.path.join(individual_file_dir, file_name)
                    c.retrieve(product, request, data_file)

                    # Read the file fully into memory and close it again, so no file
                    # handles stay open while the remaining requests are running
                    dataset = xr.load_dataset(data_file)
                    if level is not None:
                        dataset = _with_level_dim(dataset, level)

                    # Append the dataset to the list of datasets (for BOTH products)
                    all_data.append(dataset)

    # The files differ by variable, level and time, so they are combined by their
    # coordinates rather than blindly concatenated along 'time'. Global attributes
    # are taken from the first file; they are not required to match across files.
    combined_dataset = xr.combine_by_coords(all_data, combine_attrs = 'override')

    # Save the combined dataset to a new NetCDF file
    combined_dataset.to_netcdf(os.path.join(combined_file_dir, f'era5_combined_{start_date.strftime("%Y%m%d")}.nc'))

In [23]:
# ERA5 fields to request and the pressure levels to request them on
variables = ['z', 'u', 'v']
pressure_levels = [850, 500, 300]

# Where the per-request files and the combined file are written
individual_file_dir = r"C:\Users\nweat\OneDrive\School\Blocking Research\ERA5IndividualFiles"
combined_file_dir = r"C:\Users\nweat\OneDrive\School\Blocking Research\ERA5CombinedFiles"

# Unused: the requested domain is fixed inside download_era5_data
#lat_range = [62, 30]
#lon_range = [250, 300]

# Single day (2023-08-24), sampled every 12 hours
download_era5_data(
    first_year = 2023,
    end_year = 2023,
    first_month = 8,
    end_month = 8,
    first_day = 24,
    end_day = 24,
    hourly_step = 12,
    variables = variables,
    individual_file_dir = individual_file_dir,
    combined_file_dir = combined_file_dir,
    pressure_levels = pressure_levels,
)

2024-07-02 14:53:01,018 INFO Welcome to the CDS
2024-07-02 14:53:01,020 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-pressure-levels
2024-07-02 14:53:01,260 INFO Request is queued


KeyboardInterrupt: 