## Using API to Parse the ERA5 Reanalysis Data from CDS

In [1]:
import cdsapi

# 创建客户端
client = cdsapi.Client()

# 数据集名称
dataset = "reanalysis-era5-land"

# 请求的变量
variables = [
    "total_precipitation",          # 降水
    "2m_temperature",               # 温度
    "10m_u_component_of_wind",      # 风场
    "surface_pressure",             # 气压
    "surface_net_solar_radiation"   # 辐射
]

# 北美范围
north_america_extent = {
    "area": [85, 190, 10, 310],  # 格式：[北纬, 西经, 南纬, 东经]
}

# 时间分辨率（6小时）
time_intervals = [
    "00:00", "06:00", "12:00", "18:00"
]

# 下载 2024 年 1 到 11 月的数据
for month in range(1, 12):
    print(f"Downloading data for month {month:02d}...")

    # 请求参数
    request = {
        "format": "netcdf",
        "product_type": "reanalysis",
        "variable": variables,
        "year": "2024",
        "month": f"{month:02d}",
        "day": [f"{day:02d}" for day in range(1, 32)],  # 天数
        "time": time_intervals,
        **north_america_extent,  # 添加地理范围
    }

    # 输出文件路径
    output_filename = f"era5_2024_month_{month:02d}_north_america.nc"

    # 执行抓取
    client.retrieve(dataset, request, output_filename)
    print(f"Data for month {month:02d} saved to {output_filename}.")

2024-11-24 22:07:54,248 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-11-24 22:07:54,248 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-11-24 22:07:54,249 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**


Downloading data for month 01...


2024-11-24 22:07:54,909 INFO Request ID is 167a5361-2e12-4ef8-9534-e4aed6ac501c
2024-11-24 22:07:55,121 INFO status has been updated to accepted
2024-11-24 22:07:56,361 INFO status has been updated to running
2024-11-24 22:10:59,965 INFO status has been updated to successful
                                                                                        

Data for month 01 saved to era5_2024_month_01_north_america.nc.
Downloading data for month 02...


2024-11-24 22:11:42,509 INFO Request ID is da32d82e-8373-49c5-9439-2925034eb103
2024-11-24 22:11:42,762 INFO status has been updated to accepted
2024-11-24 22:11:45,816 INFO status has been updated to running
2024-11-24 22:14:37,105 INFO status has been updated to successful
                                                                                        

Data for month 02 saved to era5_2024_month_02_north_america.nc.
Downloading data for month 03...


2024-11-24 22:14:56,646 INFO Request ID is 7b8d0a7d-e486-4e8d-b0e9-c933beff168c
2024-11-24 22:14:56,807 INFO status has been updated to accepted
2024-11-24 22:14:59,645 INFO status has been updated to running
2024-11-24 22:19:20,999 INFO status has been updated to successful
                                                                                        

Data for month 03 saved to era5_2024_month_03_north_america.nc.
Downloading data for month 04...


2024-11-24 22:22:19,364 INFO Request ID is e14d5bf7-7f04-43c1-bb25-e2b48d927ddf
2024-11-24 22:22:19,570 INFO status has been updated to accepted
2024-11-24 22:22:22,495 INFO status has been updated to running
2024-11-24 22:26:40,444 INFO status has been updated to successful
                                                                                        

Data for month 04 saved to era5_2024_month_04_north_america.nc.
Downloading data for month 05...


2024-11-24 22:28:32,104 INFO Request ID is 1e71da0b-81a4-46fb-89bf-663a43115aaf
2024-11-24 22:28:32,452 INFO status has been updated to accepted
2024-11-24 22:28:41,706 INFO status has been updated to running
2024-11-24 22:32:53,851 INFO status has been updated to successful
                                                                                        

Data for month 05 saved to era5_2024_month_05_north_america.nc.
Downloading data for month 06...


2024-11-24 22:33:19,695 INFO Request ID is 0d110543-48f3-49b2-a157-6fbb6e0e470d
2024-11-24 22:33:19,847 INFO status has been updated to accepted
2024-11-24 22:33:22,667 INFO status has been updated to running
2024-11-24 22:36:16,274 INFO status has been updated to successful
                                                                                        

Data for month 06 saved to era5_2024_month_06_north_america.nc.
Downloading data for month 07...


2024-11-24 22:36:41,324 INFO Request ID is 039481d0-e55c-4bac-9a08-b34163bde167
2024-11-24 22:36:41,489 INFO status has been updated to accepted
2024-11-24 22:36:44,301 INFO status has been updated to running
2024-11-24 22:41:02,794 INFO status has been updated to successful
                                                                                        

Data for month 07 saved to era5_2024_month_07_north_america.nc.
Downloading data for month 08...


2024-11-24 22:43:07,510 INFO Request ID is 07e22f29-c05f-47be-8e36-1ef66d8adea3
2024-11-24 22:43:07,665 INFO status has been updated to accepted
2024-11-24 22:43:13,059 INFO status has been updated to running
2024-11-24 22:47:28,219 INFO status has been updated to successful
                                                                                        

Data for month 08 saved to era5_2024_month_08_north_america.nc.
Downloading data for month 09...


2024-11-24 22:47:49,589 INFO Request ID is 82a7173d-b362-4919-9951-03d31238d869
2024-11-24 22:47:49,716 INFO status has been updated to accepted
2024-11-24 22:47:52,538 INFO status has been updated to running
2024-11-24 22:54:10,636 INFO status has been updated to successful
                                                                                        

Data for month 09 saved to era5_2024_month_09_north_america.nc.
Downloading data for month 10...


2024-11-24 22:56:05,472 INFO Request ID is 410ff86a-aeee-44b5-bd5b-66e4c5e7a3da
2024-11-24 22:56:05,624 INFO status has been updated to accepted
2024-11-24 22:56:11,257 INFO status has been updated to running
2024-11-24 23:00:27,428 INFO status has been updated to successful
                                                                                        

Data for month 10 saved to era5_2024_month_10_north_america.nc.
Downloading data for month 11...


2024-11-24 23:01:38,949 INFO Request ID is c724c912-4670-43ba-998a-edba382eac35
2024-11-24 23:01:39,128 INFO status has been updated to accepted
2024-11-24 23:01:44,337 INFO status has been updated to running
2024-11-24 23:06:00,631 INFO status has been updated to successful
                                                                                       

Data for month 11 saved to era5_2024_month_11_north_america.nc.




## Combine the nc file 

In [8]:
import xarray as xr
import os

# 设置数据文件夹和输出文件夹路径
data_directory = "Data"  # 存放原始 .nc 文件的目录
output_directory = "Merged_Batches"  # 存放分批次合并后的文件

# 创建输出文件夹（如果不存在）
os.makedirs(output_directory, exist_ok=True)

# 获取所有 .nc 文件的路径
file_paths = sorted([os.path.join(data_directory, f) for f in os.listdir(data_directory) if f.endswith(".nc")])

# 设置批次大小（例如每批合并 3 个文件）
batch_size = 3

# 分批合并文件
print("Starting batch merge...")
for batch_idx, i in enumerate(range(0, len(file_paths), batch_size)):
    # 获取当前批次的文件
    batch_files = file_paths[i:i + batch_size]
    print(f"Merging batch {batch_idx + 1} with files: {batch_files}")

    # 打开并合并当前批次的文件
    batch_ds = xr.open_mfdataset(batch_files, combine="by_coords")

    # 输出文件名
    batch_output_file = os.path.join(output_directory, f"batch_{batch_idx + 1}.nc")
    batch_ds.to_netcdf(batch_output_file)
    print(f"Batch {batch_idx + 1} saved to {batch_output_file}")

# 输出完成信息
print("All batches have been successfully merged and saved.")


Starting batch merge...
Merging batch 1 with files: ['Data/era5_2024_month_01_north_america.nc', 'Data/era5_2024_month_02_north_america.nc', 'Data/era5_2024_month_03_north_america.nc']
Batch 1 saved to Merged_Batches/batch_1.nc
Merging batch 2 with files: ['Data/era5_2024_month_04_north_america.nc', 'Data/era5_2024_month_05_north_america.nc', 'Data/era5_2024_month_06_north_america.nc']
Batch 2 saved to Merged_Batches/batch_2.nc
Merging batch 3 with files: ['Data/era5_2024_month_07_north_america.nc', 'Data/era5_2024_month_08_north_america.nc', 'Data/era5_2024_month_09_north_america.nc']
Batch 3 saved to Merged_Batches/batch_3.nc
Merging batch 4 with files: ['Data/era5_2024_month_10_north_america.nc', 'Data/era5_2024_month_11_north_america.nc']
Batch 4 saved to Merged_Batches/batch_4.nc
All batches have been successfully merged and saved.


In [10]:
import xarray as xr

# 文件路径列表
file_paths = [
    "Merged_Batches/batch_1.nc",
    "Merged_Batches/batch_2.nc",
    "Merged_Batches/batch_3.nc",
    "Merged_Batches/batch_4.nc"
]

# 检查每个文件
for file_path in file_paths:
    print(f"Checking file: {file_path}")
    try:
        ds = xr.open_dataset(file_path)
        print("Variables:", list(ds.data_vars.keys()))
        print("Dimensions:", ds.dims)
        print("Coordinates:", ds.coords)
        print(f"Time range: {ds['valid_time'].values[0]} to {ds['valid_time'].values[-1]}")
        print("File check passed.\n")
    except Exception as e:
        print(f"Error while loading {file_path}: {e}\n")


Checking file: Merged_Batches/batch_1.nc
Variables: ['tp', 't2m', 'u10', 'sp', 'ssr']
Coordinates: Coordinates:
    number      int64 8B ...
  * valid_time  (valid_time) datetime64[ns] 3kB 2024-01-01 ... 2024-03-31T18:...
  * latitude    (latitude) float64 6kB 85.0 84.9 84.8 84.7 ... 10.2 10.1 10.0
  * longitude   (longitude) float64 10kB 190.0 190.1 190.2 ... 309.8 309.9 310.0
    expver      (valid_time) <U4 6kB ...
Time range: 2024-01-01T00:00:00.000000000 to 2024-03-31T18:00:00.000000000
File check passed.

Checking file: Merged_Batches/batch_2.nc
Variables: ['tp', 't2m', 'u10', 'sp', 'ssr']
Coordinates: Coordinates:
    number      int64 8B ...
  * valid_time  (valid_time) datetime64[ns] 3kB 2024-04-01 ... 2024-06-30T18:...
  * latitude    (latitude) float64 6kB 85.0 84.9 84.8 84.7 ... 10.2 10.1 10.0
  * longitude   (longitude) float64 10kB 190.0 190.1 190.2 ... 309.8 309.9 310.0
    expver      (valid_time) <U4 6kB ...
Time range: 2024-04-01T00:00:00.000000000 to 2024-06-30T18:00

In [None]:
# 合并所有批次文件
try:
    ds_merged = xr.open_mfdataset(file_paths, combine="by_coords")
    print("Merged dataset dimensions:", ds_merged.dims)
    print("Merged dataset variables:", list(ds_merged.data_vars.keys()))
    print(f"Merged time range: {ds_merged['valid_time'].values[0]} to {ds_merged['valid_time'].values[-1]}")
    ds_merged.to_netcdf("Merged_Batches/merged_dataset.nc")
    print("All batches successfully merged and saved.")
except Exception as e:
    print(f"Error while merging batches: {e}")

In [11]:
# 合并所有批次文件
try:
    ds_merged = xr.open_mfdataset(file_paths, combine="by_coords")
    print("Merged dataset dimensions:", ds_merged.dims)
    print("Merged dataset variables:", list(ds_merged.data_vars.keys()))
    print(f"Merged time range: {ds_merged['valid_time'].values[0]} to {ds_merged['valid_time'].values[-1]}")
    ds_merged.to_netcdf("Merged_Batches/merged_dataset.nc")
    print("All batches successfully merged and saved.")
except Exception as e:
    print(f"Error while merging batches: {e}")


Merged dataset variables: ['tp', 't2m', 'u10', 'sp', 'ssr']
Merged time range: 2024-01-01T00:00:00.000000000 to 2024-11-20T00:00:00.000000000
All batches successfully merged and saved.


In [12]:
# 将数据转换为 DataFrame 并保存为 CSV
try:
    df = ds_merged["tp"].to_dataframe().reset_index()
    print("Data size:", df.shape)
    print(f"First few rows:\n{df.head()}")
    df.to_csv("Merged_Batches/precipitation_data.csv", index=False)
    print("Data saved as precipitation_data.csv")
except Exception as e:
    print(f"Error while converting dataset to CSV: {e}")


Error while converting dataset to CSV: Unable to allocate 17.4 GiB for an array with shape (1297, 751, 1201) and data type <U4
