# ERA5-Land Monthly Averaged

ERA5-Land is a high-resolution reanalysis dataset that provides a consistent and detailed view of land variables over several decades, combining model data with atmospheric forcing from ERA5 to ensure accuracy. By correcting input variables for altitude differences and leveraging indirect observational influences, it offers enhanced precision for land surface applications like flood and drought forecasting. Despite some inherent uncertainties, ERA5-Land's extensive temporal and spatial resolution makes it a valuable resource for decision-making and environmental analysis.

**Information on Dataset:**
* Source: [ERA5-Land Monthly Averaged Data](https://cds.climate.copernicus.eu/datasets/reanalysis-era5-land-monthly-means?tab=overview)
* Author:
* Notebook Version: 1.1 (Updated: December 09, 2024)

## 1. Specifying the paths and working directories

In [1]:
import os

# ---- Specify the directories here ----
# Paths are assembled with os.path.join so the same notebook works on Windows
# and on POSIX systems (a literal ".\data\..." string would create a single
# oddly named directory on Linux/macOS).
data_root = os.path.join(".", "data", "era5-land-monthly-data")
download_folder = os.path.join(data_root, "download")
working_folder = os.path.join(data_root, "working")
geotiff_folder = os.path.join(data_root, "geotiff")
csv_folder = os.path.join(data_root, "csv")
output_folder = os.path.join(data_root, "output")
# ---- End of user inputs ----

# Create every directory up front; exist_ok keeps re-running the cell harmless.
for _folder in (download_folder, working_folder, geotiff_folder,
                csv_folder, output_folder):
    os.makedirs(_folder, exist_ok=True)

## 2. Download and Extract Dataset

### 2.1 Authentication

In [2]:
import cdsapi

def main():
    """Create and return a CDS API client.

    Credentials are intentionally not stored in the notebook. The API key is
    read from the ``CDSAPI_KEY`` environment variable and the endpoint from
    ``CDSAPI_URL`` (defaulting to the public CDS endpoint). If no key is set,
    ``None`` is handed to ``cdsapi.Client`` so that the library can fall back
    to its own configuration lookup (``~/.cdsapirc``, per the cdsapi docs).

    NOTE(review): an API key was previously committed in this cell; it should
    be treated as compromised and regenerated in the CDS user profile.

    Returns:
        The ``cdsapi.Client`` instance used to submit download requests.
    """
    import os  # local import keeps this cell runnable on its own

    api_url = os.environ.get("CDSAPI_URL", "https://cds.climate.copernicus.eu/api")
    api_key = os.environ.get("CDSAPI_KEY")  # None -> cdsapi consults ~/.cdsapirc
    client = cdsapi.Client(url=api_url, key=api_key)
    return client

### 2.2 Request Definition and Download

#### 1. Create list of variables

The list of variables is available at [ERA5-Land Monthly Averaged](https://cds.climate.copernicus.eu/datasets/reanalysis-era5-land-monthly-means?tab=overview)

*If a request fails because of a variable name, please check the dataset page and correct the predefined name below.*

In [3]:
# Variable groups
# Each list holds the CDS request names of the variables in one thematic group.

# Air, surface and soil temperatures
var_group_temperature = [
    "2m_dewpoint_temperature",
    "2m_temperature",
    "skin_temperature",
    "soil_temperature_level_1",
    "soil_temperature_level_2",
    "soil_temperature_level_3",
    "soil_temperature_level_4",
]

# Lake variables
var_group_lake = [
    "lake_bottom_temperature",
    "lake_ice_depth",
    "lake_ice_temperature",
    "lake_mix_layer_depth",
    "lake_mix_layer_temperature",
    "lake_shape_factor",
    "lake_total_layer_temperature",
]

# Snow variables
var_group_snow = [
    "snow_albedo",
    "snow_cover",
    "snow_density",
    "snow_depth",
    "snow_depth_water_equivalent",
    "snowfall",
    "snowmelt",
    "temperature_of_snow_layer",
]

# Soil water variables
var_group_soil_water = [
    "skin_reservoir_content",
    "volumetric_soil_water_layer_1",
    "volumetric_soil_water_layer_2",
    "volumetric_soil_water_layer_3",
    "volumetric_soil_water_layer_4",
]

# Radiation and heat fluxes
var_group_radiation_and_heat = [
    "forecast_albedo",
    "surface_latent_heat_flux",
    "surface_net_solar_radiation",
    "surface_net_thermal_radiation",
    "surface_sensible_heat_flux",
    "surface_solar_radiation_downwards",
    "surface_thermal_radiation_downwards",
]

# Evaporation and runoff
var_group_evaporation_and_runoff = [
    "evaporation_from_bare_soil",
    "evaporation_from_open_water_surfaces_excluding_oceans",
    "evaporation_from_the_top_of_canopy",
    "evaporation_from_vegetation_transpiration",
    "potential_evaporation",
    "runoff",
    "snow_evaporation",
    "sub_surface_runoff",
    "surface_runoff",
    "total_evaporation",
]

# Wind, pressure and precipitation
var_group_wind_pressure_and_precipitation = [
    "10m_u_component_of_wind",
    "10m_v_component_of_wind",
    "surface_pressure",
    "total_precipitation",
]

# Vegetation
var_group_vegetation = [
    "leaf_area_index_high_vegetation",
    "leaf_area_index_low_vegetation",
]

# Names (as strings) of the group lists above; a later cell resolves the
# selected name back to its list via globals(), so each entry must match
# the corresponding variable name exactly.
var_group_list = [
    'var_group_temperature',
    'var_group_lake',
    'var_group_snow',
    'var_group_soil_water',
    'var_group_radiation_and_heat',
    'var_group_evaporation_and_runoff',
    'var_group_wind_pressure_and_precipitation',
    'var_group_vegetation',
]

#### 2. Define request parameters for download

In [4]:
import ipywidgets as widgets

# Dropdown for picking one of the variable groups; the first group in
# var_group_list is pre-selected.
selected_variable_group = widgets.Dropdown(
    description="Select a variable group",
    options=var_group_list,
    value=var_group_list[0],
    layout=widgets.Layout(width='50%'),
    style={'description_width': 'initial'},
)

# Bare expression as the last statement so the notebook renders the widget.
selected_variable_group

Dropdown(description='Select a variable group', layout=Layout(width='50%'), options=('var_group_temperature', …

In [5]:
# The group dropdown stores the *name* of a list; look that name up among the
# module-level variables to obtain the list itself.
current_variable_group = globals().get(selected_variable_group.value)

# Dropdown for the individual variable; the second entry of the chosen group
# is pre-selected.
selected_variable = widgets.Dropdown(
    description="Select the variable of interest",
    options=current_variable_group,
    value=current_variable_group[1],
    layout=widgets.Layout(width='50%'),
    style={'description_width': 'initial'},
)

# Bare expression as the last statement so the notebook renders the widget.
selected_variable

Dropdown(description='Select the variable of interest', index=1, layout=Layout(width='50%'), options=('2m_dewp…

#### 3. Define Bounding Box Extents (Bbox) for the Dataset

In [6]:
# Bounding boxes are additional request fields that keep the request within
# the file size limit. The coordinates below were obtained using the
# BBox Extractor tool: https://str-ucture.github.io/bbox-extractor/
# Value order in each list: North, West, South, East.

bbox_wgs84_deutschland = [
    56.0,  # North
    5.8,   # West
    47.2,  # South
    15.0,  # East
]
bbox_wgs84_konstanz = [
    47.9,  # North
    8.9,   # West
    47.6,  # South
    9.3,   # East
]

# Alternatively, use a shapefile for precise geographic filtering
import geopandas as gpd
import math

# Example: load a boundary shapefile and derive the request area from it.
# NOTE(review): the file is named "de_boundary", so this is presumably the
# German boundary in WGS84 coordinates - confirm the CRS of the shapefile.
de_shapefile = r"./shapefiles/de_boundary.shp"
de_gdf = gpd.read_file(de_shapefile)
de_bounds = de_gdf.total_bounds  # (minx, miny, maxx, maxy) per the geopandas docs

# Snap the lower corner down and the upper corner up to one decimal place,
# then widen each side by a further 0.1 as a buffer.
de_bounds_adjusted = (
    [math.floor(lower * 10) / 10 - 0.1 for lower in de_bounds[:2]]
    + [math.ceil(upper * 10) / 10 + 0.1 for upper in de_bounds[2:]]
)

# Reorder (minx, miny, maxx, maxy) into the North, West, South, East order
# used by the other bounding boxes in this notebook.
bbox_de_bounds_adjusted = [de_bounds_adjusted[i] for i in (3, 0, 1, 2)]

#### 4. Define request parameters

In [7]:
# CDS dataset identifier and the request describing what to download.
dataset = "reanalysis-era5-land-monthly-means"
request = {
    "product_type": ["monthly_averaged_reanalysis_by_hour_of_day"],
    # Single variable chosen in the dropdown above.
    "variable": selected_variable.value,
    # All years from 1950 to 2024 inclusive.
    "year": [str(year) for year in range(1950, 2024 + 1)],
    # Months "01".."12". The range must start at 1: range(13) would also
    # produce month "0", which does not exist.
    "month": [f"{month:02d}" for month in range(1, 12 + 1)],
    # All 24 hours of the day.
    # TODO(review): selecting multiple hours was observed not to generate all
    # time steps - check the downloaded file for completeness.
    "time": [f"{hour:02d}:00" for hour in range(24)],
    "data_format": "netcdf",
    "download_format": "unarchived",
    # Area in North, West, South, East order.
    "area": bbox_de_bounds_adjusted,
}

In [8]:
# # Run this cell to download the dataset:
# # check if all time steps are available
# dataset_filename = f"{dataset}-{request['product_type'][0]}-{selected_variable.value}_check2.nc"
# dataset_filepath = os.path.join(download_folder, dataset_filename)

# def main_retrieve():
#     # Download the dataset only if the dataset has not been downloaded before
#     if not os.path.isfile(dataset_filepath):
#         # Download the dataset with the defined request parameters
#         client.retrieve(dataset, request, dataset_filepath)
#     else:
#         print("Dataset already downloaded.")

# if __name__ == "__main__":
#     client = main()
#     main_retrieve()

### 2.3 Extract the Zip folder

(**Note**: Because the request covers only a single variable, CDS delivers one netCDF file directly and does not wrap it in a zip archive, so this extraction step can be skipped.)

In [9]:
# import zipfile

# extract_folder = os.path.join(working_folder, f"{selected_variable.value}")
# os.makedirs(extract_folder, exist_ok=True)

# # Extract the zip file
# try:
#     if not os.listdir(extract_folder):
#         dataset_filename = dataset_filename = f"{dataset}-{request['product_type'][0]}-{selected_variable.value}.nc"
#         dataset_filepath = os.path.join(download_folder, dataset_filename)

#         with zipfile.ZipFile(dataset_filepath, 'r') as zip_ref:
#             zip_ref.extractall(extract_folder)
#             print(f"Successfully extracted files to: {extract_folder}")
#     else:
#         print("Folder is not empty. Skipping extraction.")
# except FileNotFoundError:
#     print(f"Error: The file {dataset_filepath} was not found.")
# except zipfile.BadZipFile:
#     print(f"Error: The file {dataset_filepath} is not a valid zip file.")
# except Exception as e:
#     print(f"An unexpected error occurred: {e}")

## 3. Read the netCDF file and print the metadata

In [10]:
# import netCDF4 as nc

# nc_dataset = nc.Dataset(dataset_filepath, mode='r')

# # List all variables in the dataset
# variables_list = list(nc_dataset.variables.keys())

# print(f"Available variables: {variables_list}")

In [11]:
# import pandas as pd

# rows = []
# for test_var in variables_list:
#     try:
#         var_obj = nc_dataset.variables[test_var]
#         unit = getattr(var_obj, 'units', 'N/A')
#         shape = var_obj.shape
#         rows.append({
#             "nc_variables": test_var,
#             "unit": unit,
#             "shape": shape
#         })
#     except Exception as e:
#         print(f"Error processing variable {test_var}: {e}")

# # Create a DataFrame
# df = pd.DataFrame(rows)
# df

In [12]:
# variable_name = 't2m'
# variable_data = nc_dataset[variable_name]

# # Generate summary of the primary variable
# summary = {
#     "Variable Name": variable_name,
#     "Data Type": variable_data.dtype,
#     "Shape": variable_data.shape,
#     "Variable Info": f"{variable_data.dimensions}",
#     "Units": getattr(variable_data, "units", "N/A"),
#     "Long Name": getattr(variable_data, "long_name", "N/A"),
# }

# # Display dataset summary as a DataFrame for better visualization
# nc_summary = pd.DataFrame(list(summary.items()), columns=['Description', 'Remarks'])

# # Display the summary DataFrame
# display(nc_summary)

## 4. Export Dataset to CSV

In [13]:
# # Define additional request fields to ensure the request stays within the file size limit.
# # These coordinates were obtained using the BBox Extractor tool:
# # https://str-ucture.github.io/bbox-extractor/

# bbox_wgs84_konstanz = [47.9, 8.9, 47.6, 9.3]  # Format: [North, West, South, East]
# bbox_wgs84_konstanz_standard = [9.0, 47.6, 9.3, 47.8]  # Standard format: [West, South, East, North]
# bbox_wgs84_de_standard = [5.7, 47.1, 15.2, 55.2]  # Standard format: [West, South, East, North]

In [14]:
# import xarray as xr

# # Open the NetCDF dataset using xarray
# def netcdf_to_dataframe(nc_file, bounding_box=None):
#     with xr.open_dataset(nc_file) as nc_dataset:
        
#         variable_data = nc_dataset[variable_name]
        
#         # Ensure latitude and longitude names are correct
#         latitude_name = 'latitude' if 'latitude' in nc_dataset.coords else 'lat'
#         longitude_name = 'longitude' if 'longitude' in nc_dataset.coords else 'lon'

#         if bounding_box:
#             # Filter the data based on latitude and longitude
#             filtered_data = variable_data.where(
#                 (nc_dataset[latitude_name] >= bounding_box[1]) & (nc_dataset[latitude_name] <= bounding_box[3]) &
#                 (nc_dataset[longitude_name] >= bounding_box[0]) & (nc_dataset[longitude_name] <= bounding_box[2]),
#                 drop=True
#             )
#         else:
#             filtered_data = variable_data

#         # Convert the variable data to a DataFrame
#         df = filtered_data.to_dataframe().reset_index().set_index(['valid_time', 'latitude', 'longitude'])
#         return df

# df = netcdf_to_dataframe(nc_file=dataset_filepath, bounding_box=bbox_wgs84_konstanz_standard)
# df