<a href="https://colab.research.google.com/github/machiwao/CCTHESS1-CCTHESS2-Dev-and-Docs/blob/coco/ERA5-Land_Daily_Aggregated_'14_'23_Iba.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install earthengine-api geemap --quiet

In [2]:
import ee
import geemap
import pandas as pd

In [3]:
# Authenticate with Google Earth Engine
ee.Authenticate()
ee.Initialize(project='heat-index-472312')

*** Earth Engine *** Share your feedback by taking our Annual Developer Satisfaction Survey: https://google.qualtrics.com/jfe/form/SV_7TDKVSyKvBdmMqW?ref=4i2o6


In [4]:
station_name = "Iba"   # Example, replace with your station
station_coords = [119.965661, 15.328408]  # [longitude, latitude] of your station
buffer_km = 25

In [5]:
station_geom = ee.Geometry.Point(station_coords).buffer(buffer_km * 1000)
print(f"Station: {station_name}, Geometry created")

Station: Iba, Geometry created


In [6]:
dataset = ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR") \
  .filterDate('2014-01-01', '2023-12-31')

In [7]:
print("Available Bands:", dataset.first().bandNames().getInfo())
print("Dataset size:", dataset.size().getInfo())

Available Bands: ['dewpoint_temperature_2m', 'temperature_2m', 'skin_temperature', 'soil_temperature_level_1', 'soil_temperature_level_2', 'soil_temperature_level_3', 'soil_temperature_level_4', 'lake_bottom_temperature', 'lake_ice_depth', 'lake_ice_temperature', 'lake_mix_layer_depth', 'lake_mix_layer_temperature', 'lake_shape_factor', 'lake_total_layer_temperature', 'snow_albedo', 'snow_cover', 'snow_density', 'snow_depth', 'snow_depth_water_equivalent', 'snowfall_sum', 'snowmelt_sum', 'temperature_of_snow_layer', 'skin_reservoir_content', 'volumetric_soil_water_layer_1', 'volumetric_soil_water_layer_2', 'volumetric_soil_water_layer_3', 'volumetric_soil_water_layer_4', 'forecast_albedo', 'surface_latent_heat_flux_sum', 'surface_net_solar_radiation_sum', 'surface_net_thermal_radiation_sum', 'surface_sensible_heat_flux_sum', 'surface_solar_radiation_downwards_sum', 'surface_thermal_radiation_downwards_sum', 'evaporation_from_bare_soil_sum', 'evaporation_from_open_water_surfaces_excludi

In [8]:
iba = ee.Geometry.Point([119.965661, 15.328408]).buffer(25000)  # [longitude, latitude]
print(iba.getInfo())

def extract(img):
    vals = img.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=station_geom,
        scale=11132,  # ~11km resolution
        maxPixels=1e13,
        bestEffort=True
    )
    date_val = img.date().format("YYYY-MM-dd")
    feature = ee.Feature(None, vals).set("date", date_val)
    return feature

fc = dataset.map(extract)

{'type': 'Polygon', 'coordinates': [[[119.96566100000004, 15.55337964878704], [119.90043789221606, 15.544416279073035], [119.8404223723075, 15.518241885908326], [119.79040249986812, 15.476945936467589], [119.75436227823536, 15.42382342882165], [119.73516380971886, 15.36311013599052], [119.73432163283061, 15.299643179606276], [119.7518864823739, 15.238474659919868], [119.78644634076566, 15.184469499948253], [119.83524310718518, 15.141919496306457], [119.89439429929904, 15.114203878140488], [119.9592015143965, 15.103522713561972], [120.02452130666555, 15.110723655121532], [120.08516992072923, 15.135235252612919], [120.13633108518906, 15.175111906696868], [120.1739358695213, 15.227187044422566], [120.1949854478625, 15.287322832563566], [120.19779144927321, 15.350737264313931], [120.18211429559304, 15.41238329067671], [120.14918732436406, 15.467350302913486], [120.10162320650694, 15.511256111584835], [120.04320864370196, 15.540597895556063], [119.97860280737018, 15.553033521880044], [119.9

In [9]:
print("Feature collection size:", fc.size().getInfo())
sample = fc.limit(5).getInfo()
print("Sample features:", sample['features'][0]['properties'])

Feature collection size: 3651
Sample features: {'date': '2014-01-01', 'dewpoint_temperature_2m': 292.1144900230922, 'dewpoint_temperature_2m_max': 293.3519863146572, 'dewpoint_temperature_2m_min': 291.17522872423353, 'evaporation_from_bare_soil_max': 0, 'evaporation_from_bare_soil_min': -0.00037772275251664333, 'evaporation_from_bare_soil_sum': -0.002474791690829635, 'evaporation_from_open_water_surfaces_excluding_oceans_max': 0, 'evaporation_from_open_water_surfaces_excluding_oceans_min': -6.025452680553491e-05, 'evaporation_from_open_water_surfaces_excluding_oceans_sum': -0.0004057576619700788, 'evaporation_from_the_top_of_canopy_max': 5.7677099997168984e-06, 'evaporation_from_the_top_of_canopy_min': -1.79245458596644e-05, 'evaporation_from_the_top_of_canopy_sum': -1.9723370216542005e-06, 'evaporation_from_vegetation_transpiration_max': -4.465204877822613e-06, 'evaporation_from_vegetation_transpiration_min': -1.6487230428717512e-05, 'evaporation_from_vegetation_transpiration_sum': -0

In [10]:
dates = fc.aggregate_array("date").getInfo()
var_names = dataset.first().bandNames().getInfo()

In [11]:
data = {}
for v in var_names:
    try:
        values = fc.aggregate_array(v).getInfo()
        data[v] = values
        print(f"Extracted {len([x for x in values if x is not None])} non-null values for {v}")
    except Exception as e:
        print(f"Error extracting {v}: {e}")
        data[v] = [None] * len(dates)

# Create DataFrame
df = pd.DataFrame({"date": dates})

Extracted 3651 non-null values for dewpoint_temperature_2m
Extracted 3651 non-null values for temperature_2m
Extracted 3651 non-null values for skin_temperature
Extracted 3651 non-null values for soil_temperature_level_1
Extracted 3651 non-null values for soil_temperature_level_2
Extracted 3651 non-null values for soil_temperature_level_3
Extracted 3651 non-null values for soil_temperature_level_4
Extracted 3651 non-null values for lake_bottom_temperature
Extracted 3651 non-null values for lake_ice_depth
Extracted 3651 non-null values for lake_ice_temperature
Extracted 3651 non-null values for lake_mix_layer_depth
Extracted 3651 non-null values for lake_mix_layer_temperature
Extracted 3651 non-null values for lake_shape_factor
Extracted 3651 non-null values for lake_total_layer_temperature
Extracted 3651 non-null values for snow_albedo
Extracted 3651 non-null values for snow_cover
Extracted 3651 non-null values for snow_density
Extracted 3651 non-null values for snow_depth
Extracted 36

In [12]:
for v in var_names:
    if v in data:
        df[v] = data[v]
        null_count = df[v].isnull().sum()
        print(f"Column {v}: {len(df[v]) - null_count} valid values, {null_count} null values")

print("\nDataFrame shape:", df.shape)
print("DataFrame info:")
print(df.info())
print("\nFirst few rows:")
print(df.head())

Column dewpoint_temperature_2m: 3651 valid values, 0 null values
Column temperature_2m: 3651 valid values, 0 null values
Column skin_temperature: 3651 valid values, 0 null values
Column soil_temperature_level_1: 3651 valid values, 0 null values
Column soil_temperature_level_2: 3651 valid values, 0 null values
Column soil_temperature_level_3: 3651 valid values, 0 null values
Column soil_temperature_level_4: 3651 valid values, 0 null values
Column lake_bottom_temperature: 3651 valid values, 0 null values
Column lake_ice_depth: 3651 valid values, 0 null values
Column lake_ice_temperature: 3651 valid values, 0 null values
Column lake_mix_layer_depth: 3651 valid values, 0 null values
Column lake_mix_layer_temperature: 3651 valid values, 0 null values
Column lake_shape_factor: 3651 valid values, 0 null values
Column lake_total_layer_temperature: 3651 valid values, 0 null values
Column snow_albedo: 3651 valid values, 0 null values
Column snow_cover: 3651 valid values, 0 null values
Column sno

  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]
  df[v] = data[v]


In [13]:
metadata = []
for col in df.columns:
    if col == "date":
        continue

    # Skip if all values are null
    if df[col].isnull().all():
        print(f"Skipping {col} - all values are null")
        continue

    original_name = col
    original_unit = "varies"
    converted_unit = original_unit
    new_name = col

    # Kelvin → Celsius
    if "temperature" in col.lower():
        df[col] = df[col] - 273.15
        new_name = col + "_C"
        original_unit, converted_unit = "K", "°C"
    # Precipitation / Runoff / Evaporation (m → mm/day)
    elif col.endswith("_sum") or "precipitation" in col.lower() or "evaporation" in col.lower() or "runoff" in col.lower():
        df[col] = df[col] * 1000
        new_name = col + "_mm"
        original_unit, converted_unit = "m", "mm/day"
    # Pressure (Pa → hPa)
    elif "pressure" in col.lower():
        df[col] = df[col] / 100
        new_name = col + "_hPa"
        original_unit, converted_unit = "Pa", "hPa"
    # Wind (m/s → km/h)
    elif "u_component" in col.lower() or "v_component" in col.lower() or "wind" in col.lower():
        df[col] = df[col] * 3.6
        new_name = col + "_kmh"
        original_unit, converted_unit = "m/s", "km/h"
    # Snow depth (m → cm)
    elif "snow_depth" in col.lower():
        df[col] = df[col] * 100
        new_name = col + "_cm"
        original_unit, converted_unit = "m", "cm"
    # Radiation & Fluxes (J/m²/day → W/m²)
    elif col.endswith("_radiation_sum") or col.endswith("_flux_sum") or col.endswith("_heat_sum"):
        df[col] = df[col] / 86400
        new_name = col + "_Wm2"
        original_unit, converted_unit = "J/m²/day", "W/m²"

    # Rename column
    if new_name != col:
        df.rename(columns={col: new_name}, inplace=True)

    # Add metadata record
    metadata.append({
        "Band": original_name,
        "Converted Column": new_name,
        "Original Unit": original_unit,
        "Converted Unit": converted_unit
    })

In [14]:
meta_df = pd.DataFrame(metadata)
print("\nMetadata:")
print(meta_df)


Metadata:
                                    Band                     Converted Column  \
0                dewpoint_temperature_2m            dewpoint_temperature_2m_C   
1                         temperature_2m                     temperature_2m_C   
2                       skin_temperature                   skin_temperature_C   
3               soil_temperature_level_1           soil_temperature_level_1_C   
4               soil_temperature_level_2           soil_temperature_level_2_C   
..                                   ...                                  ...   
145              total_precipitation_max           total_precipitation_max_mm   
146  leaf_area_index_high_vegetation_min  leaf_area_index_high_vegetation_min   
147  leaf_area_index_high_vegetation_max  leaf_area_index_high_vegetation_max   
148   leaf_area_index_low_vegetation_min   leaf_area_index_low_vegetation_min   
149   leaf_area_index_low_vegetation_max   leaf_area_index_low_vegetation_max   

    Original Uni

In [15]:
from google.colab import drive
drive.mount('/content/drive')

# FIXED: Updated filename to reflect correct location (Aparri)
filename = "ERA5-Land_Daily_Aggregated_'14_'23_Iba.csv"
drive_path = f'/content/drive/MyDrive/ERA5_datasets/{filename}'

try:
    df.to_csv(drive_path, index=False)
    print(f"File saved to Google Drive: {filename}")
    print(f"Final DataFrame shape: {df.shape}")
except Exception as e:
    print(f"Error saving file: {e}")
    # Save locally as backup
    df.to_csv(filename, index=False)
    print(f"File saved locally: {filename}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
File saved to Google Drive: ERA5-Land_Daily_Aggregated_'14_'23_Iba.csv
Final DataFrame shape: (3651, 151)
