<a href="https://colab.research.google.com/github/machiwao/CCTHESS1-CCTHESS2-Dev-and-Docs/blob/coli/ERA5_VIIRS_LANDSAT_ALBEDO_14_'23.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Script by Eco ERA5

In [None]:
!pip install earthengine-api --upgrade
!pip install geemap --upgrade

Collecting earthengine-api
  Downloading earthengine_api-1.6.8-py3-none-any.whl.metadata (2.2 kB)
Downloading earthengine_api-1.6.8-py3-none-any.whl (463 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m463.2/463.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: earthengine-api
  Attempting uninstall: earthengine-api
    Found existing installation: earthengine-api 1.5.24
    Uninstalling earthengine-api-1.5.24:
      Successfully uninstalled earthengine-api-1.5.24
Successfully installed earthengine-api-1.6.8
Collecting geemap
  Downloading geemap-0.36.3-py3-none-any.whl.metadata (14 kB)
Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets->ipyfilechooser>=0.6.0->geemap)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading geemap-0.36.3-py3-none-any.whl (631 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m631.3/631.3 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jedi-0.19.2-

In [None]:
import ee, pandas as pd
import geemap

In [None]:
ee.Authenticate()
ee.Initialize(project='data-scrapping-470316')

*** Earth Engine *** Share your feedback by taking our Annual Developer Satisfaction Survey: https://google.qualtrics.com/jfe/form/SV_7TDKVSyKvBdmMqW?ref=4i2o6


In [None]:
# ERA5-Land Daily Aggregates dataset
dataset = ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR") \
  .filterDate('2014-01-01', '2024-01-01')

In [None]:
# Print available variables (bands)
bands = dataset.first().bandNames().getInfo()
print("Available Bands:", bands)
print("Dataset size:", dataset.size().getInfo())

Available Bands: ['dewpoint_temperature_2m', 'temperature_2m', 'skin_temperature', 'soil_temperature_level_1', 'soil_temperature_level_2', 'soil_temperature_level_3', 'soil_temperature_level_4', 'lake_bottom_temperature', 'lake_ice_depth', 'lake_ice_temperature', 'lake_mix_layer_depth', 'lake_mix_layer_temperature', 'lake_shape_factor', 'lake_total_layer_temperature', 'snow_albedo', 'snow_cover', 'snow_density', 'snow_depth', 'snow_depth_water_equivalent', 'snowfall_sum', 'snowmelt_sum', 'temperature_of_snow_layer', 'skin_reservoir_content', 'volumetric_soil_water_layer_1', 'volumetric_soil_water_layer_2', 'volumetric_soil_water_layer_3', 'volumetric_soil_water_layer_4', 'forecast_albedo', 'surface_latent_heat_flux_sum', 'surface_net_solar_radiation_sum', 'surface_net_thermal_radiation_sum', 'surface_sensible_heat_flux_sum', 'surface_solar_radiation_downwards_sum', 'surface_thermal_radiation_downwards_sum', 'evaporation_from_bare_soil_sum', 'evaporation_from_open_water_surfaces_excludi

In [None]:
# FIXED: Corrected coordinates for baler, Philippines
baler = ee.Geometry.Point([121.970536,  20.427284]).buffer(25000)  # [longitude, latitude]
print("Geometry created:", baler.getInfo())

def extract(img):
    # Add error handling and debugging
    vals = img.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=baler,
        scale=11132,  # ERA5-Land native resolution is ~11km
        maxPixels=1e13,
        bestEffort=True  # Add this to handle large geometries
    )

    # Get the date
    date_val = img.date().format("YYYY-MM-dd")

    # Create feature with both values and date
    feature = ee.Feature(None, vals).set("date", date_val)
    return feature

# Apply extraction to all images
fc = dataset.map(extract)

Geometry created: {'type': 'Polygon', 'coordinates': [[[121.97053600000001, 20.65225564878704], [121.90338716124897, 20.643288882306127], [121.84160547459979, 20.617105362582638], [121.79012388896176, 20.575797413657597], [121.75304374153019, 20.522663786501155], [121.7333072757712, 20.461943726982682], [121.73246643854283, 20.39847646478299], [121.75056536522541, 20.337314189066134], [121.78614398290162, 20.283319870392138], [121.83636022252226, 20.24078192452345], [121.8972192856551, 20.21307582571173], [121.96389082629514, 20.20239868027073], [122.03108908508008, 20.209596890825473], [122.09348706550985, 20.23409985646023], [122.14613378328285, 20.27396466317643], [122.1848434366507, 20.326028426625697], [122.20652703578881, 20.386156859461035], [122.20944060624039, 20.44957025308714], [122.19332954187226, 20.511221888913735], [122.15945593935045, 20.566199412245833], [122.11050454097996, 20.61011735199697], [122.05037273533242, 20.63946909383302], [121.9838601156185, 20.65190938814

In [None]:
# Debug: Check if feature collection is properly created
print("Feature collection size:", fc.size().getInfo())

# Get a sample to check if data extraction worked
sample = fc.limit(5).getInfo()
print("Sample features:", sample['features'][0]['properties'])

Feature collection size: 3652
Sample features: {'date': '2014-01-01'}


In [None]:
# Extract dates and data
dates = fc.aggregate_array("date").getInfo()
print(f"Number of dates extracted: {len(dates)}")
print(f"First few dates: {dates[:5]}")

var_names = dataset.first().bandNames().getInfo()
print(f"Variable names: {var_names}")

Number of dates extracted: 3652
First few dates: ['2014-01-01', '2014-01-02', '2014-01-03', '2014-01-04', '2014-01-05']
Variable names: ['dewpoint_temperature_2m', 'temperature_2m', 'skin_temperature', 'soil_temperature_level_1', 'soil_temperature_level_2', 'soil_temperature_level_3', 'soil_temperature_level_4', 'lake_bottom_temperature', 'lake_ice_depth', 'lake_ice_temperature', 'lake_mix_layer_depth', 'lake_mix_layer_temperature', 'lake_shape_factor', 'lake_total_layer_temperature', 'snow_albedo', 'snow_cover', 'snow_density', 'snow_depth', 'snow_depth_water_equivalent', 'snowfall_sum', 'snowmelt_sum', 'temperature_of_snow_layer', 'skin_reservoir_content', 'volumetric_soil_water_layer_1', 'volumetric_soil_water_layer_2', 'volumetric_soil_water_layer_3', 'volumetric_soil_water_layer_4', 'forecast_albedo', 'surface_latent_heat_flux_sum', 'surface_net_solar_radiation_sum', 'surface_net_thermal_radiation_sum', 'surface_sensible_heat_flux_sum', 'surface_solar_radiation_downwards_sum', 'su

In [None]:
# FIXED: Better error handling for data extraction
data = {}
for v in var_names:
    try:
        values = fc.aggregate_array(v).getInfo()
        data[v] = values
        print(f"Extracted {len([x for x in values if x is not None])} non-null values for {v}")
    except Exception as e:
        print(f"Error extracting {v}: {e}")
        data[v] = [None] * len(dates)

# Create DataFrame
df = pd.DataFrame({"date": dates})

Extracted 0 non-null values for dewpoint_temperature_2m
Extracted 0 non-null values for temperature_2m
Extracted 0 non-null values for skin_temperature
Extracted 0 non-null values for soil_temperature_level_1
Extracted 0 non-null values for soil_temperature_level_2
Extracted 0 non-null values for soil_temperature_level_3
Extracted 0 non-null values for soil_temperature_level_4
Extracted 0 non-null values for lake_bottom_temperature
Extracted 0 non-null values for lake_ice_depth
Extracted 0 non-null values for lake_ice_temperature
Extracted 0 non-null values for lake_mix_layer_depth
Extracted 0 non-null values for lake_mix_layer_temperature
Extracted 0 non-null values for lake_shape_factor
Extracted 0 non-null values for lake_total_layer_temperature
Extracted 0 non-null values for snow_albedo
Extracted 0 non-null values for snow_cover
Extracted 0 non-null values for snow_density
Extracted 0 non-null values for snow_depth
Extracted 0 non-null values for snow_depth_water_equivalent
Extrac

In [None]:
# Add data columns with null checking
for v in var_names:
    if v in data:
        df[v] = data[v]
        null_count = df[v].isnull().sum()
        print(f"Column {v}: {len(df[v]) - null_count} valid values, {null_count} null values")

print("\nDataFrame shape:", df.shape)
print("DataFrame info:")
print(df.info())
print("\nFirst few rows:")
print(df.head())

ValueError: Length of values (0) does not match length of index (3652)

In [None]:
# Unit conversions (only if data exists)
metadata = []
for col in df.columns:
    if col == "date":
        continue

    # Skip if all values are null
    if df[col].isnull().all():
        print(f"Skipping {col} - all values are null")
        continue

    original_name = col
    original_unit = "varies"
    converted_unit = original_unit
    new_name = col

    # Kelvin → Celsius
    if "temperature" in col.lower():
        df[col] = df[col] - 273.15
        new_name = col + "_C"
        original_unit, converted_unit = "K", "°C"
    # Precipitation / Runoff / Evaporation (m → mm/day)
    elif col.endswith("_sum") or "precipitation" in col.lower() or "evaporation" in col.lower() or "runoff" in col.lower():
        df[col] = df[col] * 1000
        new_name = col + "_mm"
        original_unit, converted_unit = "m", "mm/day"
    # Pressure (Pa → hPa)
    elif "pressure" in col.lower():
        df[col] = df[col] / 100
        new_name = col + "_hPa"
        original_unit, converted_unit = "Pa", "hPa"
    # Wind (m/s → km/h)
    elif "u_component" in col.lower() or "v_component" in col.lower() or "wind" in col.lower():
        df[col] = df[col] * 3.6
        new_name = col + "_kmh"
        original_unit, converted_unit = "m/s", "km/h"
    # Snow depth (m → cm)
    elif "snow_depth" in col.lower():
        df[col] = df[col] * 100
        new_name = col + "_cm"
        original_unit, converted_unit = "m", "cm"
    # Radiation & Fluxes (J/m²/day → W/m²)
    elif col.endswith("_radiation_sum") or col.endswith("_flux_sum") or col.endswith("_heat_sum"):
        df[col] = df[col] / 86400
        new_name = col + "_Wm2"
        original_unit, converted_unit = "J/m²/day", "W/m²"

    # Rename column
    if new_name != col:
        df.rename(columns={col: new_name}, inplace=True)

    # Add metadata record
    metadata.append({
        "Band": original_name,
        "Converted Column": new_name,
        "Original Unit": original_unit,
        "Converted Unit": converted_unit
    })

In [None]:
# Build metadata DataFrame
meta_df = pd.DataFrame(metadata)
print("\nMetadata:")
print(meta_df)

In [None]:
from google.colab import files

filename = "ERA5_Baler.csv"
df.to_csv(filename, index=False)

# --- Download ---
files.download(filename)


# Another ERA5 Script Used by Jessy

In [None]:
def extract_and_process_viirs(point, dataset):
    def extract(img):
        vals = img.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=point,
            scale=500,          # VIIRS ~500m resolution
            maxPixels=1e13
        )
        return ee.Feature(None, vals).set("date", img.date().format("YYYY-MM-dd"))

    # Map over dataset
    fc = dataset.map(extract)
    features = fc.getInfo()["features"]

    rows = []
    for f in features:
        props = f["properties"]
        row = {
            "date": props.get("date"),
            "NDVI": props.get("NDVI", None),
            "TIMEOFDAY": props.get("TIMEOFDAY", None),
            "QA": props.get("QA", None)   # keep QA instead of dropping
        }
        rows.append(row)

    # Explicitly define all columns (so no "extra" blank ones appear)
    df = pd.DataFrame(rows, columns=["date", "NDVI", "TIMEOFDAY", "QA"])

    # Scale values according to metadata
    # if "NDVI" in df:
    #     df["NDVI"] = df["NDVI"].apply(lambda x: None if x is None else x * 0.0001)
    # if "TIMEOFDAY" in df:
    #     df["TIMEOFDAY"] = df["TIMEOFDAY"].apply(lambda x: None if x is None else x * 0.01)
    if "NDVI" in df:
          df["NDVI"] = df["NDVI"].apply(lambda x: None if x is None else x / 10000)


    # QA stays as raw integer bitmask

    return df


In [None]:
# Print available variables (bands)
bands = dataset.first().bandNames().getInfo()
print("Available Bands:", bands)
print("Dataset size:", dataset.size().getInfo())

Available Bands: ['NDVI', 'TIMEOFDAY', 'QA']
Dataset size: 3636


In [None]:
import ee
import pandas as pd
import geemap

cloud_project = 'data-scrapping-470316'

try:
  ee.Initialize(project=cloud_project)
except:
  ee.Authenticate()
  ee.Initialize(project=cloud_project)

dataset = ee.ImageCollection("NOAA/CDR/VIIRS/NDVI/V1").filterDate('2014-01-01', '2024-01-01')

# 1. Define ee.Geometry.Point objects for each of the four additional stations
appari_point = ee.Geometry.Point([121.630454,  18.360059]).buffer(50000)
basco_point = ee.Geometry.Point([121.970536,  20.427284]).buffer(50000)
baler_point = ee.Geometry.Point([121.632028,  15.748809]).buffer(50000)
ambulong_point = ee.Geometry.Point([121.0552444,  14.09008056]).buffer(50000)
cabanatuan_point = ee.Geometry.Point(([120.951143,  15.470387])).buffer(50000)


# 2. Create a dictionary to store the station names and their corresponding point geometries
stations = {
    "Appari":  appari_point,
    "Basco": basco_point,
    "Baler": baler_point,
    "Ambulong": ambulong_point,
    "Cabanatuan": cabanatuan_point
}

# 3. and 4. Iterate through the stations and extract/process data
station_dataframes = {}
for station_name, station_point in stations.items():
    print(f"Extracting data for {station_name}...")
    try:
        df_station = extract_and_process_viirs(station_point, dataset)
        station_dataframes[station_name] = df_station
        filename = f"NDVI_{station_name}.csv"
        df_station.to_csv(filename, index=False)
        files.download(filename)
        print(f"✅ Saved {filename} with shape {df_station.shape}")
    except Exception as e:
        print(f"❌ Error for {station_name}: {e}")

Extracting data for Appari...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Saved NDVI_Appari.csv with shape (3636, 4)
Extracting data for Basco...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Saved NDVI_Basco.csv with shape (3636, 4)
Extracting data for Baler...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Saved NDVI_Baler.csv with shape (3636, 4)
Extracting data for Ambulong...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Saved NDVI_Ambulong.csv with shape (3636, 4)
Extracting data for Cabanatuan...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Saved NDVI_Cabanatuan.csv with shape (3636, 4)


# Landsat Operation


In [None]:
def extract_and_process_hlsl(point, dataset, start_year=2014, end_year=2024):
    band_names = dataset.first().bandNames().getInfo()
    all_dfs = []

    for year in range(start_year, end_year + 1):
        subset = dataset.filterDate(f"{year}-01-01", f"{year}-12-31")
        # Reduce over the year instead of mapping each image
        mean_img = subset.mean()
        vals = mean_img.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=point,
            scale=30,
            maxPixels=1e13
        )
        row = {"date": str(year)}
        for b in band_names:
            row[b] = vals.get(b)
        df_year = pd.DataFrame([row], columns=["date"] + band_names)
        all_dfs.append(df_year)

    df = pd.concat(all_dfs, ignore_index=True) if all_dfs else pd.DataFrame(columns=["date"] + band_names)
    return df


In [None]:
import ee
import pandas as pd
from google.colab import files

# --- Authenticate and Initialize ---
ee.Authenticate()
ee.Initialize(project='data-scrapping-470316')

# --- Dataset ---
dataset = ee.ImageCollection("NASA/HLS/HLSL30/v002") \
             .filterDate('2014-01-01', '2024-01-01')

# --- Stations ---
stations = {
    "Aparri": ee.Geometry.Point([121.630454, 18.360059]).buffer(10000),
    "Basco": ee.Geometry.Point([121.970536, 20.427284]).buffer(10000),
    "Baler": ee.Geometry.Point([121.632028, 15.748809]).buffer(10000),
    "Ambulong": ee.Geometry.Point([121.0552444, 14.09008056]).buffer(10000),
    "Cabanatuan": ee.Geometry.Point([120.951143, 15.470387]).buffer(10000)
}

# --- Storage for results ---
station_dataframes = {}

# --- Extract and save ---
for station_name, station_point in stations.items():
    print(f"Extracting data for {station_name}...")
    try:
        df_station = extract_and_process_hlsl(station_point, dataset)
        station_dataframes[station_name] = df_station
        filename = f"HLSL30_{station_name}.csv"   # ✅ one CSV per station
        df_station.to_csv(filename, index=False)
        files.download(filename)
        print(f"✅ Saved {filename} with shape {df_station.shape}")
    except Exception as e:
        print(f"❌ Error for {station_name}: {e}")



Extracting data for Aparri...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Saved HLSL30_Aparri.csv with shape (11, 16)
Extracting data for Basco...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Saved HLSL30_Basco.csv with shape (11, 16)
Extracting data for Baler...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Saved HLSL30_Baler.csv with shape (11, 16)
Extracting data for Ambulong...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Saved HLSL30_Ambulong.csv with shape (11, 16)
Extracting data for Cabanatuan...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Saved HLSL30_Cabanatuan.csv with shape (11, 16)


# Albedo

In [None]:
!pip install earthengine-api geemap --upgrade



In [None]:
import ee
import geemap
import pandas as pd
from scipy.signal import savgol_filter

In [None]:
ee.Authenticate()
ee.Initialize(project='data-scrapping-470316')

In [None]:
# --- Define stations ---
stations = {
    "Aparri": ee.Geometry.Point([121.630454, 18.360059]).buffer(10000),
    "Basco": ee.Geometry.Point([121.970536, 20.427284]).buffer(10000),
    "Baler": ee.Geometry.Point([121.632028, 15.748809]).buffer(10000),
    "Ambulong": ee.Geometry.Point([121.0552444, 14.09008056]).buffer(10000),
    "Cabanatuan": ee.Geometry.Point([120.951143, 15.470387]).buffer(10000)
}
start_date = "2014-01-01"   # MODIS MCD43 start
end_date   = "2024-01-01"
full_range = pd.date_range(start_date, end_date, freq="D")

albedo_coll = ee.ImageCollection("MODIS/061/MCD43A3").filterDate(start_date, end_date)
quality_coll = ee.ImageCollection("MODIS/061/MCD43A2").filterDate(start_date, end_date)

In [None]:
def mask_low_quality(albedo_img):
    qa = (quality_coll
          .filter(ee.Filter.eq('system:index', albedo_img.get('system:index')))
          .first()
          .select('BRDF_Albedo_Band_Quality_Band1'))
    mask = qa.eq(0)  # "best quality"
    return albedo_img.updateMask(mask)

def process_station_albedo(station_name, geometry):
    print(f"\n--- Processing {station_name} ---")

    def per_image(img):
        mean_dict = img.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=geometry,
            scale=500,  # MODIS resolution
            bestEffort=True,
            maxPixels=1e12
        )
        return ee.Feature(None, {
            'date': img.date().format("YYYY-MM-dd"),
            'Albedo_BSA_Band1': mean_dict.get('Albedo_BSA_Band1'),
            'Albedo_WSA_Band1': mean_dict.get('Albedo_WSA_Band1'),
            'station': station_name
        })

    ts = albedo_coll.map(per_image)
    ts = ee.FeatureCollection(ts)
    df = geemap.ee_to_df(ts)

    if df.empty:
        print(f"No albedo data for {station_name}")
        return None

    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')

    g = df.set_index('date').reindex(full_range)
    g['station'] = station_name

    for band in ['Albedo_BSA_Band1', 'Albedo_WSA_Band1']:
        g[band] = g[band].astype(float)
        g[band] = g[band].interpolate(method='linear').ffill().bfill()

    try:
        g['Albedo_BSA_Band1_smooth'] = savgol_filter(g['Albedo_BSA_Band1'], window_length=31, polyorder=2)
        g['Albedo_WSA_Band1_smooth'] = savgol_filter(g['Albedo_WSA_Band1'], window_length=31, polyorder=2)
    except:
        g['Albedo_BSA_Band1_smooth'] = g['Albedo_BSA_Band1']
        g['Albedo_WSA_Band1_smooth'] = g['Albedo_WSA_Band1']

    g.index.name = 'date'
    g = g.reset_index()

    outname = f"{station_name}_Albedo_Data.csv"
    g.to_csv(outname, index=False)
    print(f"Saved {outname} with {g.shape[0]} rows")

    return g


In [None]:
all_dfs = {}
for station, geom in stations.items():
    df_station = process_station_albedo(station, geom)
    if df_station is not None:
        all_dfs[station] = df_station

print("\nProcessing complete!")


--- Processing Aparri ---
Saved Aparri_Albedo_Data.csv with 3653 rows

--- Processing Basco ---
Saved Basco_Albedo_Data.csv with 3653 rows

--- Processing Baler ---
Saved Baler_Albedo_Data.csv with 3653 rows

--- Processing Ambulong ---
Saved Ambulong_Albedo_Data.csv with 3653 rows

--- Processing Cabanatuan ---
Saved Cabanatuan_Albedo_Data.csv with 3653 rows

Processing complete!
