<a href="https://colab.research.google.com/github/machiwao/CCTHESS1-CCTHESS2-Dev-and-Docs/blob/jessy/Albedo_data_scraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import ee
import geemap
import pandas as pd
from scipy.signal import savgol_filter

In [7]:
# Authenticate and initialize Earth Engine against the project's cloud project.
cloud_project = 'heat-index-forecasting'

try:
    # Try a direct initialization first so no prompt appears when it works.
    ee.Initialize(project=cloud_project)
except Exception:
    # Initialization failed: run the authentication flow once, then retry.
    # `Exception` (rather than a bare `except`) lets KeyboardInterrupt and
    # SystemExit propagate instead of triggering an auth prompt.
    ee.Authenticate()
    ee.Initialize(project=cloud_project)

In [8]:
# Station name -> [x, y] point coordinates, listed in processing order.
_station_coords = {
    "Sinait": (120.459762, 17.89019),
    "Tayabas": (121.596575, 14.018428),
    "Tanay": (121.36927, 14.581167),
    "Tuguegarao": (121.758469, 17.647678),
    "Virac": (124.209834, 13.576558),
}

# Each station is represented by its point buffered by 25000 (the same
# distance for every station); albedo is later averaged over this region.
stations = {
    name: ee.Geometry.Point([x, y]).buffer(25000)
    for name, (x, y) in _station_coords.items()
}

# Extraction window shared by the pandas index and both MODIS collections.
start_date, end_date = "2014-01-01", "2024-01-01"

# Daily index that every station series is reindexed onto.
# NOTE(review): date_range includes end_date itself (3653 days), while
# filterDate treats its end as exclusive, so the final day can only ever be
# gap-filled downstream -- confirm this is intended.
full_range = pd.date_range(start=start_date, end=end_date, freq="D")

# MCD43A3 carries the albedo bands; MCD43A2 carries the matching quality bands.
albedo_coll = ee.ImageCollection("MODIS/061/MCD43A3").filterDate(start_date, end_date)
quality_coll = ee.ImageCollection("MODIS/061/MCD43A2").filterDate(start_date, end_date)

In [9]:
def mask_low_quality(albedo_img):
    """Mask an albedo image to pixels whose band-1 quality flag equals 0.

    The quality image is looked up in ``quality_coll`` by matching the
    albedo image's ``system:index``; the first match is used.

    Args:
        albedo_img: Albedo image to mask.

    Returns:
        ``albedo_img`` with its mask updated so that only pixels where
        ``BRDF_Albedo_Band_Quality_Band1`` is 0 remain.
    """
    same_index = ee.Filter.eq('system:index', albedo_img.get('system:index'))
    quality_img = quality_coll.filter(same_index).first()
    band1_quality = quality_img.select('BRDF_Albedo_Band_Quality_Band1')
    # A flag value of 0 is treated as "best quality".
    best_quality = band1_quality.eq(0)
    return albedo_img.updateMask(best_quality)

def process_station_albedo(station_name, geometry):
    """Extract, gap-fill, smooth and save the daily band-1 albedo for a station.

    Every image in ``albedo_coll`` is averaged over ``geometry``; the result
    is reindexed onto ``full_range``, gaps are filled by linear interpolation
    (with forward/backward fill at the edges), a Savitzky-Golay smoothed copy
    of each band is added, and the table is written to
    ``<station_name>_Albedo_Data.csv`` in the working directory.

    Args:
        station_name: Label used in log messages, in the ``station`` column
            and in the output file name.
        geometry: Earth Engine geometry over which each image is averaged.

    Returns:
        The processed DataFrame (one row per day of ``full_range``, with a
        ``date`` column), or ``None`` when Earth Engine returned no rows.
    """
    bands = ['Albedo_BSA_Band1', 'Albedo_WSA_Band1']
    print(f"\n--- Processing {station_name} ---")

    def per_image(img):
        # Regional mean of the image, packaged as a geometry-less feature.
        mean_dict = img.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=geometry,
            scale=500,  # MODIS resolution
            bestEffort=True,
            maxPixels=1e12
        )
        return ee.Feature(None, {
            'date': img.date().format("YYYY-MM-dd"),
            'Albedo_BSA_Band1': mean_dict.get('Albedo_BSA_Band1'),
            'Albedo_WSA_Band1': mean_dict.get('Albedo_WSA_Band1'),
        })

    # NOTE(review): the collection is reduced as-is; mask_low_quality is not
    # applied anywhere in this pipeline -- confirm that is intentional.
    ts = ee.FeatureCollection(albedo_coll.map(per_image))
    df = geemap.ee_to_df(ts)

    if df.empty:
        print(f"No albedo data for {station_name}")
        return None

    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')

    # Align onto the shared daily index; days without an image become NaN.
    g = df.set_index('date').reindex(full_range)
    g['station'] = station_name

    for band in bands:
        if band not in g.columns:
            # The exported table can lack a band column entirely (e.g. when
            # no regional mean was available); report it instead of failing
            # with a bare KeyError.
            print(f"Warning: {band} missing for {station_name}; column filled with NaN")
            g[band] = float('nan')
        g[band] = g[band].astype(float)
        # Linear interpolation for interior gaps, then ffill/bfill for edges.
        g[band] = g[band].interpolate(method='linear').ffill().bfill()

    for band in bands:
        smooth_col = f"{band}_smooth"
        try:
            g[smooth_col] = savgol_filter(g[band], window_length=31, polyorder=2)
        except Exception as exc:
            # Best effort: keep the unsmoothed series, but say so rather than
            # swallowing the error silently (and let interrupts propagate).
            print(f"Warning: smoothing {band} failed for {station_name} ({exc}); "
                  "using unsmoothed values")
            g[smooth_col] = g[band]

    g.index.name = 'date'
    g = g.reset_index()

    outname = f"{station_name}_Albedo_Data.csv"
    g.to_csv(outname, index=False)
    print(f"Saved {outname} with {g.shape[0]} rows")

    return g


In [10]:
# Run the pipeline for every configured station, keeping only the stations
# that actually produced a table (process_station_albedo returns None otherwise).
all_dfs = {}
for station, geom in stations.items():
    df_station = process_station_albedo(station, geom)
    if df_station is None:
        continue
    all_dfs[station] = df_station

print("\nProcessing complete!")


--- Processing Sinait ---
Saved Sinait_Albedo_Data.csv with 3653 rows

--- Processing Tayabas ---
Saved Tayabas_Albedo_Data.csv with 3653 rows

--- Processing Tanay ---
Saved Tanay_Albedo_Data.csv with 3653 rows

--- Processing Tuguegarao ---
Saved Tuguegarao_Albedo_Data.csv with 3653 rows

--- Processing Virac ---
Saved Virac_Albedo_Data.csv with 3653 rows

Processing complete!
