<a href="https://colab.research.google.com/github/machiwao/CCTHESS1-CCTHESS2-Dev-and-Docs/blob/jessy/NDBI_data_scraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
# Uncomment on a fresh runtime (e.g. Colab) to install the dependencies:
# %pip install -q earthengine-api geemap scipy

In [17]:
import ee
import geemap
import pandas as pd
from scipy.signal import savgol_filter
import os

In [18]:
# Google Cloud project passed to ee.Initialize() for every Earth Engine request.
cloud_project = 'heat-index-forecasting'

# Try the cached credentials first and fall back to the interactive
# authentication flow only when initialisation actually fails.
# `except Exception` (instead of a bare `except`) lets KeyboardInterrupt and
# SystemExit propagate, so interrupting the cell does not start an auth prompt.
try:
  ee.Initialize(project=cloud_project)
except Exception:
  ee.Authenticate()
  ee.Initialize(project=cloud_project)

# Radius of the circular region around each station; ee.Geometry.buffer()
# interprets the distance in metres when no projection is given.
STATION_BUFFER_M = 25000

# Station name -> buffered region of interest. Points are [longitude, latitude].
stations = {
    "Sinait": ee.Geometry.Point([120.459762, 17.89019]).buffer(STATION_BUFFER_M),
    "Tayabas": ee.Geometry.Point([121.596575, 14.018428]).buffer(STATION_BUFFER_M),
    "Tanay": ee.Geometry.Point([121.36927, 14.581167]).buffer(STATION_BUFFER_M),
    "Tuguegarao": ee.Geometry.Point([121.758469, 17.647678]).buffer(STATION_BUFFER_M),
    "Virac": ee.Geometry.Point([124.209834, 13.576558]).buffer(STATION_BUFFER_M),
}

# Acquisition window handed to ImageCollection.filterDate():
# start_date is inclusive, end_date is exclusive.
start_date = "2014-01-01"
end_date   = "2024-01-01"


In [19]:
def addNDBI_HLS(img):
    """Return `img` restricted to the B5 and B6 bands.

    No band is renamed and no index is computed here; the NDBI itself is
    derived later from the regional means of these two bands.
    """
    ndbi_inputs = ['B5', 'B6']
    return img.select(ndbi_inputs)

# Footprint of all station buffers. Without a spatial filter the collection
# below would cover every HLS scene worldwide in the date window, and any
# later map() over it would run on all of them.
station_regions = ee.FeatureCollection(
    [ee.Feature(region) for region in stations.values()]
)

# HLS Landsat (L30) scenes in the study window that touch at least one
# station buffer, reduced to the two bands needed for NDBI.
hls = (ee.ImageCollection("NASA/HLS/HLSL30/v002")
       .filterDate(start_date, end_date)
       .filterBounds(station_regions)
       .map(addNDBI_HLS))

# Sanity check: the first image should expose exactly the selected bands.
sample = hls.first()
print(sample.bandNames().getInfo())

['B5', 'B6']


In [20]:
from datetime import date
def _daily_ndbi_table(obs, station_name, full_range):
    """Turn per-image band means into a gap-filled, smoothed daily NDBI table.

    Parameters
    ----------
    obs : pandas.DataFrame
        One row per image with columns ``date``, ``B5`` and ``B6``.
    station_name : str
        Value written to the ``station`` column.
    full_range : pandas.DatetimeIndex
        Daily calendar the result is reindexed onto.

    Returns
    -------
    pandas.DataFrame
        Columns ``date, B5, B6, NDBI, station, NDBI_smooth``; an empty frame
        when no usable observation remains.
    """
    obs = obs.copy()
    obs["date"] = pd.to_datetime(obs["date"])
    for band in ("B5", "B6"):
        obs[band] = pd.to_numeric(obs[band], errors="coerce")
    obs = obs.dropna(subset=["B5", "B6"])

    # Several tiles can intersect one buffer on the same day. Average them so
    # the index is unique; reindex() raises on duplicate labels.
    daily = obs.groupby("date")[["B5", "B6"]].mean().sort_index()

    # NDBI = (B6 - B5) / (B6 + B5); a zero denominator becomes NaN, not inf.
    denom = daily["B6"] + daily["B5"]
    daily["NDBI"] = (daily["B6"] - daily["B5"]) / denom.where(denom != 0)

    if daily["NDBI"].notna().sum() == 0:
        return pd.DataFrame()

    g = daily.reindex(full_range)
    g["station"] = station_name

    # Fill missing days: linear interpolation inside the observed span,
    # then forward/backward fill for the leading and trailing gaps.
    g["NDBI"] = g["NDBI"].interpolate("linear").ffill().bfill()

    # Savitzky-Golay smoothing; savgol_filter raises ValueError when the
    # series is shorter than the window, in which case keep it unsmoothed.
    try:
        g["NDBI_smooth"] = savgol_filter(
            g["NDBI"].to_numpy(), window_length=31, polyorder=2
        )
    except ValueError:
        g["NDBI_smooth"] = g["NDBI"]

    g.index.name = "date"
    return g.reset_index()


def process_station(station_name, geometry):
    """Build and save the daily NDBI series for one station.

    For every image in the module-level ``hls`` collection that intersects
    ``geometry``, the regional mean of B5 and B6 is computed on Earth Engine.
    The values are pulled into pandas, converted to NDBI, reindexed to a
    daily calendar spanning ``start_date`` to ``end_date``, gap-filled and
    smoothed.

    Parameters
    ----------
    station_name : str
        Label used in log messages, the ``station`` column and the file name.
    geometry : ee.Geometry
        Region over which the band means are computed.

    Returns
    -------
    pandas.DataFrame
        The daily table, or an empty frame when no data is available.

    Side effects
    ------------
    Writes ``{station_name}_NDBI_Data.csv`` to the working directory.
    """
    print(f"Processing {station_name}...")

    def _band_means(img):
        # One reduceRegion call yields both band means.
        stats = img.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=geometry,
            scale=100,
            bestEffort=True,
            maxPixels=1e9,
        )
        return ee.Feature(
            None, {
                "date": img.date().format("YYYY-MM-dd"),
                "B5": stats.get("B5"),
                "B6": stats.get("B6"),
            }
        )

    # Only reduce scenes that touch this station, and drop features whose
    # band means are null (e.g. fully masked over the region).
    ts = ee.FeatureCollection(
        hls.filterBounds(geometry).map(_band_means)
    ).filter(ee.Filter.notNull(["B5", "B6"]))

    df = geemap.ee_to_df(ts)
    if df.empty or not {"date", "B5", "B6"}.issubset(df.columns):
        print(f"No data for {station_name}")
        return pd.DataFrame()

    # Daily calendar for the study window. filterDate() excludes end_date,
    # so the calendar stops the day before it.
    # NOTE(review): the original referenced an undefined `full_range`;
    # confirm this inclusive-start / exclusive-end range is what was intended.
    full_range = pd.date_range(
        start=start_date,
        end=pd.Timestamp(end_date) - pd.Timedelta(days=1),
        freq="D",
    )

    g = _daily_ndbi_table(df, station_name, full_range)
    if g.empty:
        print(f"No data for {station_name}")
        return g

    # Save CSV
    out_file = f"{station_name}_NDBI_Data.csv"
    g.to_csv(out_file, index=False)
    print(f"Saved {out_file} ({g.shape[0]} rows)")
    return g

# -------------------------------
# 4. Run for all stations
# -------------------------------
# Station name -> DataFrame returned by process_station(). Filled one entry
# at a time so results already computed survive an interrupted run.
all_dfs = dict()
for station in stations:
    geom = stations[station]
    all_dfs[station] = process_station(station, geom)

Processing Sinait...




KeyboardInterrupt: 