This notebook is an annotated version of palau_data.py with information on data sources and details

In [27]:
import numpy as np
import requests
import xarray as xr
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
import pandas as pd
import os
import calendar
from netCDF4 import num2date
# Palau lat/lon
lat = 7.5150
lon = 134.5825

min_lat_ssh, max_lat_ssh = 7.0, 8.0
min_lon_ssh, max_lon_ssh = 134.0, 135.0

In [28]:
columns = ["LastMonth","Current", "Forecast", "Outlook"]

df = pd.DataFrame(columns=columns)

source_df = pd.DataFrame(columns=columns)

In [None]:
today_hst = datetime.now(ZoneInfo("Pacific/Honolulu"))
today_str = today_hst.strftime("%Y%m%d")
yest_hst = today_hst - timedelta(days=1)
yest_str = yest_hst.strftime("%Y%m%d")

cycle = "06"

today = datetime.utcnow()
if today.month == 1:
    last_month = 12
    last_year = today.year - 1
else:
    last_month = today.month - 1
    last_year = today.year

last_month_str = f"{calendar.month_abbr[last_month]} {last_year}"  # e.g., "Apr 2025"

months_since_1960 = (last_year - 1960) * 12 + (last_month - 1)
t_value = months_since_1960 + 0.5 

### Rainfall

#### Rain Past Month

Source: IRI [Link](https://iridl.ldeo.columbia.edu/maproom/Global/Precipitation/Anomaly.html)
<br>
Monthly precipitation anomalies in units of mm/month based upon precipitation estimates from the CAMS_OPI dataset. The period used for computing the climatology is 1991-2020

In [30]:
base_url = "https://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/.CPC/.CAMS_OPI/.v0208/.anomaly_9120/.prcp"
url = (
    f"{base_url}/T/%28days%20since%201960-01-01%29streamgridunitconvert"
    f"/T/differential_mul/T/%28months%20since%201960-01-01%29streamgridunitconvert"
    f"//units/%28mm/month%29def//long_name/%28Precipitation%20Anomaly%29def"
    f"/DATA/-500/-450/-400/-350/-300/-250/-200/-150/-100/-50/-25/25/50/100/150/200/250/300/350/400/450/500/VALUES/prcp_anomaly_max500_colors2"
    f"/Y/%285N%29%2810N%29RANGEEDGES/X/%28130E%29%28140E%29RANGEEDGES"
    f"/T/%28{last_month_str}%29%28{last_month_str}%29RANGEEDGES/data.nc"
)

filename = "./data_files/rf_lastMonth.nc"
response = requests.get(url)

with open(filename, 'wb') as f:
    f.write(response.content)

ds = xr.open_dataset(filename, decode_times=False)
df.loc['Rain', 'LastMonth'] = ds['aprod'].sel(Y=lat, X=lon, T=t_value, method='nearest').values

source_df.loc['Rain','LastMonth'] = 'https://iridl.ldeo.columbia.edu/maproom/Global/Precipitation/Anomaly.html'

#### Rain Current

Source: CFS
<br>
Using "6" cycle for 3PM but there is a notable lag for this data...

In [31]:
grib_url = f"https://nomads.ncep.noaa.gov/pub/data/nccf/com/cfs/prod/cfs.{today_str}/{cycle}/time_grib_01/prate.01.{today_str}{cycle}.daily.grb2"
idx_url = grib_url + ".idx"

grib_file = "./data_files/rf_current.grb2"
idx_file = grib_file + ".idx"

with open(grib_file, "wb") as f:
    f.write(requests.get(grib_url).content)

with open(idx_file, "wb") as f:
    f.write(requests.get(idx_url).content)

ds = xr.open_dataset(grib_file, engine="cfgrib")

palau = ds.sel(latitude=lat,longitude=lon,method='nearest')
rf_palau_df = palau[['prate']].to_dataframe().reset_index()

palau_tz = ZoneInfo("Pacific/Palau")
now_palau = datetime.now(palau_tz)

rf_palau_df['valid_time'] = pd.to_datetime(rf_palau_df['valid_time']).dt.tz_localize('UTC').dt.tz_convert(palau_tz)
rf_palau_1d_df = rf_palau_df[rf_palau_df['valid_time'].dt.date == now_palau.date()]
rf_1d_sum = rf_palau_df['prate'].sum()

df.loc["Rain", "Current"] = rf_1d_sum
source_df.loc['Rain','Current'] = 'https://www.ncei.noaa.gov/products/weather-climate-models/climate-forecast-system'

Ignoring index file './data_files/rf_current.grb2.5b7b6.idx' older than GRIB file


#### Rain Forecast

Source: BOM [Link](http://www.bom.gov.au/climate/pacific/outlooks/)
<br>
Base period 1981-2018
<br>
Difference from average in mm
<br>
Updates every Tuesday and Thursday. The date range will vary depending on date of update

In [32]:
url = "https://access-s.clide.cloud/files/global/weekly/data/rain.forecast.anom.weekly.nc"
filename = "./data_files/rf.forecast.nc"

response = requests.get(url)

with open(filename, 'wb') as f:
    f.write(response.content)

rf_forecast_dataset = xr.open_dataset(filename)
rf_forecast_palau = rf_forecast_dataset['rain'].sel(lat=slice(min_lat_ssh,max_lat_ssh),lon=slice(min_lon_ssh,max_lon_ssh))
rf_forecast_palau_df = rf_forecast_palau.to_dataframe().reset_index()
rf_forecast_value = rf_forecast_palau_df['rain'].iloc[1]
df.loc["Rain", "Forecast"] = rf_forecast_value
source_df.loc['Rain','Forecast'] = 'http://www.bom.gov.au/climate/pacific/outlooks/'

#### Rain Outlook

Source: BOM [Link](http://www.bom.gov.au/climate/pacific/outlooks/)
<br>
Base period 1981-2018
<br>
Difference from average mm for next month
<br>
Updates every Tuesday and Thursday. The date range will vary depending on date of update

In [33]:
url = "https://access-s.clide.cloud/files/global/monthly/data/rain.forecast.anom.monthly.nc"
filename = "./data_files/rf.outlook.nc"

response = requests.get(url)
if response.status_code == 200:
    with open(filename, 'wb') as f:
        f.write(response.content)
else:
    print(f"Failed to download file. Status code: {response.status_code}")

rf_outlook_dataset = xr.open_dataset(filename)
rf_outlook_palau = rf_outlook_dataset['rain'].sel(lat=slice(min_lat_ssh,max_lat_ssh),lon=slice(min_lon_ssh,max_lon_ssh))

rf_outlook_palau_df = rf_outlook_palau.to_dataframe().reset_index()
rf_outlook_value = rf_outlook_palau_df['rain'].iloc[0]

df.loc["Rain", "Outlook"] = rf_outlook_value
source_df.loc['Rain','Outlook'] = 'http://www.bom.gov.au/climate/pacific/outlooks/'
print(rf_outlook_value)

25.685072


### Temperature

#### Past Month

Source: IRI [Link](http://iridl.ldeo.columbia.edu/maproom/Global/Atm_Temp/Anomaly.html)
<br>
Monthly surface air temperature anomalies are shown in units of °C
<br>
1991-2020 climatological average

In [34]:
today_hst = datetime.now(ZoneInfo("Pacific/Honolulu"))
first_of_this_month = today_hst.replace(day=1)
last_month_date = first_of_this_month - timedelta(days=1)
last_month_str = last_month_date.strftime("%b %Y")  # e.g. "Apr 2025"

# 2. Build dynamic URL
base_url = "https://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/.CPC/.CAMS/.anomaly/.temp_9120"
url = (
    f"{base_url}"
    f"/Y/%285N%29%2810N%29RANGEEDGES/X/%28130E%29%28140E%29RANGEEDGES"
    f"/T/%28{last_month_str}%29%28{last_month_str}%29RANGEEDGES/data.nc"
)

filename = "./data_files/tmean_lastMonth.nc"
response = requests.get(url)

with open(filename, 'wb') as f:
    f.write(response.content)

ds = xr.open_dataset(filename, decode_times=False, use_cftime=True)
tmean_lastmonth_value_c = ds['temp_9120'].sel(Y=lat, X=lon, T=t_value,method='nearest').values

tmean_lastmonth_value_f = tmean_lastmonth_value_c * 9/5
df.loc['TMean','LastMonth']=tmean_lastmonth_value_f
source_df.loc['TMean','LastMonth'] = 'http://iridl.ldeo.columbia.edu/maproom/Global/Atm_Temp/Anomaly.html'

#### Current

Source: CFS
<br>
Using "6" cycle for 3PM but there is a notable lag for this data...

In [35]:
grib_url = f"https://nomads.ncep.noaa.gov/pub/data/nccf/com/cfs/prod/cfs.{today_str}/{cycle}/time_grib_01/tmp2m.01.{today_str}{cycle}.daily.grb2"
idx_url = grib_url + ".idx"

grib_file = "./data_files/tmean.current.grb2"

idx_file = grib_file + ".idx"

with open(grib_file, "wb") as f:
    f.write(requests.get(grib_url).content)

with open(idx_file, "wb") as f:
    f.write(requests.get(idx_url).content)

ds = xr.open_dataset(grib_file, engine="cfgrib")

palau = ds.sel(latitude=lat,longitude=lon,method='nearest')
temp_palau_df = palau[['t2m']].to_dataframe().reset_index()

palau_tz = ZoneInfo("Pacific/Palau")
now_palau = datetime.now(palau_tz)

temp_palau_df['valid_time'] = pd.to_datetime(temp_palau_df['valid_time']).dt.tz_localize('UTC').dt.tz_convert(palau_tz)

Ignoring index file './data_files/tmean.current.grb2.5b7b6.idx' older than GRIB file


In [36]:
temp_palau_1d_df = temp_palau_df[temp_palau_df['valid_time'].dt.date == now_palau.date()]
temp_1d_mean = temp_palau_df['t2m'].mean()
temp_1d_mean_c = temp_1d_mean-273.15
temp_1d_mean_f = temp_1d_mean_c*9/5+32
df.loc["TMean", "Current"] = temp_1d_mean_f
source_df.loc['TMean','Current'] = 'https://www.ncei.noaa.gov/products/weather-climate-models/climate-forecast-system'
print("One day temp average:", temp_1d_mean.item())

One day temp average: 302.1983642578125


#### Tmean Forecast

CFSv2 [Link](https://www.cpc.ncep.noaa.gov/products/people/mchen/CFSv2FCST/weekly/)
<br>
Updates daily.<br>
Predicts t+1 to t+8 days<br>
Temperature anomaly in Kelvin converted to F


In [54]:
url = f'https://www.cpc.ncep.noaa.gov/products/people/mchen/CFSv2FCST/weekly/data/CFSv2.tmpsfc.{yest_str}.wkly.anom.nc'
filename = "./data_files/tmean.forecast.nc"

response = requests.get(url)
if response.status_code == 200:
    with open(filename, 'wb') as f:
        f.write(response.content)
else:
    print(f"Failed to download file. Status code: {response.status_code}")
    
tmean_forecast_dataset = xr.open_dataset(filename)

tmean_forecast_dataset_palau = tmean_forecast_dataset['anom'].sel(lat=lat, lon=lon, method='nearest')
tmean_forecast_palau_df = tmean_forecast_dataset_palau.to_dataframe().reset_index()

tmean_forecast_value_c = tmean_forecast_palau_df['anom'].iloc[0]
tmean_forecast_value_f = tmean_forecast_value_c * 9/5
df.loc["TMean", "Forecast"] = tmean_forecast_value_f
source_df.loc['TMean','Forecast'] = f'https://www.cpc.ncep.noaa.gov/products/people/mchen/CFSv2FCST/weekly/'


#### TMean Outlook

Seasonal climate forecast from CFSv2 [Link](https://www.cpc.ncep.noaa.gov/products/CFSv2/CFSv2_body.html)
<br>
Monthly T2m anomaly initially in Kelvin
<br>
Respect to 1991-2020 hindcast climatology
<br>
Using 1st ensemble (E1) data

In [57]:
url = "https://www.cpc.ncep.noaa.gov/products/CFSv2/dataInd1/glbSSTMon.nc"
filename = "./data_files/tmean.outlook.nc"

response = requests.get(url)
if response.status_code == 200:
    with open(filename, 'wb') as f:
        f.write(response.content)
else:
    print(f"Failed to download file. Status code: {response.status_code}")

tmean_outlook_dataset = xr.open_dataset(filename)

tmean_outlook_dataset_palau = tmean_outlook_dataset['anom'].sel(lat=lat, lon=lon, method='nearest')
tmean_outlook_palau_df = tmean_outlook_dataset_palau.to_dataframe().reset_index()

tmean_outlook_value_c = tmean_outlook_palau_df['anom'].iloc[1]
tmean_outlook_value_f = tmean_outlook_value_c * 9/5
df.loc["TMean", "Outlook"] = tmean_outlook_value_f
source_df.loc['TMean','Outlook'] = 'https://www.cpc.ncep.noaa.gov/products/CFSv2/CFSv2_body.html'

In [59]:
tmean_outlook_palau_df

Unnamed: 0,ens,time,lev,lon,lat,anom
0,1.0,2025-06-01,1.0,135.0,7.0866,
1,1.0,2025-07-01,1.0,135.0,7.0866,0.855042
2,1.0,2025-08-01,1.0,135.0,7.0866,
3,1.0,2025-09-01,1.0,135.0,7.0866,1.558949
4,1.0,2025-10-01,1.0,135.0,7.0866,
5,1.0,2025-11-01,1.0,135.0,7.0866,0.0


### Wind Speed

In [51]:
grib_url = f"https://nomads.ncep.noaa.gov/pub/data/nccf/com/cfs/prod/cfs.{today_str}/{cycle}/time_grib_01/wnd10m.01.{today_str}{cycle}.daily.grb2"
idx_url = grib_url + ".idx"

grib_file = "./data_files/wnd10m.cfs.daily.grb2"
os.remove(f"{grib_file}.idx")

idx_file = grib_file + ".idx"

with open(grib_file, "wb") as f:
    f.write(requests.get(grib_url).content)

with open(idx_file, "wb") as f:
    f.write(requests.get(idx_url).content)

ds = xr.open_dataset(grib_file, engine="cfgrib")
palau = ds.sel(latitude=lat,longitude=lon,method='nearest')
uv_palau_df = palau[['u10', 'v10']].to_dataframe().reset_index()

palau_tz = ZoneInfo("Pacific/Palau")
now_palau = datetime.now(palau_tz)
uv_palau_df['valid_time'] = pd.to_datetime(uv_palau_df['valid_time']).dt.tz_localize('UTC').dt.tz_convert(palau_tz)

start_date = (now_palau + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
end_date = start_date + timedelta(days=7) - timedelta(seconds=1)

uv_palau_3m_df = uv_palau_df[
    (uv_palau_df['valid_time'] >= start_date) &
    (uv_palau_df['valid_time'] <= end_date)
]
uv_palau_3m_df = uv_palau_3m_df.copy()
uv_palau_3m_df['wind_speed'] = np.sqrt(uv_palau_3m_df['u10']**2 + uv_palau_3m_df['v10']**2)
uv_palau_3m_df['Date'] = uv_palau_3m_df['valid_time'].dt.date


wind_speed_df = uv_palau_3m_df.groupby('Date')[['wind_speed']].max()
wind_speed_df

Ignoring index file './data_files/wnd10m.cfs.daily.grb2.5b7b6.idx' older than GRIB file


Unnamed: 0_level_0,wind_speed
Date,Unnamed: 1_level_1
2025-05-14,6.853744
2025-05-15,7.780655
2025-05-16,9.088653
2025-05-17,8.21658
2025-05-18,4.873079
2025-05-19,3.792691
2025-05-20,2.694736


In [None]:
result = wind_speed_df[["wind_speed"]].reset_index()

result.to_json("./data/wind_speed.json",orient="records", date_format="iso")

In [None]:
source_df.loc['Wind','Forecast'] = 'https://www.ncei.noaa.gov/products/weather-climate-models/climate-forecast-system'

In [None]:
df = df.astype(float)
df.reset_index(inplace=True)
df.rename(columns={"index": "Type"}, inplace=True)

df.to_json("./data/palau_rf_temp.json", orient="records", date_format="iso")


In [None]:
source_df.reset_index(inplace=True)
source_df.rename(columns={"index": "Type"}, inplace=True)

source_df.to_json("./data/sources.json", orient="records", date_format="iso")
