Terminal command to fetch SST data: `wget -r -l1 -nc -t 50 https://www.ncei.noaa.gov/pub/data/cmb/ersst/v5/netcdf/`.

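This downloads every monthly SST file in NOAA's ERSST v5 directory (one file per year and month, from January 1854 to April 2024). By default `wget -r` saves them under `www.ncei.noaa.gov/pub/data/cmb/ersst/v5/netcdf/`; the cells below open the files by bare name, so run the notebook from the directory that contains them.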

In [1]:
import xarray as xr

In [8]:
# Build one annual-mean field of the 'ssta' variable per year from the monthly
# ERSST v5 files and store all of them in a single NetCDF file indexed by year.
file_pattern = "ersst.v5.{year:04d}{month:02d}.nc"

start_year = 1854
end_year = 2024
# The final year is incomplete: only January through April are available.
last_month_of_end_year = 4

yearly_means = []

for year in range(start_year, end_year + 1):
    last_month = last_month_of_end_year if year == end_year else 12

    monthly_data = []
    for month in range(1, last_month + 1):
        file_path = file_pattern.format(year=year, month=month)
        # Read the values into memory while the file is still open; the
        # context manager closes the file even if reading fails.
        with xr.open_dataset(file_path) as ds:
            monthly_data.append(ds['ssta'].load())

    yearly_ds = xr.concat(monthly_data, dim='time')
    yearly_mean = yearly_ds.mean(dim='time')
    yearly_means.append(yearly_mean)

yearly_means_ds = xr.concat(yearly_means, dim='year')
# Concatenating along a new dimension name creates no coordinate values.
# Label each entry with its calendar year so that label-based selection such
# as `.sel(year=slice(1990, 1999))` picks years rather than array positions.
yearly_means_ds = yearly_means_ds.assign_coords(
    year=list(range(start_year, end_year + 1))
)

output_file = "annual_means.nc"
yearly_means_ds.to_netcdf(output_file)

In [9]:
# Re-open the file written by the previous cell so the notebook renders its
# summary as the cell output.
# NOTE(review): the dataset returned here is never closed explicitly.
xr.open_dataset("annual_means.nc")

In [12]:
# Average the annual means over calendar decades and save one field per
# decade. The first and last windows are partial because the record runs from
# 1854 to 2024: 1854-1859, 1860-1869, ..., 2010-2019, 2020-2024.
start_year = 1854
end_year = 2024

# Load everything into memory and release the file handle right away.
with xr.open_dataset("annual_means.nc") as annual_file:
    yearly_means_ds = annual_file.load()

# Without coordinate values on 'year', `.sel` falls back to positional
# indexing and a slice such as 1854..1859 would select nothing. Label the
# dimension with calendar years if the file does not already carry them.
if 'year' not in yearly_means_ds.coords:
    yearly_means_ds = yearly_means_ds.assign_coords(
        year=list(
            range(start_year, start_year + yearly_means_ds.sizes['year'])
        )
    )

decade_means = []
decade_starts = []
decade_ends = []

decade_start = start_year
while decade_start <= end_year:
    # Each window ends on the last year of its calendar decade, clipped to the
    # end of the record, so no year is counted in two windows.
    decade_end = min(decade_start - decade_start % 10 + 9, end_year)

    # Label-based slices include both endpoints.
    decade_data = yearly_means_ds.sel(year=slice(decade_start, decade_end))
    decade_means.append(decade_data.mean(dim='year'))

    decade_starts.append(decade_start)
    decade_ends.append(decade_end)
    decade_start = decade_end + 1

decade_means_ds = xr.concat(decade_means, dim='decade')
# Per-element attrs do not survive concatenation (only the first element's
# are kept), so record each window's bounds as coordinates instead:
# 'decade' holds the first year of the window, 'decade_end' the last.
decade_means_ds = decade_means_ds.assign_coords(
    decade=decade_starts,
    decade_end=('decade', decade_ends),
)

output_file = "decade_means.nc"
decade_means_ds.to_netcdf(output_file)

In [13]:
# Re-open the file written by the previous cell so the notebook renders its
# summary as the cell output.
# NOTE(review): the dataset returned here is never closed explicitly.
xr.open_dataset("decade_means.nc")