# Convert .tif images to NetCDF (.nc) format
Nachat Jatusripitak

In [1]:
# Import required packages
import glob
import pandas as pd
import xarray as xr
import rioxarray  # pip install rioxarray
import numpy as np


In [2]:
# Names given to the bands of each input raster, by band position.
# NOTE(review): assumes every .tif stores its bands in exactly this order --
# confirm against whatever produced the files.
band_names = ['pm25_t', 'u_wind_t', 'v_wind_t', 'dew_temp_t', 'temp_t',
              'surf_pressure_t', 'precip_t', 'frp_t', 'elevation_t', 'delta_pm25_t+1']

files = sorted(glob.glob("../dataset_1/*.tif"))
if not files:
    # Fail early with a clear message; otherwise the error only surfaces
    # later, when xr.concat is handed empty lists.
    raise FileNotFoundError("No .tif files found in ../dataset_1/")

# One list of per-date 2-D arrays for each band name.
band_series = {name: [] for name in band_names}

for fn in files:
    # The date is the text between the last underscore and the ".tif" suffix.
    date = pd.to_datetime(fn.split("_")[-1].split(".tif")[0])

    # Read the raster into memory and close the file straight away, so the
    # loop does not keep one open handle per input file for the whole run.
    with rioxarray.open_rasterio(fn) as src:  # dims: (band, y, x)
        da_multi = src.load()

    for idx, name in enumerate(band_names):
        # Pick the idx-th band (dropping the band dim entirely), name the
        # variable, and tag it with the date parsed from the file name.
        da_band = (
            da_multi.isel(band=idx, drop=True)
            .rename(name)
            .assign_coords(time=date)
        )
        band_series[name].append(da_band)

# Stack each band's per-date arrays along a new "time" dimension.
ds = xr.Dataset({
    name: xr.concat(series, dim="time")
    for name, series in band_series.items()
})

KeyboardInterrupt: 

In [None]:
# Replace the y/x coordinates with integer row/column indices named i/j.
ds = ds.rename({'y': 'i', 'x': 'j'})
ds = ds.assign_coords(
    i=np.arange(ds.sizes['i']),
    j=np.arange(ds.sizes['j']),
)

# Remove the 'spatial_ref' entry so it is not carried into the output.
ds = ds.drop_vars('spatial_ref')

# The lag below is positional, so make sure the time axis is chronological
# first (a no-op when the steps are already in order).
ds = ds.sortby('time')

# Lagged target: delta_pm25_t at a given step is delta_pm25_t+1 of the
# previous step. The first step has no predecessor and is filled with NaN.
ds['delta_pm25_t'] = ds['delta_pm25_t+1'].shift(time=1)

# Drop every time step in which any variable has a missing value; this
# removes at least the first step created by the shift above.
# NOTE(review): if some pixel is NaN at every step (e.g. nodata cells), this
# drops all time steps -- confirm the inputs have no such pixels.
ds = ds.dropna(dim="time", how="any")

In [None]:
# Write the dataset to "base.nc" as NetCDF-4, applying zlib compression
# (level 4) to every data variable.
compression = {"zlib": True, "complevel": 4}
encoding = {var: dict(compression) for var in ds.data_vars}

ds.to_netcdf("base.nc", format="NETCDF4", engine="netcdf4", encoding=encoding)