# GSMaP Preprocessing
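This notebook preprocesses GSMaP daily precipitation data (reading the gzipped binary grids, clipping them to the study extent, and exporting GeoTIFFs) for the initialization of the model.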

In [27]:
import gzip
import xarray as xr
import rioxarray
import numpy as np
from datetime import datetime, timedelta
from pandas import date_range
import os

In [3]:
# Date of the GSMaP daily file to read, and the directory that holds it.
data_time = datetime(year=2024, month=1, day=1)
gsmap_dir = 'D:/Projects/ef5-preprocessing/data/gsmap_daily'
# Archive name for that date; the date is embedded as YYYYMMDD.
filename = f'gsmap_gauge.{data_time:%Y%m%d}.0.1d.daily.00Z-23Z.v8.1000.0.dat.gz'

In [5]:
# ----- GSMaP 0.1° daily grid metadata -----
nx, ny = 3600, 1200        # number of columns (lon) and rows (lat)
lon0, lat0 = 0.05, 59.95   # first longitude / latitude coordinate value
dlon, dlat = 0.1, -0.1     # step per column / row; dlat < 0, so lats decrease

# Build the coordinate vectors
lons = lon0 + dlon * np.arange(nx)
lats = lat0 + dlat * np.arange(ny)

In [23]:
# Single-file trial: read one gzipped GSMaP grid, clip it to the study extent
# and export the result as a GeoTIFF.
with gzip.open(os.path.join(gsmap_dir, filename), "rb") as f:
    # The payload is a flat little-endian float32 array. reshape() raises
    # ValueError if it does not hold exactly ny * nx values.
    data = np.frombuffer(f.read(), dtype="<f4").reshape(ny, nx)

da = xr.DataArray(
    data,
    dims=("lat", "lon"),
    coords={"lat": lats, "lon": lons},
    name="precip"
)

# (min_lon, min_lat, max_lon, max_lat)
extent = (105.0, -8.0, 108.9, -5.7)
# lats run in decreasing order, so the latitude slice goes max -> min.
da_slice = da.sel(lat=slice(extent[3], extent[1]), lon=slice(extent[0], extent[2]))

nodata = -9999.0
# NOTE(review): GSMaP flags missing cells with negative values (confirm
# against the product format description). Mask them before scaling so they
# are not turned into large negative "rainfall" totals.
da_slice = da_slice.where(da_slice >= 0)
# Scale by 24 (presumably mean mm/h -> mm/day; confirm), then store the
# masked cells as the nodata value.
da_slice = (da_slice * 24).fillna(nodata).astype("float32")
da_slice = da_slice.rio.write_crs("EPSG:4326")
# `fillValue=` is not a recognised to_raster/GDAL option, so the nodata value
# never reached the file. It has to be registered on the array instead.
da_slice = da_slice.rio.write_nodata(nodata)
da_slice = da_slice.rio.set_spatial_dims(x_dim="lon", y_dim="lat")

da_slice.rio.to_raster('gsmap_test.tif', compress="LZW", tiled=False)

Wrap the steps above in a function so that multiple files can be processed.

In [30]:
def clip_data(extent, in_filename, out_filename, nodata=-9999.0):
    """Clip one gzipped GSMaP grid to ``extent`` and write it as a GeoTIFF.

    The grid layout comes from the notebook-level ``nx``, ``ny``, ``lats``
    and ``lons`` defined in the grid-metadata cell, which must have been run.

    Parameters
    ----------
    extent : tuple
        ``(min_lon, min_lat, max_lon, max_lat)`` in the grid's coordinates.
    in_filename : str
        Path to the ``.dat.gz`` file holding ``ny * nx`` little-endian
        float32 values.
    out_filename : str
        Path of the GeoTIFF to create.
    nodata : float, optional
        Value written for missing cells and recorded as the raster's nodata.

    Raises
    ------
    FileNotFoundError
        If ``in_filename`` does not exist.
    ValueError
        If the file does not contain exactly ``ny * nx`` values.
    """
    with gzip.open(in_filename, "rb") as f:
        data = np.frombuffer(f.read(), dtype="<f4").reshape(ny, nx)

    da = xr.DataArray(
        data,
        dims=("lat", "lon"),
        coords={"lat": lats, "lon": lons},
        name="precip"
    )

    min_lon, min_lat, max_lon, max_lat = extent
    # lats run in decreasing order, so the latitude slice goes max -> min.
    da_slice = da.sel(lat=slice(max_lat, min_lat), lon=slice(min_lon, max_lon))

    # NOTE(review): GSMaP flags missing cells with negative values (confirm
    # against the product format description). Mask them before scaling so
    # they are not turned into large negative "rainfall" totals.
    da_slice = da_slice.where(da_slice >= 0)
    # Scale by 24 (presumably mean mm/h -> mm/day; confirm), then store the
    # masked cells as the nodata value.
    da_slice = (da_slice * 24).fillna(nodata).astype("float32")
    da_slice = da_slice.rio.write_crs("EPSG:4326")
    # `fillValue=` is not a recognised to_raster/GDAL option, so the nodata
    # value never reached the file. Register it on the array instead.
    da_slice = da_slice.rio.write_nodata(nodata)
    da_slice = da_slice.rio.set_spatial_dims(x_dim="lon", y_dim="lat")

    da_slice.rio.to_raster(out_filename, compress="LZW", tiled=False)

In [31]:
# Batch conversion: clip every daily file in the date range to the study
# extent and write one GeoTIFF per day into out_path.
out_path = 'D:/Projects/ef5-preprocessing/data/gsmap_daily/'
os.makedirs(out_path, exist_ok=True)  # make sure the destination exists

# (min_lon, min_lat, max_lon, max_lat); constant, so defined once.
extent = (105.0, -8.0, 108.9, -5.7)

data_times = date_range(start='2024-01-01', end='2025-02-28', freq='D')
# Loop-local names (day, month_dir) keep the notebook-level gsmap_dir,
# filename and data_time from the configuration cell intact, so earlier
# cells give the same result when re-run after this one.
for day in data_times:
    month_dir = f'D:/Data/GSMAP_daily/{day:%Y%m}'  # inputs are grouped by YYYYMM
    in_filename = os.path.join(
        month_dir,
        f'gsmap_gauge.{day:%Y%m%d}.0.1d.daily.00Z-23Z.v8.1000.0.dat.gz',
    )
    out_filename = os.path.join(out_path, f'gsmap.daily.{day:%Y%m%d}.tif')
    clip_data(extent, in_filename, out_filename)