# Calculate the GPM / IMERG climatology: one file per day of the year (month.day), except for 29 February

In [36]:
%matplotlib inline

In [37]:
import pathlib

In [38]:
import pandas as pd
import numpy as np

In [39]:
import xarray as xr

In [40]:
# Home directory of the current user; used as the root of the data path built below
HOME = pathlib.Path.home()

In [41]:
# Directory that is searched for the daily GPM-IMERG NetCDF files
dpath = HOME.joinpath(
    'operational', 'ICU', 'ops', 'data', 'GPM_IMERG', 'daily', 'extended_SP'
)

In [42]:
# Sub-directory of the daily data directory where the climatology files are written
clim_path = dpath / 'CLIMATOLOGY'

In [43]:
# Create the climatology output directory together with any missing parents.
# exist_ok=True removes the check-then-create race of a separate exists() test
# and keeps the cell safe to re-run.
clim_path.mkdir(parents=True, exist_ok=True)

### Create the list of dates for a normal (non-leap) year

In [44]:
# One timestamp per calendar day of 2001; 2001 is not a leap year, so 29 Feb never appears
dates = pd.date_range('2001-01-01', '2001-12-31', freq='1D')

In [45]:
dates

DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03', '2001-01-04',
               '2001-01-05', '2001-01-06', '2001-01-07', '2001-01-08',
               '2001-01-09', '2001-01-10',
               ...
               '2001-12-22', '2001-12-23', '2001-12-24', '2001-12-25',
               '2001-12-26', '2001-12-27', '2001-12-28', '2001-12-29',
               '2001-12-30', '2001-12-31'],
              dtype='datetime64[ns]', length=365, freq='D')

### Loop over the dates, collect all the files for each month.day, and calculate the time mean over the 20 years

In [46]:
# For every calendar day (month.day), average 'precipitationCal' over all the
# files found for that day and write the result to the climatology directory.
for date in dates:
    # The '????' wildcard stands for the 4-character field preceding month.day
    # in the file name (presumably the year); sorting keeps the order deterministic.
    lfiles = sorted(dpath.glob(f"GPM_IMERG_daily.v06.????.{date:%m.%d}.nc"))

    # Fail with an explicit message instead of xarray's generic "no files to open".
    # FileNotFoundError is an OSError subclass, the type xarray raises in that case.
    if not lfiles:
        raise FileNotFoundError(
            f"no GPM_IMERG daily files found in {dpath} for month.day {date:%m.%d}"
        )

    # combine='by_coords' orders the files using their own coordinate values;
    # concat_dim must not be passed with it (recent xarray raises a ValueError,
    # older releases ignored the argument).
    # The context manager closes the multi-file dataset even if the mean or the
    # write fails; the handle returned by open_mfdataset is no longer rebound
    # to a subset before being closed.
    with xr.open_mfdataset(lfiles, combine='by_coords') as dset:
        clim = dset[['precipitationCal']].mean('time')
        clim.to_netcdf(clim_path / f"GPM_IMERG_daily.v06.{date:%m.%d}.nc")