In [78]:
import os
import sys
import pathlib
from glob import glob 
from datetime import date, datetime, timedelta
from dateutil.relativedelta import relativedelta
import json

In [79]:
import numpy as np
import pandas as pd

In [80]:
import xarray as xr
import salem

In [81]:
salem.__version__

'0.2.4'

### local function definition 

In [82]:
def make_ts(gpm_ts, climo): 
    """
    Build a monthly time-series table with anomalies and quintile categories.

    Parameters
    ----------
    gpm_ts : pandas.DataFrame
        Indexed by timestamps (the ``.month`` / ``.year`` of each index entry
        are used). The value of the FIRST column of each row is the one
        compared to the climatology, and a ``precipitationCal`` column must
        be present because it is selected in the returned table.
    climo : pandas.DataFrame
        Indexed by month number, with (at least) the columns
        ``'20%'``, ``'40%'``, ``'60%'``, ``'80%'`` and ``'mean'``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``gpm_ts`` restricted to the columns ``year``, ``month``,
        ``precipitationCal``, ``anomaly`` (value minus climatological mean),
        ``percent`` (value as a percentage of the climatological mean) and
        ``category`` (``'Well below'``, ``'Below'``, ``'Normal'``, ``'Above'``
        or ``'Well above'``). ``category`` is missing (``None``) when the
        value cannot be compared to the thresholds, e.g. when it is NaN.
    """
    
    gpm_ts_c = gpm_ts.copy()
    
    # labels of the climatology columns holding the category thresholds
    dec = ["{}%".format(x) for x in range(20, 100, 20)]
    
    categories = []
    anoms = []
    anoms_pc = []

    for timestamp, row in gpm_ts_c.iterrows(): 

        val = row.values[0]

        # climatological statistics for the calendar month of this row
        clim = climo.loc[timestamp.month, :]

        deciles = clim.loc[dec]

        ave = clim.loc['mean']

        anoms.append(val - ave)

        anoms_pc.append((val / ave) * 100)

        if (val < deciles.loc['20%']): 
            category = 'Well below'
        elif (deciles.loc['20%'] <= val <= deciles.loc['40%']): 
            category = 'Below'
        elif (deciles.loc['40%'] < val <= deciles.loc['60%']): 
            category = 'Normal'
        elif (deciles.loc['60%'] < val <= deciles.loc['80%']): 
            category = 'Above'    
        elif (val > deciles.loc['80%']): 
            category = 'Well above' 
        else: 
            # No comparison succeeded (e.g. NaN value or NaN threshold):
            # record an explicit missing category instead of leaving
            # `category` unbound or re-using the previous row's label.
            print("category cannot be calculated")
            category = None

        categories.append(category)
        
    gpm_ts_c['year'] = gpm_ts.index.year 

    gpm_ts_c['month'] = gpm_ts.index.month 

    gpm_ts_c['anomaly'] = np.array(anoms)

    gpm_ts_c['percent'] = np.array(anoms_pc)

    # assigned as a plain list so that a missing category stays a missing
    # value rather than being coerced to a string by numpy
    gpm_ts_c['category'] = categories
    
    return gpm_ts_c.loc[:,['year','month','precipitationCal','anomaly','percent','category']]

### defines the path to the climatologies 

In [83]:
dpath_climo = pathlib.Path.cwd().parent / 'outputs' / 'climatologies'

In [84]:
dpath_climo

PosixPath('/home/nicolasf/operational/ICU/ops/GPM_IMERG/outputs/climatologies')

### list the files (monthly climatologies for each Island Group)

In [85]:
lfiles_climo = list(dpath_climo.glob("GPM_IMERG_monthly_climo_*.csv"))

In [86]:
len(lfiles_climo)

29

### output path for the last 6 months time-series 

In [87]:
output_path = pathlib.Path.cwd().parent / 'outputs' / 'Time_Series' / 'last_6_months'

In [88]:
output_path

PosixPath('/home/nicolasf/operational/ICU/ops/GPM_IMERG/outputs/Time_Series/last_6_months')

In [89]:
# Create the output directory (and any missing parents). `exist_ok=True`
# makes this safe to re-run and avoids the check-then-create race; it still
# raises if the path exists but is not a directory.
output_path.mkdir(parents=True, exist_ok=True)

### path to the updated **extended South Pacific** GPM / IMERG files 

In [90]:
dpath_GPM = pathlib.Path.cwd().parents[1] / 'data' / 'GPM_IMERG' / 'daily' / 'extended_SP'

In [91]:
dpath_GPM

PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP')

In [92]:
lfiles_GPM = list(dpath_GPM.glob('*.nc'))

In [93]:
lfiles_GPM.sort()

### check that the GPM-IMERG dataset has been updated to the latest available date here (~ 2 days lag to real time)

In [94]:
lfiles_GPM[-10:]

[PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/GPM_IMERG_daily.v06.2020.01.22.nc'),
 PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/GPM_IMERG_daily.v06.2020.01.23.nc'),
 PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/GPM_IMERG_daily.v06.2020.01.24.nc'),
 PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/GPM_IMERG_daily.v06.2020.01.25.nc'),
 PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/GPM_IMERG_daily.v06.2020.01.26.nc'),
 PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/GPM_IMERG_daily.v06.2020.01.27.nc'),
 PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/GPM_IMERG_daily.v06.2020.01.28.nc'),
 PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/GPM_IMERG_daily.v06.2020.01.29.nc'),
 PosixPath('/home/nicolasf/operational/ICU/ops/data/GPM_IMERG/da

if not updated:

  
1) cd into `~/operational/ICU/ops/data/GPM_IMERG/daily/extended_SP/` and note the date of the last netcdf file   
2) activate the `pangeo` environment and cd into `~/operational/ICU/ops/data/GPM_IMERG/daily/scripts/`  
3) run:   

```
python get_daily_GPM_IMERG_netcdf.py -o ../extended_SP -lonW 125 -lonE 240 -latN 25 -latS -50 -s YYYYMMDD -e YYYYMMDD
```

Then re-run the lines above to get the updated list of files 

### get the last dates and determine the period to load in 

In [95]:
last_GPM_file = lfiles_GPM[-1]

In [96]:
# the three dot-separated fields just before the file extension,
# converted to integers
date_fields = str(last_GPM_file).split('.')[-4:-1]
dates_elems = [int(field) for field in date_fields]

In [97]:
dates_elems

[2020, 1, 31]

In [98]:
last_date_GPM = datetime(*dates_elems)

In [99]:
last_date_GPM

datetime.datetime(2020, 1, 31, 0, 0)

### we want the last 6 months of daily GPM-IMERG data 

In [100]:
nmonths = 6

In [101]:
# step back (nmonths - 1) months from the last available date, then snap
# to the first day of that month
first_month_GPM = last_date_GPM - relativedelta(months=nmonths - 1)
start_date_GPM = datetime(first_month_GPM.year, first_month_GPM.month, 1)

In [102]:
start_date_GPM

datetime.datetime(2019, 8, 1, 0, 0)

### sanity check on the dates here

In [103]:
print("the last 6 months GPM period will cover the days from {:%Y-%m-%d} to {:%Y-%m-%d}".format(start_date_GPM, last_date_GPM))

the last 6 months GPM period will cover the days from 2019-08-01 to 2020-01-31


### now construct the final list of files (using pandas date_range function)

In [104]:
dates_GPM = pd.date_range(start=start_date_GPM, end=last_date_GPM, freq='1D')

In [105]:
dates_GPM

DatetimeIndex(['2019-08-01', '2019-08-02', '2019-08-03', '2019-08-04',
               '2019-08-05', '2019-08-06', '2019-08-07', '2019-08-08',
               '2019-08-09', '2019-08-10',
               ...
               '2020-01-22', '2020-01-23', '2020-01-24', '2020-01-25',
               '2020-01-26', '2020-01-27', '2020-01-28', '2020-01-29',
               '2020-01-30', '2020-01-31'],
              dtype='datetime64[ns]', length=184, freq='D')

In [106]:
# one expected daily file path per date in the selected period
lfiles_GPM = [
    dpath_GPM / 'GPM_IMERG_daily.v06.{:%Y.%m.%d}.nc'.format(day)
    for day in dates_GPM
]

### and now loads the whole dataset of the last 6 months

In [107]:
dset = xr.open_mfdataset(lfiles_GPM, combine='by_coords')

In [108]:
dset

### calculates the monthly averages, will be preliminary for the last month if not all days downloaded

In [109]:
dset = dset[['precipitationCal']]

In [110]:
dset = dset.resample(time='1M').mean('time')

In [111]:
# `compute()` returns a NEW in-memory dataset and leaves the original lazy.
# Keep the result so that the per-country loop below does not re-read and
# re-average all the daily files for every shapefile.
dset = dset.compute()
dset

### Now loads the shapefiles 

In [112]:
shapes_ipath = pathlib.Path.cwd().parents[1] / 'data' / 'shapefiles' / 'ICU' / 'clipped' / 'countries_converted'

In [113]:
shapes_ipath

PosixPath('/home/nicolasf/operational/ICU/ops/data/shapefiles/ICU/clipped/countries_converted')

In [114]:
lshapefiles = shapes_ipath.glob('*/*.shp')

In [115]:
lshapefiles = list(lshapefiles)

In [116]:
lshapefiles.sort()

### open the dictionary mapping the country name in filenames to the actual country name 

In [117]:
with open(pathlib.Path.cwd().parents[1] / 'resources' / 'dict_countries.json', 'r') as fj: 
    dict_countries = json.load(fj)

### Now loop over each shapefile, use the country geometry to clip and mask the GPM-IMERG dataset, calculate the regional average, and make the last 6 months Time-Series

In [118]:
list(dpath_climo.glob("*monthly_climo*.csv"))[-1]

PosixPath('/home/nicolasf/operational/ICU/ops/GPM_IMERG/outputs/climatologies/GPM_IMERG_monthly_climo_Solomon_Islands.csv')

In [119]:
for shp_filename in lshapefiles: 

    shapes = salem.read_shapefile(shp_filename)

    # key used in the file names: the shapefile name without its first
    # 7 characters and without the 4-character '.shp' extension
    country_fname = shp_filename.name[7:-4]
    country_name = dict_countries[country_fname]

    print("processing {}".format(country_name))

    # crop to the shape (plus a margin), then mask what lies outside of it
    subset = dset.salem.subset(shape=shapes, margin=2)
    roi = subset.salem.roi(shape=shapes, all_touched=True)

    # average over the 'lat' and 'lon' dimensions -> one value per time step
    gpm_ts = roi.mean(dim=('lat','lon')).to_dataframe()
    last_date = gpm_ts.index[-1]

    climo_fname = 'GPM_IMERG_monthly_climo_{}.csv'.format(country_fname)
    climo = pd.read_csv(dpath_climo / climo_fname, index_col=0)

    last_6_months = make_ts(gpm_ts, climo)

    csv_fname = 'GPM_IMERG_{}_pbased.csv'.format(country_fname)
    last_6_months.to_csv(output_path / csv_fname)

processing American Samoa
processing Austral Islands
processing Federated States of Micronesia
processing Fiji
processing Guam
processing Kiribati: Gilbert Islands
processing Kiribati: Line Islands
processing Kiribati: Phoenix Islands
processing Marquesas
processing Marshall Islands
processing Nauru
processing New Caledonia
processing Niue
processing Northern Cook Islands
processing Northern Marianas
processing Palau
processing Papua New Guinea
processing Pitcairn Islands
processing Samoa
processing Society Islands
processing Solomon Islands
processing Southern Cook Islands
processing Tokelau
processing Tonga
processing Tuamotu / Gambier Islands
processing Tuvalu
processing Vanuatu North
processing Vanuatu South
processing Wallis & Futuna


In [120]:
dpath_climo

PosixPath('/home/nicolasf/operational/ICU/ops/GPM_IMERG/outputs/climatologies')

In [121]:
output_path

PosixPath('/home/nicolasf/operational/ICU/ops/GPM_IMERG/outputs/Time_Series/last_6_months')

In [122]:
last_6_months

Unnamed: 0_level_0,year,month,precipitationCal,anomaly,percent,category
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-08-31 00:00:00,2019,8,4.44845,1.077634,131.969522,Well above
2019-09-30 00:00:00,2019,9,7.528955,1.552173,125.970047,Above
2019-10-31 00:00:00,2019,10,5.991062,-2.588615,69.828526,Below
2019-11-30 00:00:00,2019,11,10.047151,-1.705875,85.485654,Below
2019-12-31 00:00:00,2019,12,12.003279,-1.911412,86.263356,Normal
2020-01-31 00:00:00,2020,1,17.149258,5.078937,142.077897,Above


In [123]:
gpm_ts

Unnamed: 0_level_0,precipitationCal
time,Unnamed: 1_level_1
2019-08-31 00:00:00,4.44845
2019-09-30 00:00:00,7.528955
2019-10-31 00:00:00,5.991062
2019-11-30 00:00:00,10.047151
2019-12-31 00:00:00,12.003279
2020-01-31 00:00:00,17.149258
