### Process the JMA GRIB files downloaded for the period 2015 to 2019-11 (i.e. after the _official_ hindcast period)

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt

In [2]:
import pathlib

In [3]:
import numpy as np
import pandas as pd 
import xarray as xr
import pygrib 

In [4]:
from datetime import datetime, timedelta
from dateutil import relativedelta

In [5]:
# Home directory of the current user; used below as the root of the input and output paths.
HOME = pathlib.Path.home()

In [6]:
# Lookup table: GRIB file-name prefix -> name given to the data variable
# in the datasets written out by this notebook.
dict_vars = {
    'surf_Prr': 'PRECIP',
    'surf_Pss': 'SST',
    'h2_Ptt': 'T2M',
    'p500_Phh': 'Z500',
}

In [7]:
# Display the resolved home directory as a quick sanity check of the path root.
print(HOME)

/home/nicolasf


In [8]:
# Root folder of the JMA GRIB files; one sub-folder per variable is expected below it.
dpath = HOME.joinpath('research', 'Smart_Ideas', 'data', 'GCMs', 'GRIB', 'JMA')

In [9]:
# Variables to convert; each name is both an input sub-folder and an output sub-folder.
variables = list(('Z500', 'PRECIP', 'T2M', 'SST'))

In [10]:
# Lead times used as the 'step' coordinate of the monthly fields kept from
# each GRIB file.
LEAD_STEPS = [1, 2, 3]


def _read_jma_grib(grb_file_name):
    """Read one JMA GRIB file into a (time, step, lat, lon) dataset.

    The first message of the file holds the seasonal average and is
    dropped; the remaining messages are the monthly fields, one per entry
    of ``LEAD_STEPS``.

    Parameters
    ----------
    grb_file_name : pathlib.Path
        Path of the GRIB file. The first two '_'-separated tokens of the
        file name are looked up in ``dict_vars`` to name the data variable.

    Returns
    -------
    xarray.Dataset
        Dataset with a single 'time' entry (the analysis date), sorted by
        ascending latitude.

    Raises
    ------
    ValueError
        If the file does not contain one seasonal message followed by
        exactly ``len(LEAD_STEPS)`` monthly messages.
    """
    var_name_grb = "_".join(grb_file_name.name.split('_')[0:2])

    analysis_dates = []
    fields = []

    grbs = pygrib.open(str(grb_file_name))
    try:
        print(grbs.name)

        last_message = None
        for g in grbs:
            print(g.forecastTime)
            # np.array() over masked arrays would silently drop the mask and
            # keep the raw fill values, so masked points become NaN here
            # (no-op when the field is a plain ndarray).
            fields.append(np.ma.filled(g.values, np.nan))
            analysis_dates.append(g.analDate)
            last_message = g

        if len(fields) != len(LEAD_STEPS) + 1:
            raise ValueError(
                f"{grb_file_name}: expected {len(LEAD_STEPS) + 1} GRIB messages, "
                f"found {len(fields)}"
            )

        # The grid is read while the file is still open, from a message that
        # belongs to *this* file (never a leftover from a previous one).
        lats, lons = last_message.latlons()
    finally:
        # Always release the file handle, even if reading fails.
        grbs.close()

    # Keep only the monthly fields (indices 1 to 3); index 0 is the seasonal
    # average. Then add a leading length-1 'time' axis.
    data = np.array(fields)[1:, :, :]
    data = data[np.newaxis, ...]

    # latlons() returns 2-D arrays; on this grid one column / one row is
    # used as the 1-D coordinate.
    lats = lats[:, 0]
    lons = lons[0, :]

    dset = xr.Dataset(
        {
            'time': ('time', np.unique(analysis_dates)),
            'step': ('step', LEAD_STEPS),
            'lat': ('lat', lats),
            'lon': ('lon', lons),
            dict_vars[var_name_grb]: (('time', 'step', 'lat', 'lon'), data),
        }
    )

    return dset.sortby('lat')


for variable in variables:

    opath = HOME / 'research' / 'Smart_Ideas' / 'data' / 'GCMs' / 'NC' / 'JMA' / '2015_2019' / variable

    opath.mkdir(parents=True, exist_ok=True)

    folder = dpath / variable

    for year in range(2015, 2019 + 1):

        # One GRIB file per initialisation month, suffixed with YYYYMM.
        list_files = sorted(folder.glob(f"*.{year}??"))

        print(f"year {year} has {len(list_files)} months")

        if not list_files:
            # xr.concat() raises on an empty list; nothing to write for this year.
            continue

        dset_year = [_read_jma_grib(grb_file_name) for grb_file_name in list_files]

        dset = xr.concat(dset_year, dim='time')

        dset.to_netcdf(opath / f"JMA_{variable}_{year}.nc")

        dset.close()


year 2015 has 12 months
/home/nicolasf/research/Smart_Ideas/data/GCMs/GRIB/JMA/Z500/p500_Phh_em.201501
1
1
2
3
/home/nicolasf/research/Smart_Ideas/data/GCMs/GRIB/JMA/Z500/p500_Phh_em.201502
1
1
2
3
/home/nicolasf/research/Smart_Ideas/data/GCMs/GRIB/JMA/Z500/p500_Phh_em.201503
1
1
2
3
/home/nicolasf/research/Smart_Ideas/data/GCMs/GRIB/JMA/Z500/p500_Phh_em.201504
1
1
2
3
/home/nicolasf/research/Smart_Ideas/data/GCMs/GRIB/JMA/Z500/p500_Phh_em.201505
1
1
2
3
/home/nicolasf/research/Smart_Ideas/data/GCMs/GRIB/JMA/Z500/p500_Phh_em.201506
1
1
2
3
/home/nicolasf/research/Smart_Ideas/data/GCMs/GRIB/JMA/Z500/p500_Phh_em.201507
1
1
2
3
/home/nicolasf/research/Smart_Ideas/data/GCMs/GRIB/JMA/Z500/p500_Phh_em.201508
1
1
2
3
/home/nicolasf/research/Smart_Ideas/data/GCMs/GRIB/JMA/Z500/p500_Phh_em.201509
1
1
2
3
/home/nicolasf/research/Smart_Ideas/data/GCMs/GRIB/JMA/Z500/p500_Phh_em.201510
1
1
2
3
/home/nicolasf/research/Smart_Ideas/data/GCMs/GRIB/JMA/Z500/p500_Phh_em.201511
1
1
2
3
/home/nicolasf/rese