### This file is used to convert data from GRIB to NetCDF (note: main() below currently writes Parquet files). You can select the dates, variables, and pressure levels you want, and it will return training/testing features and labels.

In [16]:
import sys
from datetime import timedelta
import datetime
import xarray as xr
import re
import argparse
# import tensorflow as tf
import glob
import os
import pandas as pd
import numpy as np
import time
import cartopy.crs as ccrs
from pathlib import Path
import calendar
import multiprocessing

In [17]:
#make list of good files and print out any bad files
def is_non_zero_file(fpath):
    """Return ``fpath`` if it is an existing, non-empty regular file.

    When the path is missing, is not a regular file, or has size zero, a
    message is printed and ``None`` is returned instead.
    """
    usable = os.path.isfile(fpath) and os.path.getsize(fpath) > 0
    if not usable:
        print("file not found or corrupt ", fpath) 
        return None
    return fpath

In [18]:
def days2files(input_path, start_date, end_date, init_time, model, fh, prs):
    """Build the list of existing, non-empty GRIB files for a date range.

    Parameters
    ----------
    input_path : str
        Base directory of the model archive (used as a plain string prefix).
    start_date, end_date : date-like
        Inclusive bounds passed to ``pd.date_range``.
    init_time : str
        Model initialization hour as it appears in the file names (e.g. '00').
    model : str
        One of 'nam', 'hrrr' or 'gfs'.
    fh : iterable of int
        Forecast hours to include.
    prs : bool
        Only used for 'hrrr': pressure-level files when true, surface files
        otherwise.

    Returns
    -------
    list of str
        Sorted candidate paths that passed ``is_non_zero_file``. The list is
        also printed.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported names.
    Exception
        With message 'No files found' if no candidate path could be built
        (e.g. empty ``fh`` or an empty date range).
    """
    DATES = pd.date_range(start_date, end_date)

    if model == 'nam':
        fileList = [f'{input_path}{DATE:%Y}/{DATE:%m}/nam_218_{DATE:%Y%m%d}_{init_time}00_0{f:02d}.grb2' for DATE in DATES for f in fh]
    elif model == 'hrrr':
        if prs:
            fileList = [f'{input_path}prs/{DATE:%Y}/{DATE:%m}/{DATE:%Y%m%d}_hrrr.t{init_time}z.wrfprsf{f:02d}.grib2' for DATE in DATES for f in fh]
        else:
            fileList = [f'{input_path}/{DATE:%Y}/{DATE:%m}/{DATE:%Y%m%d}_hrrr.t{init_time}z.wrfsfcf{f:02d}.grib2' for DATE in DATES for f in fh]
    elif model == 'gfs':
        fileList = [f'{input_path}{DATE:%Y}/{DATE:%m}/gfs_4_{DATE:%Y%m%d}_{init_time}00_0{f:02d}.grb2' for DATE in DATES for f in fh]
    else:
        # Previously fell through and failed later with UnboundLocalError.
        raise ValueError(f"unsupported model {model!r}; expected 'nam', 'hrrr' or 'gfs'")

    fileList.sort()

    if not fileList:
        raise Exception('No files found')

    # Keep only files that exist and are not empty; each path is checked
    # exactly once (is_non_zero_file prints a message for every rejected path).
    goodFiles = [path for path in fileList if is_non_zero_file(path) is not None]

    print(goodFiles)
    return goodFiles

In [19]:
#call this preprocessing when reading in multi files to clean up and only take what we need while reading to save time and mem
def preprocessNam(ds):
    """Report whether ``ds`` has the full NAM grid and keep only wanted variables.

    The variables kept are those listed in the module-level ``nam_vars`` that
    are present in ``ds``. The grid check only prints a message naming the
    source file (``ds.encoding["source"]``); it does not reject the dataset.
    Presumably meant to be used as an ``open_mfdataset`` ``preprocess``
    callback -- confirm against callers.
    """
    #get available vars
    available_vars = [v for v in nam_vars if v in ds.keys()]

    # The full grid is 614 x 428: a mismatch in EITHER dimension means the
    # grid is incomplete (the previous `and` only flagged files where both
    # dimensions were wrong).
    if ds.dims['x'] != 614 or ds.dims['y'] != 428:
        print("bad dimensions in file: ",ds.encoding["source"])
    else:
        print("good dimensions in file: ",ds.encoding["source"])
    sys.stdout.flush()

    return ds[available_vars]

def preprocessNamALL(ds):
    """Report whether ``ds`` has the full NAM grid and return it unchanged.

    Prints "good dimensions" only when x == 614 and y == 428, otherwise
    "bad dimensions", followed by the source file (``ds.encoding["source"]``).
    """
    # A mismatch in EITHER dimension means the grid is incomplete (the
    # previous `and` only flagged files where both dimensions were wrong).
    if ds.dims['x'] != 614 or ds.dims['y'] != 428:
        print("bad dimensions in file: ",ds.encoding["source"])
    else:
        print("good dimensions in file: ",ds.encoding["source"])
    sys.stdout.flush()
    return ds

In [20]:
#call this preprocessing when reading in multi files to clean up and only take what we need while reading to save time and mem
def preprocessHRRRPrs(ds):
    """Check the HRRR grid and subset ``ds`` to wanted variables and levels.

    Variables come from the module-level ``hrrr_prs_vars`` and pressure levels
    from the module-level ``pres``; only those actually present in ``ds`` are
    selected. If fewer than three of the requested levels are available, a
    dataset with no data variables is returned whose ``isobaricInhPa``
    coordinate holds the full ``pres`` list.

    The grid check only prints a message naming the source file
    (``ds.encoding["source"]``); it does not reject the dataset.
    """
    # The full grid is 1799 x 1059: a mismatch in EITHER dimension means the
    # grid is incomplete (the previous `and` only flagged files where both
    # dimensions were wrong).
    if ds.dims['x'] != 1799 or ds.dims['y'] != 1059:
        print("bad dimensions in file: ",ds.encoding["source"])
    else:
        print("good dimensions in file: ",ds.encoding["source"])
    sys.stdout.flush()

    available_vars = [v for v in hrrr_prs_vars if v in ds.keys()]
    available_pres = [p for p in pres if p in ds.coords['isobaricInhPa'].values]

    if len(available_pres) < 3:
        # Too few levels: hand back a placeholder that still carries the
        # requested pressure coordinate.
        return xr.Dataset(coords={'isobaricInhPa': ('isobaricInhPa', pres)})

    return ds[available_vars].sel(isobaricInhPa=available_pres)

def preprocessHRRRPrsALL(ds):
    """Check the HRRR grid and subset ``ds`` to the wanted pressure levels.

    All variables are kept. Pressure levels come from the module-level
    ``pres``; only those present in ``ds`` are selected. If fewer than three
    are available, a dataset with no data variables is returned whose
    ``isobaricInhPa`` coordinate holds the full ``pres`` list.

    The grid check only prints a message naming the source file
    (``ds.encoding["source"]``); it does not reject the dataset.
    """
    # The full grid is 1799 x 1059: a mismatch in EITHER dimension means the
    # grid is incomplete (the previous `and` only flagged files where both
    # dimensions were wrong).
    if ds.dims['x'] != 1799 or ds.dims['y'] != 1059:
        print("bad dimensions in file: ",ds.encoding["source"])
    else:
        print("good dimensions in file: ",ds.encoding["source"])
    sys.stdout.flush()

    available_pres = [p for p in pres if p in ds.coords['isobaricInhPa'].values]

    if len(available_pres) < 3:
        # Too few levels: hand back a placeholder that still carries the
        # requested pressure coordinate.
        return xr.Dataset(coords={'isobaricInhPa': ('isobaricInhPa', pres)})

    return ds.sel(isobaricInhPa=available_pres)

In [21]:
#call this preprocessing when reading in multi files to clean up and only take what we need while reading to save time and mem
def preprocessHRRRSrf(ds):
    """Check the HRRR grid and keep only the wanted surface variables.

    The variables kept are those listed in the module-level ``hrrr_sfc_vars``
    that are present in ``ds``. The grid check only prints a message naming
    the source file (``ds.encoding["source"]``); it does not reject the
    dataset.
    """
    # The full grid is 1799 x 1059: a mismatch in EITHER dimension means the
    # grid is incomplete (the previous `and` only flagged files where both
    # dimensions were wrong).
    if ds.dims['x'] != 1799 or ds.dims['y'] != 1059:
        print("bad dimensions in file: ",ds.encoding["source"])
    else:
        print("good dimensions in file: ",ds.encoding["source"])
    sys.stdout.flush()

    available_vars = [v for v in hrrr_sfc_vars if v in ds.keys()]
    return ds[available_vars]

def preprocessHRRRSrfALL(ds):
    """Report whether ``ds`` has the full HRRR grid and return it unchanged.

    Prints "good dimensions" only when x == 1799 and y == 1059, otherwise
    "bad dimensions", followed by the source file (``ds.encoding["source"]``).
    """
    # A mismatch in EITHER dimension means the grid is incomplete (the
    # previous `and` only flagged files where both dimensions were wrong).
    if ds.dims['x'] != 1799 or ds.dims['y'] != 1059:
        print("bad dimensions in file: ",ds.encoding["source"])
    else:
        print("good dimensions in file: ",ds.encoding["source"])
    sys.stdout.flush()

    return ds

In [22]:
def preprocessGFS(ds):
    """Check the GFS grid and keep only the wanted variables.

    The variables kept are those listed in the module-level ``gfs_vars`` that
    are present in ``ds``. The grid check only prints a message naming the
    source file (``ds.encoding["source"]``); it does not reject the dataset.
    """
    #get available vars
    available_vars = [v for v in gfs_vars if v in ds.keys()]

    # The full grid is 361 latitudes x 720 longitudes: a mismatch in EITHER
    # dimension means the grid is incomplete (the previous `and` only flagged
    # files where both dimensions were wrong).
    if ds.dims['latitude'] != 361 or ds.dims['longitude'] != 720:
        print("bad dimensions in file: ",ds.encoding["source"])
    else:
        print("good dimensions in file: ",ds.encoding["source"])
    sys.stdout.flush()

    return ds[available_vars]

def preprocessGFSALL(ds):
    """Report whether ``ds`` has the full GFS grid and return it unchanged.

    Prints "good dimensions" only when latitude == 361 and longitude == 720,
    otherwise "bad dimensions", followed by the source file
    (``ds.encoding["source"]``).
    """
    # A mismatch in EITHER dimension means the grid is incomplete (the
    # previous `and` only flagged files where both dimensions were wrong).
    if ds.dims['latitude'] != 361 or ds.dims['longitude'] != 720:
        print("bad dimensions in file: ",ds.encoding["source"])
    else:
        print("good dimensions in file: ",ds.encoding["source"])
    sys.stdout.flush()
    return ds

In [23]:
def drop_variables(ds, remove_vars):
    """Return ``ds`` without any of the variables named in ``remove_vars``.

    Names that are not present in ``ds`` are skipped silently; a message is
    printed for each variable that is actually dropped.
    """
    for var in remove_vars:
        if var in ds.keys():
            print(f'dropping {var} from dataset')
            # Dataset.drop is deprecated; drop_vars is its replacement for
            # removing variables by name.
            ds = ds.drop_vars(var)
    return ds

In [24]:
def read_data_in_one_file(fileList, model, prs):
    """Read a single GRIB file that holds all forecast times.

    Parameters
    ----------
    fileList : str
        Path of the GRIB file (despite the name, a single path is passed
        straight to ``xr.open_dataset``).
    model : str
        'hrrr' or 'gfs'.
    prs : bool
        Only used for 'hrrr': read the ``isobaricInhPa`` group when true,
        otherwise the surface/near-surface groups.

    Returns
    -------
    xarray.Dataset
        For hrrr pressure data, the ``isobaricInhPa`` group as opened. In all
        other cases the 2 m, 10 m and surface-accumulation groups merged with
        ``compat='override'`` and passed through ``drop_variables``.

    Raises
    ------
    ValueError
        If ``model`` is not supported. (Previously the function fell through
        and returned ``None``.)
    """
    if model not in ('hrrr', 'gfs'):
        raise ValueError(f"unsupported model {model!r}; expected 'hrrr' or 'gfs'")

    if model == 'hrrr' and prs:
        return xr.open_dataset(fileList,
                               engine='cfgrib',
                               backend_kwargs={'indexpath': '', 'filter_by_keys': {'typeOfLevel': 'isobaricInhPa'}},
                               )

    # Each entry is a cfgrib filter; every filter is opened as its own
    # dataset and the results are merged below.
    dict_opts = [{'typeOfLevel': 'heightAboveGround', 'level': 2},
                 {'typeOfLevel': 'heightAboveGround', 'level': 10},
                 {'typeOfLevel': 'surface', 'stepType': 'accum'}]

    ds_save = [xr.open_dataset(fileList,
                               engine='cfgrib',
                               backend_kwargs={'indexpath': '', 'filter_by_keys': opt},
                               )
               for opt in dict_opts]
    ds = xr.merge(ds_save, compat='override')
    return drop_variables(ds, ['unknown', 'acpcp', 'sdwe', 'ssrun', 'bgrun'])

In [25]:
def read_data(fileList, model, prs):
    """Read a list of per-forecast-hour GRIB files into one dataset.

    Parameters
    ----------
    fileList : list of str
        GRIB files to read; they are concatenated along ``time``.
    model : str
        'nam', 'hrrr' or 'gfs'.
    prs : bool
        Only used for 'hrrr': read the ``isobaricInhPa`` group when true,
        otherwise the surface/near-surface groups.

    Returns
    -------
    xarray.Dataset
        For hrrr pressure data, the ``isobaricInhPa`` group. Otherwise the
        groups listed in ``dict_opts`` merged with ``compat='override'`` and
        passed through ``drop_variables``.

    Raises
    ------
    ValueError
        If ``model`` is not supported. (Previously the function fell through
        and returned ``None``.)
    """
    if model not in ('nam', 'hrrr', 'gfs'):
        raise ValueError(f"unsupported model {model!r}; expected 'nam', 'hrrr' or 'gfs'")

    if model == 'hrrr' and prs:
        return xr.open_mfdataset(fileList,
                                 parallel=True,
                                 engine='cfgrib',
                                 concat_dim='time',
                                 combine='nested',
                                 backend_kwargs={'indexpath': '', 'filter_by_keys': {'typeOfLevel': 'isobaricInhPa'}},
                                 preprocess=preprocessHRRRPrsALL,
                                 )

    # list the datasets that you want extracted from grib file
    dict_opts = [{'typeOfLevel': 'heightAboveGround', 'level': 2},
                 {'typeOfLevel': 'heightAboveGround', 'level': 10},
                 {'typeOfLevel': 'surface', 'stepType': 'accum'},
                 {'typeOfLevel': 'meanSea'},
                 {'typeOfLevel': 'surface', 'cfVarName': 'orog'}]

    if model == 'nam':
        # this solution/option exists because open_mfdataset cannot handle the 'unknown' variables
        # within the NAM grib files. Ideally would rather figure out how to use the usual open_mfdataset
        # solution, but not able to at this time.
        ds_opt_save = []
        for opt in dict_opts:
            ds_save = []
            for file in fileList:
                print(file)
                ds_save.append(xr.open_dataset(file,
                                               engine='cfgrib',
                                               backend_kwargs={'indexpath': '', 'filter_by_keys': opt},
                                               ))
            ds_opt_save.append(xr.combine_nested(ds_save, concat_dim='time'))
    else:
        # hrrr surface and gfs differ only in the grid-check callback.
        preprocess = preprocessHRRRSrfALL if model == 'hrrr' else preprocessGFSALL
        ds_opt_save = [xr.open_mfdataset(fileList,
                                         parallel=True,
                                         engine='cfgrib',
                                         concat_dim='time',
                                         combine='nested',
                                         backend_kwargs={'indexpath': '', 'filter_by_keys': opt},
                                         preprocess=preprocess,
                                         )
                       for opt in dict_opts]

    ds = xr.merge(ds_opt_save, compat='override')
    return drop_variables(ds, ['unknown', 'acpcp', 'sdwe', 'ssrun', 'bgrun'])

In [26]:
def return_ds_with_projection(ds, nysm_file='/home/aevans/nysm/archive/nysm/netcdf/proc/2019/01/20190101.nc'):
    """Select the grid points of ``ds`` nearest to each NYSM site.

    The ``x``/``y`` dimensions of ``ds`` are replaced (in place, on the
    dataset passed in) by Lambert Conformal projection coordinates so that
    ``ds.sel`` can be used with projected site locations.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with ``x``/``y`` dimensions and ``latitude``/``longitude``
        coordinates.
    nysm_file : str, optional
        NetCDF file whose ``lat``/``lon`` variables give the site locations.
        Defaults to the file that used to be hard-coded.

    Returns
    -------
    xarray.Dataset
        ``ds`` sampled at the nearest grid point to every site, along a new
        ``z`` dimension.
    """
    # In order to slice by lat & lon values, need to transform the grid into a projection
    # solution from https://stackoverflow.com/questions/58758480/xarray-select-nearest-lat-lon-with-multi-dimension-coordinates
    projection = ccrs.LambertConformal(central_longitude=-97.5,
                                       central_latitude=38.5,
                                       standard_parallels=[38.5])
    transform = np.vectorize(lambda x, y: projection.transform_point(x, y, ccrs.PlateCarree()))

    # The grid should be aligned such that the projection x and y are the same
    # at every y and x index respectively
    grid_y = ds.isel(x=0)
    grid_x = ds.isel(y=0)

    _, proj_y = transform(grid_y.longitude, grid_y.latitude)
    proj_x, _ = transform(grid_x.longitude, grid_x.latitude)

    # ds.sel only works on the dimensions, so we can't just add
    # proj_x and proj_y as additional coordinate variables
    ds["x"] = proj_x
    ds["y"] = proj_y

    # grab the unique latitude/longitude PAIRS for NYSM sites. De-duplicating
    # lat and lon independently would misalign the pairs (or raise IndexError)
    # as soon as two sites share a latitude or a longitude.
    with xr.open_dataset(nysm_file) as ds_nysm:
        df = ds_nysm.to_dataframe()
    sites = df[['lat', 'lon']].drop_duplicates()

    site_x, site_y = transform(sites['lon'].to_numpy(), sites['lat'].to_numpy())

    xx = xr.DataArray(site_x, dims='z')
    yy = xr.DataArray(site_y, dims='z')

    return ds.sel(x=xx, y=yy, method="nearest")

In [27]:
def return_ds_with_projection_ok(ds, station_file='/home/aevans/landtype/geoinfo.csv'):
    """Select the grid points of ``ds`` nearest to each site in ``station_file``.

    The ``x``/``y`` dimensions of ``ds`` are replaced (in place, on the
    dataset passed in) by Lambert Conformal projection coordinates so that
    ``ds.sel`` can be used with projected site locations.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with ``x``/``y`` dimensions and ``latitude``/``longitude``
        coordinates.
    station_file : str, optional
        CSV file with ``lat`` and ``lon`` columns giving the site locations
        (presumably the Oklahoma mesonet, per the ``oksm`` naming -- confirm).
        Defaults to the file that used to be hard-coded.

    Returns
    -------
    xarray.Dataset
        ``ds`` sampled at the nearest grid point to every site, along a new
        ``z`` dimension.
    """
    # In order to slice by lat & lon values, need to transform the grid into a projection
    # solution from https://stackoverflow.com/questions/58758480/xarray-select-nearest-lat-lon-with-multi-dimension-coordinates
    projection = ccrs.LambertConformal(central_longitude=-98.8,
                                       central_latitude=35.4,
                                       standard_parallels=[35.4])
    transform = np.vectorize(lambda x, y: projection.transform_point(x, y, ccrs.PlateCarree()))

    # The grid should be aligned such that the projection x and y are the same
    # at every y and x index respectively
    grid_y = ds.isel(x=0)
    grid_x = ds.isel(y=0)

    _, proj_y = transform(grid_y.longitude, grid_y.latitude)
    proj_x, _ = transform(grid_x.longitude, grid_x.latitude)

    # ds.sel only works on the dimensions, so we can't just add
    # proj_x and proj_y as additional coordinate variables
    ds["x"] = proj_x
    ds["y"] = proj_y

    # grab the unique latitude/longitude PAIRS of the sites. De-duplicating
    # lat and lon independently would misalign the pairs (or raise IndexError)
    # as soon as two sites share a latitude or a longitude.
    df = pd.read_csv(station_file)
    sites = df[['lat', 'lon']].drop_duplicates()

    site_x, site_y = transform(sites['lon'].to_numpy(), sites['lat'].to_numpy())

    xx = xr.DataArray(site_x, dims='z')
    yy = xr.DataArray(site_y, dims='z')

    return ds.sel(x=xx, y=yy, method="nearest")

In [28]:
def define_grid_bounds(ds, model, file, long_min=-103.5, long_max=-65, lat_min=33, lat_max=47):
    """Cut ``ds`` down to a latitude/longitude box.

    Longitudes are first wrapped into [-180, 180). For 'gfs' the box is
    applied as a mask on ``latitude``/``longitude``. For every other model
    the ``x``/``y`` dimensions are replaced by Lambert Conformal projection
    coordinates (projection parameters are read from the ``t2m`` field of
    ``file``) and the box corners are projected and used as slice bounds.

    Parameters
    ----------
    ds : xarray.Dataset
        Model data to subset.
    model : str
        Model name; only the value 'gfs' is treated specially.
    file : str
        GRIB file used to read the projection parameters (unused for 'gfs').
    long_min, long_max, lat_min, lat_max : float, optional
        Bounds of the box in degrees; defaults are the values that used to be
        hard-coded.

    Returns
    -------
    xarray.Dataset
        The subset of ``ds`` inside the box.
    """
    ds = ds.assign_coords({"longitude": (((ds.longitude + 180) % 360) - 180)})

    if model == 'gfs':
        mask_lon = (ds.longitude >= long_min) & (ds.longitude <= long_max)
        mask_lat = (ds.latitude >= lat_min) & (ds.latitude <= lat_max)
        return ds.where(mask_lon & mask_lat, drop=True)

    # Only the projection attributes are needed, so close the file again
    # right away instead of leaking one open handle per call.
    with xr.open_dataset(file, engine='cfgrib', backend_kwargs={'indexpath': '', 'filter_by_keys':
                                                               {'typeOfLevel': 'heightAboveGround', 'level': 2,
                                                                'cfVarName': 't2m'}}) as ds_grid:
        central_longitude = (((ds_grid.t2m.attrs.get('GRIB_LoVInDegrees') + 180) % 360) - 180)
        central_latitude = ds_grid.t2m.attrs.get('GRIB_LaDInDegrees')

    projection = ccrs.LambertConformal(central_longitude=central_longitude,
                                       central_latitude=central_latitude,
                                       standard_parallels=[central_latitude])
    transform = np.vectorize(lambda x, y: projection.transform_point(x, y, ccrs.PlateCarree()))

    # The grid should be aligned such that the projection x and y are the same
    # at every y and x index respectively
    grid_y = ds.isel(x=0)
    grid_x = ds.isel(y=0)
    _, proj_y = transform(grid_y.longitude, grid_y.latitude)
    proj_x, _ = transform(grid_x.longitude, grid_x.latitude)

    # ds.sel only works on the dimensions, so we can't just add
    # proj_x and proj_y as additional coordinate variables
    ds["x"] = proj_x
    ds["y"] = proj_y

    # project the corners of the box and use them as slice bounds
    # this is better than solely selecting the point locations because I will need different solutions for diff models
    # and those solutions should be independent of this cleaning script
    x_min, y_min = transform(long_min, lat_min)
    x_max, y_max = transform(long_max, lat_max)

    return ds.sel(x=slice(x_min, x_max), y=slice(y_min, y_max))

In [29]:
def define_grid_bounds_ok(ds, model, file, long_min=-103.5, long_max=-94.4, lat_min=33, lat_max=38):
    """Cut ``ds`` down to a latitude/longitude box (presumably Oklahoma, per the ``_ok`` suffix).

    Longitudes are first wrapped into [-180, 180). For 'gfs' the box is
    applied as a mask on ``latitude``/``longitude``. For every other model
    the ``x``/``y`` dimensions are replaced by Lambert Conformal projection
    coordinates (projection parameters are read from the ``t2m`` field of
    ``file``) and the box corners are projected and used as slice bounds.

    Parameters
    ----------
    ds : xarray.Dataset
        Model data to subset.
    model : str
        Model name; only the value 'gfs' is treated specially.
    file : str
        GRIB file used to read the projection parameters (unused for 'gfs').
    long_min, long_max, lat_min, lat_max : float, optional
        Bounds of the box in degrees; defaults are the values that used to be
        hard-coded.

    Returns
    -------
    xarray.Dataset
        The subset of ``ds`` inside the box.
    """
    ds = ds.assign_coords({"longitude": (((ds.longitude + 180) % 360) - 180)})

    if model == 'gfs':
        mask_lon = (ds.longitude >= long_min) & (ds.longitude <= long_max)
        mask_lat = (ds.latitude >= lat_min) & (ds.latitude <= lat_max)
        return ds.where(mask_lon & mask_lat, drop=True)

    # Only the projection attributes are needed, so close the file again
    # right away instead of leaking one open handle per call.
    with xr.open_dataset(file, engine='cfgrib', backend_kwargs={'indexpath': '', 'filter_by_keys':
                                                               {'typeOfLevel': 'heightAboveGround', 'level': 2,
                                                                'cfVarName': 't2m'}}) as ds_grid:
        central_longitude = (((ds_grid.t2m.attrs.get('GRIB_LoVInDegrees') + 180) % 360) - 180)
        central_latitude = ds_grid.t2m.attrs.get('GRIB_LaDInDegrees')

    projection = ccrs.LambertConformal(central_longitude=central_longitude,
                                       central_latitude=central_latitude,
                                       standard_parallels=[central_latitude])
    transform = np.vectorize(lambda x, y: projection.transform_point(x, y, ccrs.PlateCarree()))

    # The grid should be aligned such that the projection x and y are the same
    # at every y and x index respectively
    grid_y = ds.isel(x=0)
    grid_x = ds.isel(y=0)
    _, proj_y = transform(grid_y.longitude, grid_y.latitude)
    proj_x, _ = transform(grid_x.longitude, grid_x.latitude)

    # ds.sel only works on the dimensions, so we can't just add
    # proj_x and proj_y as additional coordinate variables
    ds["x"] = proj_x
    ds["y"] = proj_y

    # project the corners of the box and use them as slice bounds
    # this is better than solely selecting the point locations because I will need different solutions for diff models
    # and those solutions should be independent of this cleaning script
    x_min, y_min = transform(long_min, lat_min)
    x_max, y_max = transform(long_max, lat_max)

    return ds.sel(x=slice(x_min, x_max), y=slice(y_min, y_max))

In [30]:
def _forecast_hours(model):
    """Return the forecast hours to read for ``model``; ValueError if unknown."""
    # choosing to start at first ~forecast~ time rather than ~init~ time because of variable list inconsistencies
    if model == 'hrrr':
        return range(1, 19)  # forecast hours, second num exclusive
    if model == 'nam':
        return np.arange(1, 37, 1).tolist() + np.arange(39, 85, 3).tolist()
    if model == 'gfs':
        return np.arange(3, 99, 3)
    raise ValueError(f"unsupported model {model!r}; expected 'hrrr', 'nam' or 'gfs'")


def _convert_day(date, model, init_time, fh, prs, combined_file, input_path, output_path):
    """Convert the GRIB files of one initialization date into one parquet file."""
    stamp = f'{date:%Y%m%d}'

    # NOTE(review): output_path has no trailing separator, so the directory
    # becomes e.g. '.../cleanedGFS/2018/01/'. Kept as-is because existing
    # outputs already live there -- confirm before changing.
    savepath = f'{output_path}{model.upper()}/{date:%Y}/{date:%m}/'
    if model == 'hrrr':
        # Keep pressure-level output apart from surface output.
        product = 'wrfprs' if prs else 'wrfsfc'
        outfile = f'{savepath}{stamp}_{model}.t{init_time}z_{product}_fhAll.parquet'
    else:
        outfile = f'{savepath}{stamp}_{model}.t{init_time}z_fhAll.parquet'

    # Print the path that will actually be written.
    print("This is your path!")
    print(".    .")
    print(outfile)

    if combined_file:
        # NOTE(review): input_path has no trailing separator here, so the
        # date is glued onto 'GFSv16_parallel' -- confirm the intended layout.
        # is_non_zero_file returns None (and prints) for a missing/empty file.
        fileList = is_non_zero_file(f'{input_path}{stamp}{init_time}_{model}.grb2')
    else:
        fileList = days2files(input_path, date, date, init_time, model, fh, prs)
    print(fileList)

    if not fileList:
        print('No files exist to read!')
        return

    if combined_file:
        ds = read_data_in_one_file(fileList, model, prs)
        grid_file = fileList
    else:
        ds = read_data(fileList, model, prs)
        grid_file = fileList[-1]
    ds = define_grid_bounds(ds, model, grid_file)

    #fill all na values with 0
    df = ds.fillna(0).to_dataframe(dim_order=None)

    if model == 'hrrr':
        new_index = ['time', 'y', 'x']
    else:
        new_index = ['time', 'latitude', 'longitude']

    # drop step since val time already has it and drop other data group names
    # as these do not include info that is necessary to keep. Not every
    # model/file type provides every group column, so missing ones are ignored.
    df = (df.reset_index()
            .drop(columns=['step', 'heightAboveGround', 'surface', 'meanSea'], errors='ignore')
            .set_index(new_index))

    #create this directory if it doesn't already exist
    Path(savepath).mkdir(parents=True, exist_ok=True)
    df.to_parquet(outfile)


def main(prs=False, combined_file=False, *, model='gfs',
         years=(2018, 2019, 2020, 2021), init_time='00'):
    '''
    Convert GFS, NAM or HRRR grib files to parquet files.

    The function loops over every month and day of each requested year and
    writes one parquet file per initialization date. The datasets that are
    extracted are specified within read_data() / read_data_in_one_file(), and
    the smaller forecast grid that is kept is defined in define_grid_bounds().

    Parameters:

    prs (bool) - true if you want the pressure files, false if you only want surface
    combined_file (bool) - this flag should be turned on if multiple forecast times exist in one grib file
    model (str, keyword-only) - hrrr, nam, gfs (default 'gfs')
    years (iterable of int, keyword-only) - the years of interest (default 2018-2021)
    init_time (str, keyword-only) - initialization time for model, 00 or 12 UTC (default '00')

    Raises ValueError if model is not one of the supported names.
    '''
    fh = _forecast_hours(model)

    # input_path (str) - path to base location of data
    # output_path (str) - where to write new smaller clean files
    if combined_file:
        input_path = '/home/aevans/ai2es/GFS/GFSv16_parallel'
        output_path = '/home/aevans/ai2es/GFS/GFSv16_parallel/cleaned'
    else:
        input_path = f"/home/aevans/ai2es/{model.upper()}/"
        output_path = f"/home/aevans/ai2es/{model.upper()}/cleaned"

    for year in years:
        for month in range(1, 13):  # all months in calendar year
            num_days = calendar.monthrange(year, month)[1]
            for day in range(1, num_days + 1):  # all days in calendar month & respective year
                _convert_day(datetime.datetime(year, month, day), model, init_time,
                             fh, prs, combined_file, input_path, output_path)

In [31]:
# if __name__ == '__main__':
#     pool = multiprocessing.Pool(16)
#     pool.map(main, [])
#     pool.close()

In [32]:
# Run the conversion with main()'s default arguments (prs=False, combined_file=False).
main()

This is your path!
.    .
/home/aevans/ai2es/GFS/cleanedGFS/2018/1/201811_gfs.t00z_wrfsfc_fhAll.parquet
['/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_003.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_006.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_009.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_012.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_015.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_018.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_021.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_024.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_027.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_030.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_033.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_036.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_039.grb2', '/home/aevans/ai2es/GFS/2018/01/gfs_4_20180101_0000_042.grb2', '/home/aevans

In [None]:
%%time
# NOTE(review): main() accepts at most two positional arguments
# (prs, combined_file), so this three-positional-argument call raises
# TypeError; model, year and init time are not positional parameters of main().
main('nam', 2018, '00')