In [1]:
import gc
import cfgrib
import pygrib

import pandas as pd
import numpy as np
import xarray as xr

import matplotlib.pyplot as plt

from glob import glob
from functools import reduce
from datetime import datetime

In [2]:
# Name of the model sub-directory; the ingest cell looks for files under
# `<archive>/<date>/models/<model>/`.
model = 'gfs0p25'
# Root of the archive tree that the ingest cell globs.
# NOTE(review): absolute, user-specific path -- consider making this configurable.
archive = '/uufs/chpc.utah.edu/common/home/steenburgh-group10/mewessler/archive/'

In [3]:
# Timestamp used to pick the archive date folder and to match GRIB2 file names
# (presumably the model initialization time -- confirm).
init = datetime(2020, 1, 1, 0, 0)
# strftime pattern applied to `init` to build the date folder name.
date_fmt = '%Y%m%d'
# strftime pattern applied to `init` to build the substring matched in file names.
datetime_fmt = '%Y%m%d%H'

## Data Ingest

In [38]:
# Locate the GRIB2 files for this model and timestamp.  glob() returns paths in
# arbitrary, filesystem-dependent order, so sort them: the positional pick below
# is then reproducible between runs and machines.
file_pattern = (archive + init.strftime(date_fmt)
                + '/models/%s/*%s*.grib2'%(model, init.strftime(datetime_fmt)))
flist = sorted(glob(file_pattern))

if len(flist) < 2:
    raise FileNotFoundError(
        'Expected at least 2 files matching %s, found %d'%(file_pattern, len(flist)))

# open_datasets returns a list of datasets for the single file; each one is
# sorted into a surface or an isobaric bucket below.
datasets = cfgrib.open_datasets(flist[1])

# Short names of the fields to keep.
train_keys = ['tp', 'q', 't', 'u', 'v', 'absv', 'w', 'gh', 'r', 'd', 
              'u10', 'v10', 'u100', 'v100', 't2m', 'd2m', 
              'cape', 'prmsl', 'sp', 'orog', 'hpbl']

# Not referenced in this cell; kept because other cells may rely on the name.
derived = ['day_of_year', 'spd', 'dir']

# Buckets of per-dataset pieces (hAG is never filled in this cell).
sfc, iso, hAG = [], [], []

for ds in datasets:

    # Wanted variables present in this dataset, in the dataset's own order.
    key_match = [name for name in ds.data_vars if name in train_keys]
    if not key_match:
        continue

    subset = ds[key_match]
    coords = subset.coords

    if 'heightAboveGround' in coords and 'heightAboveGround' not in ds.dims:
        # Scalar (non-dimension) height coordinate: treat as a single-level field.
        # The coordinate is dropped, presumably so fields valid at different
        # heights can be merged without conflicting coordinate values.
        sfc.append(subset.drop_vars('heightAboveGround'))

    elif 'isobaricInhPa' in coords:
        iso.append(subset)

    elif 'surface' in coords or 'meanSea' in coords:
        sfc.append(subset)

    elif 'prmsl' in ds.data_vars:
        sfc.append(ds['prmsl'])

    # Anything else (e.g. a multi-level heightAboveGround dataset) is skipped.

# Remove 't' from the single-level set; the isobaric 't' is kept in `iso`.
# errors='ignore' keeps the cell usable when no single-level 't' was ingested.
sfc = xr.merge(sfc).drop_vars('t', errors='ignore')
iso = xr.merge(iso).rename({'isobaricInhPa':'level'})
# Reverse the order of the level axis; later cells index into it positionally.
iso = iso.sel(level=iso.level[::-1])

In [41]:
def wind_dir_spd(u, v):
    """Return ``(direction, speed)`` for the wind components ``u`` and ``v``.

    Direction is the compass bearing the wind blows *from*, in degrees
    clockwise from north, in the range (0, 360]; points where both components
    are exactly zero get a direction of 0.  Speed is the vector magnitude in
    the units of the inputs.

    Parameters
    ----------
    u, v : xarray.DataArray
        Wind components on a common grid.

    Returns
    -------
    tuple
        ``(wdir, spd)`` with the same shape as the inputs.
    """
    # atan2(-v, -u) is the math-convention angle of the reversed vector;
    # 90 minus that angle converts it to a compass bearing.
    wdir = 90 - np.degrees(np.arctan2(-v, -u))
    # Wrap non-positive bearings into (0, 360].
    wdir = xr.where(wdir <= 0, wdir+360, wdir)
    # Calm wind has no meaningful bearing; flag it with 0.
    wdir = xr.where(((u == 0) & (v == 0)), 0, wdir)

    return wdir, np.sqrt(u**2 + v**2)


# Isobaric winds ('dir' is inserted before 'spd', which fixes the column order later on).
iso['dir'], iso['spd'] = wind_dir_spd(iso['u'], iso['v'])

# Fixed-height winds, using the u10/v10 and u100/v100 component pairs.
for hgt in [10, 100]:
    sfc['dir%dm'%hgt], sfc['spd%dm'%hgt] = wind_dir_spd(sfc['u%d'%hgt], sfc['v%d'%hgt])

In [42]:
# NOTE(review): disabled code, nothing in this cell executes.  If re-enabled it
# would add a `day_of_year` field to `sfc`: an integer array shaped like
# `sfc.orog`, filled with the '%j' (day-of-year) value of `sfc.time`.
# sfc['day_of_year'] = (('latitude', 'longitude'), 
#                       np.full(sfc.orog.shape, 
#                               fill_value=pd.to_datetime(
#                                   sfc.time.values).strftime('%j')).astype(int))
# sfc

## Transform to levels above ground

In [43]:
# Terrain field and the geopotential-height field of every isobaric level.
orog = sfc.orog
gh = iso.gh

# Per-grid-point trackers; they stay NaN until some level passes the test below.
lowest_level = np.full(orog.shape, fill_value=np.nan)
lowest_level_index = np.full(orog.shape, fill_value=np.nan)

# Walk the levels in stored order.  Every level whose height does not exceed the
# terrain overwrites the trackers, so each point ends up with the LAST such level
# (its coordinate value and its positional index).
for lev_idx, lev in enumerate(iso['level']):

    at_or_below_terrain = orog >= gh.sel(level=lev)
    lowest_level = xr.where(at_or_below_terrain, lev.values, lowest_level)
    lowest_level_index = xr.where(at_or_below_terrain, lev_idx, lowest_level_index)

# Points where no level passed the test fall back to index 0 and a level value
# of 1000 (presumably hPa -- confirm).  The mask is taken before either tracker
# is filled so that both use the same set of points.
never_matched = np.isnan(lowest_level)
lowest_level_index = xr.where(never_matched, 0, lowest_level_index)
lowest_level = xr.where(never_matched, 1000, lowest_level)

In [44]:
# Number of consecutive levels extracted per profile variable, counted upward
# from each grid point's `lowest_level_index` (suffixes _01agl .. _10agl).
n_agl_levels = 10

# One single-column DataFrame per output field; merged at the end of the cell.
df = []
match_rename = {'absv':'vo', 'gh':'z', 'hpbl':'blh', 'prmsl':'msl', 'tp':'swe_mm',
               'u10':'u10m', 'v10':'v10m', 'u100':'u100m', 'v100':'v100m'}

# Every field is re-wrapped on the bare (latitude, longitude) grid so that
# to_dataframe() yields a frame indexed by exactly those two coordinates.
grid_dims = ['latitude', 'longitude']
grid_coords = {'latitude':iso['latitude'], 'longitude':iso['longitude']}

# Loop over each variable of the isobaric set, then of the single-level set
# (terrain is only used for the level search, so it is left out of the table).
for ds in [iso, sfc.drop_vars('orog')]:

    for var_name in ds.data_vars:

        new_var_name = match_rename.get(var_name, var_name)
        print('Reducing (%s) to %s index level AGL'%(var_name, new_var_name))

        var = ds[var_name]

        if 'level' in var.coords:

            for i in range(n_agl_levels):

                # Positional level index wanted at every grid point for this
                # offset; invariant across the inner loop, so computed once.
                target_index = lowest_level_index + i

                var_agl = np.full(shape=(orog.shape), fill_value=np.nan)

                # Copy in, level by level, the points whose target is that level.
                # Points whose target lies beyond the last level stay NaN.
                for j in range(iso.sizes['level']):

                    var_agl = xr.where(target_index == j, var.isel(level=j), var_agl)

                # We could go ahead and append to the pandas dataframe here
                # at the completion of each level (_01agl, _02agl...)
                # We will have to use [(time), lat, lon] as a multiindex
                var_agl = xr.DataArray(var_agl, dims=grid_dims, coords=grid_coords)

                df.append(var_agl.to_dataframe(name='%s_%02dagl'%(new_var_name.upper(), i+1)))

                # Release the temporary grid before building the next one.
                del var_agl
                gc.collect()

        else:

            # Single-level field: taken as-is on the common grid.
            var_agl = xr.DataArray(var.values, dims=grid_dims, coords=grid_coords)

            df.append(var_agl.to_dataframe(name='%s'%new_var_name.upper()))

# Join all single-column frames on their shared (latitude, longitude) index levels.
df_merged = reduce(lambda left, right: pd.merge(left, right, on=['latitude', 'longitude']), df)
df_merged

Reducing (t) to t index level AGL
Reducing (gh) to z index level AGL
Reducing (u) to u index level AGL
Reducing (v) to v index level AGL
Reducing (r) to r index level AGL
Reducing (w) to w index level AGL
Reducing (absv) to vo index level AGL
Reducing (dir) to dir index level AGL
Reducing (spd) to spd index level AGL
Reducing (u10) to u10m index level AGL
Reducing (v10) to v10m index level AGL
Reducing (t2m) to t2m index level AGL
Reducing (d2m) to d2m index level AGL
Reducing (u100) to u100m index level AGL
Reducing (v100) to v100m index level AGL
Reducing (prmsl) to msl index level AGL
Reducing (cape) to cape index level AGL
Reducing (sp) to sp index level AGL
Reducing (tp) to swe_mm index level AGL
Reducing (hpbl) to blh index level AGL
Reducing (dir10m) to dir10m index level AGL
Reducing (spd10m) to spd10m index level AGL
Reducing (dir100m) to dir100m index level AGL
Reducing (spd100m) to spd100m index level AGL


Unnamed: 0_level_0,Unnamed: 1_level_0,T_01agl,T_02agl,T_03agl,T_04agl,T_05agl,T_06agl,T_07agl,T_08agl,T_09agl,T_10agl,...,V100M,MSL,CAPE,SP,SWE_MM,BLH,DIR10M,SPD10M,DIR100M,SPD100M
latitude,longitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
30.0,230.00,287.311493,285.267578,283.367554,283.156982,285.396637,284.962311,283.909668,282.034271,278.899994,275.299988,...,-10.338528,102697.140625,0.0,102708.226562,0.0,921.954651,31.069725,10.558015,31.362392,12.107527
30.0,230.25,287.311493,285.167572,283.267548,283.156982,285.496643,284.862305,283.709656,282.034271,279.000000,275.299988,...,-10.458528,102683.539062,0.0,102687.421875,0.0,905.074707,29.547707,10.521767,29.848236,12.058069
30.0,230.50,287.211487,285.167572,283.267548,283.256989,285.596619,284.862305,283.609680,281.834290,278.899994,275.200012,...,-10.538527,102670.101562,0.0,102663.421875,0.0,897.874695,28.286224,10.485411,28.665176,12.010569
30.0,230.75,287.111511,285.167572,283.167542,283.256989,285.696625,284.962311,283.409668,281.534271,278.799988,275.200012,...,-10.668528,102656.820312,1.0,102642.625000,0.0,895.074707,27.186550,10.481296,27.447876,12.021817
30.0,231.00,287.111511,285.167572,283.167542,282.956970,285.496643,284.862305,283.209656,281.434265,278.799988,275.200012,...,-10.958528,102643.218750,1.0,102625.023438,0.0,908.834656,25.391586,10.608125,25.688835,12.160447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50.0,259.00,268.167542,269.056976,269.596619,274.862305,272.809662,269.334290,266.100006,262.899994,260.201294,255.004852,...,-0.418528,99780.500000,0.0,94156.226562,0.0,51.714676,259.058990,2.985478,273.771729,6.362428
50.0,259.25,267.267548,268.356995,268.996643,275.462311,272.809662,269.334290,266.200012,262.899994,260.101288,254.804855,...,0.861472,99783.218750,0.0,94221.828125,0.0,68.514679,247.432449,3.065980,262.425720,6.535672
50.0,259.50,266.367554,267.656982,268.496643,275.562317,272.709656,269.334290,266.299988,262.899994,259.901306,254.904861,...,1.961472,99786.421875,0.0,94226.625000,0.0,108.674675,245.023239,3.354970,254.977127,7.567278
50.0,259.75,265.767548,266.956970,268.296631,275.662292,272.709656,269.434265,266.399994,262.899994,259.801300,254.804855,...,2.411472,99790.257812,0.0,94188.226562,0.0,147.794678,250.329010,3.792539,253.609772,8.545936


In [55]:
# Failsafes:
# Expected column order for the final table: SWE_MM first, then the eight
# profile fields at each of the ten AGL levels (01..10, level-major), then the
# single-level fields.
_profile_fields = ['T', 'U', 'V', 'VO', 'W', 'Z', 'R', 'SPD']
_single_level_fields = ['T2M', 'D2M', 'BLH', 'CAPE', 'MSL', 'SP',
                        'U10M', 'V10M', 'U100M', 'V100M',
                        'SPD10M', 'DIR10M', 'SPD100M', 'DIR100M']

ordered_keys = (
    ['SWE_MM']
    + ['%s_%02dagl'%(field, lev) for lev in range(1, 11) for field in _profile_fields]
    + _single_level_fields
)

# Column labels currently present in the merged frame, as an array.
df_keys = np.array(list(df_merged.keys()))

# Cross-check the frame's columns against ordered_keys in both directions:
# first the columns that ordered_keys does not list, then the listed keys that
# the frame does not have.
unlisted_columns = [col for col in df_merged.columns if col not in ordered_keys]
absent_columns = [key for key in ordered_keys if key not in df_merged.columns]

print('\nmissing from model', unlisted_columns)
print('\nmissing from input', absent_columns)


missing from model ['DIR_01agl', 'DIR_02agl', 'DIR_03agl', 'DIR_04agl', 'DIR_05agl', 'DIR_06agl', 'DIR_07agl', 'DIR_08agl', 'DIR_09agl', 'DIR_10agl']

missing from input []


In [56]:
# Show the merged table restricted to, and ordered by, `ordered_keys`.
# Columns not listed there are left out; a listed key that is absent from the
# frame raises KeyError.  The result is displayed only, not assigned.
df_merged.loc[:, ordered_keys]

Unnamed: 0_level_0,Unnamed: 1_level_0,SWE_MM,T_01agl,U_01agl,V_01agl,VO_01agl,W_01agl,Z_01agl,R_01agl,SPD_01agl,T_02agl,...,MSL,SP,U10M,V10M,U100M,V100M,SPD10M,DIR10M,SPD100M,DIR100M
latitude,longitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
30.0,230.00,0.0,287.311493,-6.668923,-10.833750,0.000071,0.007498,226.663437,69.400002,12.721819,285.267578,...,102697.140625,102708.226562,-5.448789,-9.043362,-6.301353,-10.338528,10.558015,31.069725,12.107527,31.362392
30.0,230.25,0.0,287.311493,-6.368923,-10.953750,0.000074,0.012498,224.871445,70.500000,12.670747,285.167572,...,102683.539062,102687.421875,-5.188789,-9.153362,-6.001352,-10.458528,10.521767,29.547707,12.058069,29.848236
30.0,230.50,0.0,287.211487,-6.088923,-11.043750,0.000075,0.001498,222.935440,71.500000,12.611082,285.167572,...,102670.101562,102663.421875,-4.968789,-9.233362,-5.761353,-10.538527,10.485411,28.286224,12.010569,28.665176
30.0,230.75,0.0,287.111511,-5.868923,-11.183750,0.000073,0.028498,221.095444,72.099998,12.630144,285.167572,...,102656.820312,102642.625000,-4.788789,-9.323361,-5.541353,-10.668528,10.481296,27.186550,12.021817,27.447876
30.0,231.00,0.0,287.111511,-5.588923,-11.503750,0.000074,0.033498,219.703430,72.300003,12.789539,285.167572,...,102643.218750,102625.023438,-4.548789,-9.583362,-5.271353,-10.958528,10.608125,25.391586,12.160447,25.688835
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50.0,259.00,0.0,268.167542,3.661868,0.614028,0.000155,0.054643,389.700562,85.099998,3.712991,269.056976,...,99780.500000,94156.226562,2.931211,0.566638,6.348648,-0.418528,2.985478,259.058990,6.362428,273.771729
50.0,259.25,0.0,267.267548,3.551868,1.384028,0.000137,-0.059357,389.588562,88.099998,3.811994,268.356995,...,99783.218750,94221.828125,2.831211,1.176638,6.478647,0.861472,3.065980,247.432449,6.535672,262.425720
50.0,259.50,0.0,266.367554,3.761868,1.694028,0.000118,-0.073357,389.972565,90.599998,4.125698,267.656982,...,99786.421875,94226.625000,3.041211,1.416638,7.308648,1.961472,3.354970,245.023239,7.567278,254.977127
50.0,259.75,0.0,265.767548,4.281868,1.514028,0.000110,-0.090357,390.132568,91.900002,4.541659,266.956970,...,99790.257812,94188.226562,3.571211,1.276638,8.198647,2.411472,3.792539,250.329010,8.545936,253.609772
