# Combine model and observation dictionaries into joined datasets (ERA5 and MERRA2)

In [1]:
import helper_functions

In [2]:
# temporary
# Load every pickled input this notebook needs from data/update/.
# NOTE(review): load_from_pickle presumably wraps pickle.load — only point it at trusted files.

# model data
ECMWF = helper_functions.load_from_pickle('data/update/ecmwf.pickle')
IITM = helper_functions.load_from_pickle('data/update/iitm.pickle')

# ERA5 data, one file per model
ERA5_ecmwf = helper_functions.load_from_pickle('data/update/era5_ecmwf.pickle')
ERA5_iitm = helper_functions.load_from_pickle('data/update/era5_iitm.pickle')

# MERRA2 data, one file per model
MERRA2_ecmwf = helper_functions.load_from_pickle('data/update/merra2_ecmwf.pickle')
MERRA2_iitm = helper_functions.load_from_pickle('data/update/merra2_iitm.pickle')

# a single pickle holds four objects, unpacked positionally:
#    depressions and lows for ECMWF, then depressions and lows for IITM
(
    ERA5_depressions_ecmwf,
    ERA5_lows_ecmwf,
    ERA5_depressions_iitm,
    ERA5_lows_iitm,
) = helper_functions.load_from_pickle('data/update/depressions_lows.pickle')

## models & ERA5

In [3]:
# temporary
# Join each model with its matching ERA5 data via helper_functions.join_model_obs.
ERA5_ECMWF_joined = helper_functions.join_model_obs(
    ECMWF, ERA5_ecmwf, '_era5', '_ecmwf'
)
ERA5_IITM_joined = helper_functions.join_model_obs(
    IITM, ERA5_iitm, '_era5', '_iitm'
)

In [4]:
# sizes of the two ERA5 joins (ECMWF first, then IITM)
tuple(map(len, (ERA5_ECMWF_joined, ERA5_IITM_joined)))

(117, 103)

In [5]:
# temporary
# Same join as for the full ERA5 data, but against the ERA5 depressions and lows subsets.

# ECMWF vs. ERA5 depressions / lows
ERA5_ECMWF_deps = helper_functions.join_model_obs(
    ECMWF, ERA5_depressions_ecmwf, '_era5', '_ecmwf'
)
ERA5_ECMWF_lows = helper_functions.join_model_obs(
    ECMWF, ERA5_lows_ecmwf, '_era5', '_ecmwf'
)

# IITM vs. ERA5 depressions / lows
ERA5_IITM_deps = helper_functions.join_model_obs(
    IITM, ERA5_depressions_iitm, '_era5', '_iitm'
)
ERA5_IITM_lows = helper_functions.join_model_obs(
    IITM, ERA5_lows_iitm, '_era5', '_iitm'
)

In [6]:
# ERA5 depressions and lows, kept as two separate sets: one for the ECMWF hits
#    and one for the IITM hits (order: ECMWF deps, ECMWF lows, IITM deps, IITM lows)
tuple(map(len, (ERA5_ECMWF_deps, ERA5_ECMWF_lows, ERA5_IITM_deps, ERA5_IITM_lows)))

(56, 61, 52, 51)

In [8]:
# temporary
# Bundle the six ERA5 joins into one list and pickle it.
# NOTE(review): whoever loads this file presumably unpacks it positionally,
#    so keep the order of the list stable.
models_joined_with_era5 = [
    ERA5_ECMWF_joined,
    ERA5_IITM_joined,
    ERA5_ECMWF_deps,
    ERA5_ECMWF_lows,
    ERA5_IITM_deps,
    ERA5_IITM_lows,
]
helper_functions.store_as_pickle(
    models_joined_with_era5,
    'data/update/models_joined_with_era5.pickle',
)

## models & MERRA2

In [9]:
# Left-join every ECMWF entry onto the MERRA2 entry with the same key, on 'date'.
# Why a left join: a MERRA2 track may have its genesis after the model genesis, and
#    ForecastLeadTime must be measured since model genesis.
# Use case: when the model starts before the MERRA2 track, the first few comparison
#    points are NaN, but zero ForecastLeadTime still represents model genesis.
# Keys missing from the MERRA2 dictionary are skipped.
model, obs = ECMWF, MERRA2_ecmwf
lsuffix, rsuffix = '_ecmwf', '_merra2'

MERRA2_ECMWF_joined = dict(
    (
        key,
        model_frame
        .merge(obs[key], on='date', how='left', suffixes=(lsuffix, rsuffix))
        .set_index('date'),
    )
    for key, model_frame in model.items()
    if key in obs
)

In [10]:
# number of keys present in both ECMWF and MERRA2_ecmwf, i.e. entries in the join
len(MERRA2_ECMWF_joined)

103

In [11]:
# Left-join every IITM entry onto the MERRA2 entry with the same key, on 'date'.
# how='left' keeps every model row, even where MERRA2 has no matching date;
#    keys missing from the MERRA2 dictionary are skipped.
model, obs = IITM, MERRA2_iitm
lsuffix, rsuffix = '_iitm', '_merra2'

MERRA2_IITM_joined = dict(
    (
        key,
        model_frame
        .merge(obs[key], on='date', how='left', suffixes=(lsuffix, rsuffix))
        .set_index('date'),
    )
    for key, model_frame in model.items()
    if key in obs
)

In [12]:
# number of keys present in both IITM and MERRA2_iitm, i.e. entries in the join
len(MERRA2_IITM_joined)

94

In [75]:
# DO NOT USE THE join_model_obs CALLS BELOW FOR MERRA2: THEY DO AN INNER JOIN, AND THAT ONLY WORKS FOR ERA5
#    since ERA5 tracks can never start after model tracks, so ForecastLeadTime
#    from an inner join will always represent time since model genesis
# MERRA2_ECMWF_joined = join_model_obs(ECMWF, MERRA2_ecmwf, '_merra2', '_ecmwf')
# MERRA2_IITM_joined = join_model_obs(IITM, MERRA2_iitm, '_merra2', '_iitm')

In [13]:
# Bundle the two MERRA2 joins (ECMWF first, then IITM) into one list and pickle it.
models_joined_with_merra2 = [
    MERRA2_ECMWF_joined,
    MERRA2_IITM_joined,
]
helper_functions.store_as_pickle(
    models_joined_with_merra2,
    'data/update/models_joined_with_merra2.pickle',
)