## Basic Morphing Routine

In this notebook you will find the fundamental operations for the morphing workflow that this package was set up to deliver. 

## Local Dev

These imports are only necessary if you are working from a local copy of the development GitHub repository.

In [1]:
# enable the autoreload extension in mode 2 so edits to imported modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys

# Location of the local pyepwmorph checkout. Set the PYEPWMORPH_DEV_PATH
# environment variable to point at your own clone; the original path is kept
# as the fallback so existing setups keep working unchanged.
module_path = os.environ.get("PYEPWMORPH_DEV_PATH", "/Users/jmccarty/GitHub/pyepwmorph")

# Put the checkout at the FRONT of sys.path so it takes precedence over any
# installed copy of the package (appending would let an installed copy win).
if module_path not in sys.path:
    sys.path.insert(0, module_path)

## Imports

In [2]:
import os

import matplotlib.pyplot as plt

from pyepwmorph.tools import utilities as morph_utils
from pyepwmorph.tools import io as morph_io
from pyepwmorph.tools import configuration as morph_config
from pyepwmorph.models import access, coordinate, assemble
from pyepwmorph.tools import workflow as morph_work
from pyepwmorph.morph import procedures as morph_proc

## Config Object

We recommend making use of the configuration object (`tools.configuration.MorphConfig`) to manage the workflow.

In [None]:
# set the project name, this will be used at the end for naming your files
project_name = "first_morph_project"

# input the filepath for the baseline EPW file that you are morphing
baseline_epw_file = "USA_MO_Whiteman.AFB.724467_TMY3.epw"

# set a list of user variables from the following:
# ['Temperature','Humidity','Pressure','Wind','Clouds and Radiation', 'Dew Point']
user_variables = ['Temperature', 'Humidity','Pressure', 'Wind', 'Clouds and Radiation', 'Dew Point']

# set a list of climate model pathways from the following:
# ['Best Case Scenario','Middle of the Road','Worst Case Scenario']
user_pathways = ['Best Case Scenario','Middle of the Road','Worst Case Scenario']

# choose the percentiles that you want to extract data from
percentiles = [1,50,99]

# tell the config object which future years to center on
future_years = [2050,2070]

# detect the baseline period from the baseline EPW
baseepw = morph_io.Epw(baseline_epw_file)
baseline = baseepw.detect_baseline_range()

# set up an output directory (in this case it is relative to this notebook)
output_directory = "results"

In [None]:
# first set up the config object to pass important values to the functions
config_object = morph_config.MorphConfig(project_name, 
                                         baseline_epw_file,  
                                         user_variables, 
                                         user_pathways, 
                                         percentiles,
                                         future_years,
                                         output_directory,
                                         model_sources=None, # Empty for now, use the three defaults in the model
                                         baseline_range=baseline # This was detected from the EPW's baseline range but could be any tuple of two years
                                         ) 

## Morph One Variable

In [None]:
# getting the climate model data from Google Cloud and into a clean ensemble dataframe takes three steps:

# 1. download the raw data for each model under a specified pathway and for a specific variable
dataset_dict = access.access_cmip6_data(config_object.model_sources,
                                        # in the larger workflow you would normally iterate through all of your pathways; let's start with Best Case Scenario
                                        "ssp126", # the config object takes the user pathway and makes a short string; for Best Case Scenario that's ssp126
                                        # in the larger workflow you would normally iterate through all of your variables; let's start with one, pressure
                                        "psl" # similar to the shortening of pathways, the config object creates a second list of variables, pressure=psl
                                        )



In [None]:
# the access step returns a dict of xarray datasets where each key in the dict is a model source
# this displays just the first member of the dict
dataset_dict[list(dataset_dict.keys())[0]]

In [None]:
# 2. Next the models need to be cleaned up a bit and given the same index, with some extra dimensions removed
# the dataset will also be constrained to the specific location of the EPW file
dataset_dict = coordinate.coordinate_cmip6_data(config_object.epw.location['latitude'],
                                                config_object.epw.location['longitude'],
                                                "ssp126", 
                                                "psl",
                                                dataset_dict # this is the dict from the access step
                                                )


In [None]:
# the cleaner view of the first model source after the coordinate step
dataset_dict[list(dataset_dict.keys())[0]]

In [None]:
# 3. Take the multiple models contained in the dict and create a single ensemble from them using xclim
psl_ssp126_ensemble = assemble.build_cmip6_ensemble(config_object.percentiles, # tell the function where to slice the ensemble
                                                    "psl", # this is needed for some naming
                                                    dataset_dict # the dataset from the coordinate step
                                                    )

In [None]:
# the dict has now been replaced with a dataframe holding a monthly timeseries and all of the percentiles that were specified
psl_ssp126_ensemble

In [None]:
# in order to morph the pressure variable you will need the historical data from the climate models too
# you can repeat the previous steps, but for the historical pathway, using a convenience function from the workflow module
psl_historical_ensemble = morph_work.compile_climate_model_data(
    config_object.model_sources,
    "historical",
    "psl",
    config_object.epw.location["latitude"],
    config_object.epw.location["longitude"],
    config_object.percentiles,
)

In [None]:
# the same dataframe structure is then created for the historical pathway, with a different set of dates for the index
psl_historical_ensemble

In [None]:
# choose one of the future years and set up a future range to gather the data to perform the delta calcs against the historical data
fut_year = config_object.future_years[0]
future_range = morph_utils.calc_period(int(fut_year), config_object.baseline_range)
# show the baseline range next to the derived future range
print(config_object.baseline_range, future_range)

In [None]:

# using the historical data and the pathway data for a specific percentile (here the 50th), construct the climatologies based on the baseline range and future range
psl_climatologies = assemble.calc_model_climatologies(config_object.baseline_range, 
                                                      future_range,
                                                        psl_historical_ensemble[50],
                                                        psl_ssp126_ensemble[50],
                                                        'psl')


In [None]:
# The calc climatologies function returns a tuple of two series: the first is the monthly mean value for the historical data and the second is for the future
psl_climatologies

In [None]:
# extract the present day psl from the EPW
present_psl = config_object.epw.dataframe["atmos_Pa"]

# morph the present day data using the climatologies
# index 1 of the tuple is passed first and index 0 second
morphed_psl = morph_proc.morph_psl(
    present_psl, psl_climatologies[1], psl_climatologies[0]
).values

## Morphing Workflow

Repeating the above workflow for every variable, pathway, and percentile would be cumbersome. Instead, you can use the convenience functions in the workflow module (`pyepwmorph.tools.workflow`), which run these steps for you.

In [3]:
# first build a new config object

# set the project name, this will be used at the end for naming your files
project_name = "second_morph_project"

# input the filepath for the baseline EPW file that you are morphing
baseline_epw_file = "USA_MO_Whiteman.AFB.724467_TMY3.epw"

# set a list of user variables from the following:
# ['Temperature','Humidity','Pressure','Wind','Clouds and Radiation', 'Dew Point']
user_variables = ['Temperature', 'Humidity','Pressure', 'Wind', 'Clouds and Radiation', 'Dew Point']

# set a list of climate model pathways from the following:
# ['Best Case Scenario','Middle of the Road','Worst Case Scenario']
user_pathways = ['Best Case Scenario','Middle of the Road','Worst Case Scenario']

# choose the percentiles that you want to extract data from
percentiles = [1,50,99]

# tell the config object which future years to center on
future_years = [2020,2050,2070]

# detect the baseline period from the baseline EPW
baseepw = morph_io.Epw(baseline_epw_file)
baseline = baseepw.detect_baseline_range()

# set up an output directory (in this case it is relative to this notebook)
output_directory = "results"

# set up config object
config_object = morph_config.MorphConfig(project_name, 
                                         baseline_epw_file,  
                                         user_variables, 
                                         user_pathways, 
                                         percentiles,
                                         future_years,
                                         output_directory,
                                         model_sources=None, # Empty for now, use the three defaults in the model
                                         baseline_range=baseline # This was detected from the EPW's baseline range but could be any tuple of two years
                                         ) 

['tas', 'tasmax', 'tasmin', 'huss', 'psl', 'uas', 'vas', 'clt', 'rsds']


In [4]:
# get climate model data for every pathway and variable into a single dict
# NOTE(review): longitude is passed before latitude here, while the single-variable
# compile_climate_model_data call earlier in this notebook passes latitude first --
# confirm the positional order against iterate_compile_model_data's signature
year_model_dict = morph_work.iterate_compile_model_data(
    config_object.model_pathways,
    config_object.model_variables,
    config_object.model_sources,
    config_object.epw.location["longitude"],
    config_object.epw.location["latitude"],
    config_object.percentiles,
)


Compiling model data for 'ssp585' and 'tasmax'.

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


ESMDataSourceError: Failed to load dataset with key='ScenarioMIP.CSIRO-ARCCSS.ACCESS-CM2.ssp585.Amon.gn'
                 You can use `cat['ScenarioMIP.CSIRO-ARCCSS.ACCESS-CM2.ssp585.Amon.gn'].df` to inspect the assets/files for this key.
                 

In [None]:
# nested results container, keyed as result_data[year][pathway][percentile]
result_data = {}

# every pathway except "historical" gets morphed
future_pathways = list(
    filter(lambda name: name != "historical", config_object.model_pathways)
)

for fut_year in config_object.future_years:
    fut_key = str(fut_year)
    year_results = result_data[fut_key] = {}
    # period for this future year, derived from the year and the baseline range
    future_range = morph_utils.calc_period(int(fut_year), config_object.baseline_range)

    for pathway in future_pathways:
        pathway_results = year_results[pathway] = {}

        for percentile in config_object.percentiles:
            percentile_key = str(percentile)
            # progress line: year, pathway, percentile
            print(f"{fut_key}, {pathway}, {percentile}")
            morphed_data = morph_work.morph_epw(
                config_object.epw,
                config_object.user_variables,
                config_object.baseline_range,
                future_range,
                year_model_dict,
                pathway,
                percentile,
            )
            pathway_results[percentile_key] = morphed_data

In [22]:
# Write every morphed EPW to <output_directory>/<year>_<pathway>_<percentile>.epw.
# Each lookup and file name uses the loop's own `year`, so each future year's
# data is written to its own file rather than depending on a variable left over
# from an earlier cell.

# make sure the output directory exists before writing into it
os.makedirs(config_object.output_directory, exist_ok=True)

for year, pathway_results in result_data.items():
    for pathway, percentile_results in pathway_results.items():
        for percentile_key, morphed_data in percentile_results.items():
            # stamp the target year onto the data before writing
            morphed_data.dataframe['year'] = int(year)
            output_path = os.path.join(
                config_object.output_directory,
                f"{year}_{pathway}_{percentile_key}.epw",
            )
            morphed_data.write_to_file(output_path)

## Plots

In [None]:
# Plot the weekly mean of one EPW variable for every pathway (rows) and future
# year (columns), with one line per percentile, to show the range of warming.
var = "dewpoint_C"

# sorted() builds new lists, so the config object's own lists are not reordered
future_years = sorted(config_object.future_years)
n_years = len(future_years)

pathways = sorted(
    pathway for pathway in config_object.model_pathways if pathway != "historical"
)
n_pathways = len(pathways)

percentiles = sorted(config_object.percentiles)

# the baseline EPW series is identical in every panel, so resample it once
hist_data = config_object.epw.dataframe[var].resample("W").mean()

# set up a figure with one row per pathway and one column per year;
# squeeze=False keeps `axe` two-dimensional even with a single row or column
fig, axe = plt.subplots(n_pathways, n_years,
                        sharex=True, sharey=True,
                        figsize=(7*n_years, 2.5*n_pathways),
                        squeeze=False,
                        )

# plot by pathway first
for p_n, pathway in enumerate(pathways):
    # then by year
    for y_n, year in enumerate(future_years):
        # set current subplot
        ax = axe[p_n, y_n]

        # plot the EPW series of data
        ax.plot(hist_data, ls='dashed', color='k', lw=0.5, label='EPW Data')
        ax.set_title(f"{pathway}_{year}")

        # then multiple percentiles on the same plot
        for percentile in percentiles:
            plot_data = result_data[str(year)][pathway][str(percentile)].dataframe[var].resample("W").mean()
            ax.plot(plot_data, lw=0.5, label=percentile)
        ax.grid()

# put the legend on the second row / second column panel when it exists,
# otherwise fall back to the last available row / column
legend_ax = axe[min(1, n_pathways - 1), min(1, n_years - 1)]
legend_ax.legend(loc='lower center', title='Percentiles')
fig.suptitle(f"{var} Weekly Mean")
fig.tight_layout()

## Intake Issues

In [1]:

# standalone intake-esm example, independent of pyepwmorph
# NOTE(review): presumably a minimal reproduction of the ESMDataSourceError shown in the workflow section -- confirm
import intake
import intake_esm
# open the "google_cmip6" tutorial catalog
cat_url = intake_esm.tutorial.get_url("google_cmip6")
cat = intake.open_esm_datastore(cat_url)
# narrow the catalog to one variable / table / grid for two experiments
cat_subset = cat.search(
   experiment_id=["historical", "ssp585"],
   table_id="Oyr",
   variable_id="o2",
   grid_label="gn",
   )

# load every matching entry into a dict of datasets
dset_dict = cat_subset.to_dataset_dict()


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


  .applymap(type)
  .applymap(type)
  .applymap(type)


In [12]:
# display the catalog entry for a single dataset key
cat['CMIP.IPSL.IPSL-CM6A-LR.historical.Oyr.gn']

esm_datasource:
  args:
    aggregations:
    - !!python/object:intake_esm.cat.Aggregation
      __dict__:
        attribute_name: variable_id
        options: {}
        type: !!python/object/apply:intake_esm.cat.AggregationType
        - union
      __pydantic_extra__: null
      __pydantic_fields_set__: !!set
        attribute_name: null
        options: null
        type: null
      __pydantic_private__: null
    - !!python/object:intake_esm.cat.Aggregation
      __dict__:
        attribute_name: member_id
        options:
          compat: override
          coords: minimal
        type: !!python/object/apply:intake_esm.cat.AggregationType
        - join_new
      __pydantic_extra__: null
      __pydantic_fields_set__: !!set
        attribute_name: null
        options: null
        type: null
      __pydantic_private__: null
    - !!python/object:intake_esm.cat.Aggregation
      __dict__:
        attribute_name: dcpp_init_year
        options:
          compat: override
         