# pyWBM Future Cleaning
- This code is used for cleaning and processing future pyWBM & LOCA2 projections
- Produces a panel dataset to which the coefficients estimated in notebook 5 can be applied to generate future projections

In [1]:
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import dask
import os
import glob as glob

## Growing degree days

In [2]:
from functions_2a import degreeDays, yearlyCalculationSum

### Filepaths & Inputs

In [3]:
# Discover the CMIP6 model names used in LOCA2 from the directory listing
base_loca_paths_for_models = "/storage/group/pches/default/public/LOCA2/"
models = sorted(glob.glob(base_loca_paths_for_models + "*"))
# The last two sorted entries are dropped before taking basenames
# (presumably they are not model directories -- TODO confirm).
model_names = list(map(os.path.basename, models[:-2]))

# SSP scenarios used in pyWBM are 245 and 370
ssps = ["245", "370"]

# Initializations: only r1i1p1f1 for now, although some runs have more than one
initializations = ["r1i1p1f1"]

# Inputs needed for the degree-day calculation:
# LOCA2 is stored in chunks of ~30 years
time_frames = ["2015-2044", "2045-2074", "2075-2100"]

# full path for reference = "/storage/group/pches/default/public/LOCA2/ACCESS-CM2/0p0625deg/r1i1p1f1/ssp245/tasmin"

In [6]:
# Each LOCA2 file is 10GB (large), so the files are opened with dask chunks
# rather than being loaded eagerly.
#
# For every (model, initialization, ssp, time frame) combination this cell:
#   1. opens the tasmax / tasmin files and merges them as `tmax` / `tmin`,
#   2. slices each year to the April 1 - September 30 window,
#   3. computes degree days with `degreeDays` from functions_2a,
#   4. concatenates the per-year pieces into `ds_all_seasons`.
#
# NOTE: the `[:1]` slices limit the run to the first model, first SSP and
# first year of each time frame.
if __name__ == "__main__":
    for model_name_i in model_names[:1]:
        for initialization_i in initializations:
            for ssp_i in ssps[:1]:
                for time_frame_i in time_frames:
                    # tmax file
                    file_path_i_tmax = f"{base_loca_paths_for_models}{model_name_i}/0p0625deg/{initialization_i}/ssp{ssp_i}/tasmax"
                    file_name_i_tmax = f"tasmax.{model_name_i}.ssp{ssp_i}.{initialization_i}.{time_frame_i}.LOCA_16thdeg_v20220413.nc"
                    # tmin file
                    file_path_i_tmin = f"{base_loca_paths_for_models}{model_name_i}/0p0625deg/{initialization_i}/ssp{ssp_i}/tasmin"
                    file_name_i_tmin = f"tasmin.{model_name_i}.ssp{ssp_i}.{initialization_i}.{time_frame_i}.LOCA_16thdeg_v20220413.nc"

                    # combining them for usage in the degree day calculation
                    try:
                        LOCA2_tmax = xr.open_dataset(f"{file_path_i_tmax}/{file_name_i_tmax}", chunks='auto').rename({"tasmax": "tmax"})
                        LOCA2_tmin = xr.open_dataset(f"{file_path_i_tmin}/{file_name_i_tmin}", chunks='auto').rename({"tasmin": "tmin"})
                        LOCA2_combined = xr.merge([LOCA2_tmax, LOCA2_tmin])

                    except FileNotFoundError:
                        print(f"Issue with file location, skipping {file_path_i_tmax}/{file_name_i_tmax} or {file_path_i_tmin}/{file_name_i_tmin}")
                        # Each time frame builds its own result list, so a missing
                        # file only needs to skip this time frame; `break` would
                        # silently drop the remaining time frames as well.
                        continue

                    # this inputs some big daily chunked dataset, and outputs the gdd & edd binned using pyWBM futures
                    results_season_and_soilm = []
                    for year in np.unique(LOCA2_combined.time.dt.year)[:1]:
                        # one dask chunk per day within the April-September window
                        ds_slice = LOCA2_combined.sel(time=slice(f"{year}-04-01", f"{year}-09-30")).chunk({"time": 1})

                        # The original read `gdd_future` before assigning it (NameError on a
                        # fresh kernel). Compute it first, mirroring the 'edd' call below.
                        # assumes degreeDays accepts 'gdd' the same way it accepts 'edd' -- TODO confirm
                        gdd_future = degreeDays(ds_slice, 'gdd')
                        # seasonal growing degree days: sum over time within each year
                        gdd_future = gdd_future.groupby("time.year").sum("time")
                        edd_future = degreeDays(ds_slice, 'edd')

                        # now bin everything and save appropriately. keep it all in memory, avoid saving intermediary steps
                        # NOTE(review): the raw slice is collected here, not gdd_future / edd_future;
                        # confirm this is intended once the binning step is implemented.
                        results_season_and_soilm.append(ds_slice)

                    ds_all_seasons = xr.concat(results_season_and_soilm, dim="time")

In [11]:
# Recompute the 'edd' degree days outside the loop so the result can be inspected.
# NOTE: `ds_slice` is the variable left over from the last iteration of the
# processing loop in the cell above; this cell fails with a NameError if that
# loop never reached the per-year step (e.g. every input file was missing).
edd_future = degreeDays(ds_slice, 'edd')
# bare expression: the notebook renders the object's rich repr as the cell output
edd_future

Unnamed: 0,Array,Chunk
Bytes,312.36 MiB,1.71 MiB
Shape,"(183, 474, 944)","(1, 474, 944)"
Dask graph,183 chunks in 37 graph layers,183 chunks in 37 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 312.36 MiB 1.71 MiB Shape (183, 474, 944) (1, 474, 944) Dask graph 183 chunks in 37 graph layers Data type float32 numpy.ndarray",944  474  183,

Unnamed: 0,Array,Chunk
Bytes,312.36 MiB,1.71 MiB
Shape,"(183, 474, 944)","(1, 474, 944)"
Dask graph,183 chunks in 37 graph layers,183 chunks in 37 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
