In [56]:
import xcast as xc
import datetime as dt
import numpy as np
from pathlib import Path
import xarray as xr
import os
import time
import cartopy.crs as ccrs
import cartopy.feature as cf
import matplotlib.pyplot as plt
import glob

import practical_helper_functions as helper

# automatically reloads the configuration file once updated and saved so you don't have to restart the kernel
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [57]:
# setup the folder where you want to work for this project
# either type in the location, or place this notebook in the folder where you want your project to live and set project_dir = os.getcwd()
project_dir = os.getcwd()#"/Users/katie/Desktop/trial_pacisl" 

#make subdirectores to organize your work within the project if they don't already exist
practical_data_dir = os.path.join(project_dir, 'practical_data')
nmme_nc_dir = '/cpc/int_desk/pac_isl/data/processed/nmme/nc_files'
cmorph_nc_dir = '/cpc/int_desk/pac_isl/data/processed/cmorph/nc_files'
chirps_nc_dir = '/cpc/int_desk/pac_isl/data/processed/chirps/nc_files'

In [63]:
initial_dates = [(2023, 7,1), (2023, 8, 1), (2023, 9, 1), (2023, 10, 1), (2023, 11,1), (2023, 12,1),
                 (2024, 1, 1), (2024, 2, 1), (2024, 3, 1), (2024, 4, 1), (2024, 5, 1), (2024, 6, 1)]

gcms = ['NMME']

#predictor extent, zone over which you want to train your model
predictor_extent = {
        'west':  135,
        'east': 200,  
        'north': 10,  
        'south': -30
      }

#where you want to target your final analysis
predictand_extent = {
        'west':  153,
        'east': 183,  
        'north': 3,  
        'south': -20
      }

### get all target / initialization periods for each initialization date

In [64]:
initial_months, initial_month_names, target_seasons, target_months = [], [], [], []
 
for i in initial_dates:
    leads = [['1', '3'],['2', '4'], ['3','5']]
    initial_month = dt.datetime(*i).month
    initial_months.append(initial_month)
    initial_month_names.append(helper.number_to_month_name_dictionary[initial_month])
    target_month = []
    target_seas = []
    for l in leads:
        target_low = helper.number_to_month_name_dictionary[(initial_month + float(l[0]))%12]
        target_mid = helper.number_to_month_name_dictionary[(initial_month + float(l[0])+1)%12]
        target_high = helper.number_to_month_name_dictionary[(initial_month + float(l[1]))%12]
        target_seas.append('-'.join([target_low, target_high]))
        target_month.append(target_low[0] + target_mid[0] + target_high[0])
    target_seasons.append(target_seas)
    target_months.append(target_month)

### setting up nmme data for analysis
these netcdf files when processed are counting forwards, 32 years for single season analysis (1991-2022), 75 years for three season analysis (25 years times 3 seasons); set up this time as the base time to align the other files as the one 'T' index for each initialized month

#### one season setup

In [65]:
# read in hindcast and forecast data
training_length = 'one_seas'
oneseas_hindcast_data_im, oneseas_forecast_data_im = [], []
for i, im in enumerate(initial_month_names):
    hindcast_data, forecast_data = [], []
    for t, target in enumerate(target_months[i]):
        hindcast_data_l, forecast_data_l = [], []
        for gcm in gcms:
            gcm_hindcast_download_file = '{}*.nc'.format('_'.join([im, 'ld' + leads[t][0], training_length, gcm, 'hind']))
            gcm_forecast_download_file = '{}*.nc'.format('_'.join([im, 'ld' + leads[t][0], training_length, gcm, 'fcst']))
            g = xr.open_dataset(glob.glob(os.path.join(nc_dir, gcm_hindcast_download_file))[0])
            f = xr.open_dataset(glob.glob(os.path.join(nc_dir, gcm_forecast_download_file))[0])
            g = helper.prep_names(g, helper.coordinate_conversion).expand_dims({'M':[gcm]}).dropna(dim = 'Y')
            f = helper.prep_names(f, helper.coordinate_conversion).expand_dims({'M':[gcm]}).dropna(dim = 'Y')
            hindcast_data_l.append(g)
            forecast_data_l.append(f)
        hindcast_data_l = xr.concat(hindcast_data_l, dim = 'M')
        forecast_data_l = xr.concat(forecast_data_l, dim = 'M')
        hindcast_data_l = hindcast_data_l.assign_coords({'L':t+1})
        forecast_data_l = forecast_data_l.assign_coords({'L':t+1})
        hindcast_data.append(hindcast_data_l)
        forecast_data.append(forecast_data_l)
    #create one dataset across all lead times of interest
    forecast_data = xr.concat(forecast_data, dim = 'L')
    #check all hindcast years are available for all lead times and only keep dataset with intersecting years
    hindcast_data = xr.concat(hindcast_data, dim = 'L')

    oneseas_hindcast_data_im.append(hindcast_data)
    oneseas_forecast_data_im.append(forecast_data)

In [None]:
oneseas_hindcast_data_im[0]

#### three season setup

In [66]:
# read in hindcast and forecast data
training_length = 'three_seas'
threeseas_hindcast_data_im, threeseas_forecast_data_im = [], []
for i, im in enumerate(initial_month_names):
    hindcast_data, forecast_data = [], []
    for t, target in enumerate(target_months[i]):
        hindcast_data_l, forecast_data_l = [], []
        for gcm in gcms:
            gcm_hindcast_download_file = '{}*.nc'.format('_'.join([im, 'ld' + leads[t][0], training_length, gcm, 'hind']))
            gcm_forecast_download_file = '{}*.nc'.format('_'.join([im, 'ld' + leads[t][0], training_length, gcm, 'fcst']))
            g = xr.open_dataset(glob.glob(os.path.join(nc_dir, gcm_hindcast_download_file))[0])
            f = xr.open_dataset(glob.glob(os.path.join(nc_dir, gcm_forecast_download_file))[0])
            g = helper.prep_names(g, helper.coordinate_conversion).expand_dims({'M':[gcm]}).dropna(dim = 'Y')
            f = helper.prep_names(f, helper.coordinate_conversion).expand_dims({'M':[gcm]}).dropna(dim = 'Y')
            hindcast_data_l.append(g)
            forecast_data_l.append(f)
        hindcast_data_l = xr.concat(hindcast_data_l, dim = 'M')
        forecast_data_l = xr.concat(forecast_data_l, dim = 'M')
        hindcast_data_l = hindcast_data_l.assign_coords({'L':t+1})
        forecast_data_l = forecast_data_l.assign_coords({'L':t+1})
        hindcast_data.append(hindcast_data_l)
        forecast_data.append(forecast_data_l)
    #create one dataset across all lead times of interest
    forecast_data = xr.concat(forecast_data, dim = 'L')
    #check all hindcast years are available for all lead times and only keep dataset with intersecting years
    hindcast_data = xr.concat(hindcast_data, dim = 'L')
    
    #crop model data to predictor extent
    hindcast_360 = helper.adjust_longitude_to_360(hindcast_data, 'X').sortby('Y', ascending = True).sortby('X', ascending = True)
    hindcast_comp = hindcast_360.sel(X= slice(predictor_extent['west'], predictor_extent['east']),
                            Y = slice(predictor_extent['south'], predictor_extent['north']))
    forecast_360 = helper.adjust_longitude_to_360(forecast_data, 'X').sortby('Y', ascending = True).sortby('X', ascending = True)
    forecast_comp = forecast_360.sel(X= slice(predictor_extent['west'], predictor_extent['east']),
                        Y = slice(predictor_extent['south'], predictor_extent['north']))

    threeseas_hindcast_data_im.append(hindcast_comp)
    threeseas_forecast_data_im.append(forecast_comp)

### setting up cmorph data for analysis
this data was processed in the same manner as nmme, so should have the exact same time stamp, but calculate the intersecting T values to be absolutely certain before combining across leads

In [67]:
threeseas_cmorph_im = []
for i, im in enumerate(initial_month_names):
    obs_leads = []
    for t, target in enumerate(target_months[i]):
        obs_file = '{}*.nc'.format('_'.join([im, 'ld' + str(t + 1), 'CMORPH']))
        Y_raw = xr.open_dataset(glob.glob(os.path.join(nc_dir, obs_file))[0])
        Y = helper.prep_names(Y_raw, helper.coordinate_conversion)
        Y = getattr(Y, [i for i in Y.data_vars][0])
        Y = Y.expand_dims({'L':[t+1], 'M':[0]}).to_dataset(name = 'precip')
        obs_leads.append(Y)
    obs_leads = xr.concat(obs_leads, dim = 'L')
    
    #crop observations to target zone
    obs_360 = helper.adjust_longitude_to_360(obs_leads, 'X').sortby('Y', ascending = True).sortby('X', ascending = True)
    obs_comp = obs_360.sel(X= slice(predictand_extent['west'], predictand_extent['east']),
                            Y = slice(predictand_extent['south'], predictand_extent['north']))
    threeseas_cmorph_im.append(obs_comp)

### setting up chirps data for analysis
only keep years available to NMME: 1991-2016
label all years as the NMME years to keep it simple (given that netcdf is counting forwards) once the years extracted are correct

In [68]:
oneseas_ucsb_im = []
for i, im in enumerate(initial_month_names):
    obs_leads = []
    for t, target in enumerate(target_months[i]):
        obs_file = '{}*.nc'.format('_'.join([target, 'UCSB0p05_pac-islands']))
        Y_raw = xr.open_dataset(glob.glob(os.path.join(nc_dir, obs_file))[0])
        years_of_interest = helper.getYears(1991,2022)
        Y_raw = Y_raw.sel(year=Y_raw.year.isin(years_of_interest))
        Y = helper.prep_names(Y_raw, helper.coordinate_conversion)
        Y = getattr(Y, [i for i in Y.data_vars][0])
        Y = Y.expand_dims({'L':[t+1], 'M':[0]}).drop('season')
        Y = Y.to_dataset(name = 'precip')
        model = oneseas_hindcast_data_im[i].isel(L=t).to_dataset(name = 'precip')
        #update observations to have same time dimension as the model hindcasts
        Y_update = []
        for oneyear, year in enumerate(Y.T.values):
            Y_year = Y.sel(T=year)
            Y_year = Y_year.assign_coords({'T': model.isel(T=oneyear).T.values})
            Y_update.append(Y_year)
        Y_update = xr.concat(Y_update, dim = 'T')
        obs_leads.append(Y_update)
    obs_leads = xr.concat(obs_leads, dim = 'L')
    
    #crop observations to target zone
    obs_360 = helper.adjust_longitude_to_360(obs_leads, 'X').sortby('Y', ascending = True).sortby('X', ascending = True)
    obs_comp = obs_360.sel(X= slice(predictand_extent['west'], predictand_extent['east']),
                            Y = slice(predictand_extent['south'], predictand_extent['north']))
    oneseas_ucsb_im.append(obs_comp)

AttributeError: 'Dataset' object has no attribute 'to_dataset'