In [4]:
import xarray as xr
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
import datetime
from datetime import datetime, timedelta

##This code clears all the variables from the workspace; can help avoid memory errors
def clear_all():
    """Clears all the variables from the workspace of the spyder application."""
    gl = globals().copy()
    for var in gl:
        if var[0] == '_': continue
        if 'func' in str(globals()[var]): continue
        if 'module' in str(globals()[var]): continue

        del globals()[var]
if __name__ == "__main__":
    clear_all()

Import forecast data (2017-18)

In [5]:
model_name = 'ecmwfsipn'
model_type = 'forecast'
filepath = '/home/disk/sipn/nicway/data/model/{model_name}/{model_type}/sipn_nc_agg/'.format(model_name=model_name,
                                              model_type=model_type)
filenames = xr.open_mfdataset(filepath+'/*.nc',concat_dim='init_time')
print(filenames)

<xarray.Dataset>
Dimensions:       (ensemble: 51, fore_time: 215, init_time: 25, nregions: 15)
Coordinates:
  * fore_time     (fore_time) timedelta64[ns] 1 days 2 days ... 215 days
  * ensemble      (ensemble) int32 0 1 2 3 4 5 6 7 8 ... 43 44 45 46 47 48 49 50
  * nregions      (nregions) int64 99 2 3 4 5 6 7 8 9 10 11 12 13 14 15
    region_names  (nregions) object 'panArctic' ... 'Central Arctic'
  * init_time     (init_time) datetime64[ns] 2017-11-01 ... 2019-11-01
Data variables:
    Extent        (init_time, ensemble, fore_time, nregions) float64 dask.array<shape=(25, 51, 215, 15), chunksize=(1, 51, 215, 15)>


Create Kara-Laptev and E-Sib/Beauf/Chukchi

In [6]:
region_names = filenames.region_names
region_names = np.append(region_names,['Kara-Laptev','East-Siberian-Beaufort-Chukchi'])
init_times = filenames.init_time
forecast_times = filenames.fore_time
extent = filenames.Extent
##chunk sizes in dimensions of [init_time x ensemble x fore_time x region]
chunk_sizes = filenames['Extent'].shape
extent_KL = extent[:,:,:,8] + extent[:,:,:,9]
extent_ESBC = extent[:,:,:,10] + extent[:,:,:,11] + extent[:,:,:,12]
extent_extras= np.stack((extent_KL,extent_ESBC),axis=3)
extent = np.concatenate((extent,extent_extras),axis=3)

For now, we'll define VRILEs as the 5th percentile events of 5-day changes

In [7]:
no_ens = len(filenames.ensemble) ##no. of ensemble members
no_day_change = 5 ##looking at 5 day changes
no_forecast_periods = len(forecast_times)

initialize our output. Since SIE is a time series, we'll use Pandas and a DataFrame. For now, we will track initialization date, valid date, the actual SIE, lead time (in days, this will be a timedelta object), 5-day change in SIE (this will be recorded for the center day), ensemble number, and region.

In [25]:
d_SIC_ALL_ens = pd.DataFrame(columns=["I (init date)",
                                      "V (valid date)",
                                      "V_mon (valid date month)",
                                      "V_yr (valid date year)",
                                      "SIE",
                                      "lead time (V - I)",
                                      "ensemble",
                                      "region"])

Loop through each region, then each forecast time and just calculcate d_SIE and add to dataframe

In [26]:
##Create integers for each region
##I could probably write this better with more uses of groupby. 
reg_sel = np.arange(0,17)
region_name = 'panArctic'
##Outer loop will go through each region
#for ireg in reg_sel:
ireg = 0
    #region_name = region_names[ireg]
region_select = ireg
print(region_name)
##Next loop will go through each init time
for itime in np.arange(0,len(init_times)):
#itime = 0
    init_times_df = pd.DatetimeIndex(init_times.values)
    init_select = init_times_df[itime]#.to_dataset()
    check_yr = pd.to_datetime(init_select).year
    if check_yr != 2018:
        print('not the right yr')
        continue
    print(init_select)

    ##We'll create another DataFrame inside this loop; we'll append it 
    ##to the big DataFrame outside of this loop.
    d_SIC_lead_time = pd.DataFrame({"I (init date)":pd.Series(init_select).repeat(len(forecast_times)*no_ens),
                                "V (valid date)":"",
                                "V_mon (valid date month)":"",
                                "V_yr (valid date year)":"",
                                "SIE":"",
                                "lead time (days)":"",
                                #"d_SIC (V - I)":"",
                                "ensemble":"",
                                "region":""})
    ##Now, we loop through our ensemble members
    for iens in np.arange(0,no_ens):
        #iens = 0
        ##Keep track of the correct indices so we don't have to append ad infitum
        save_ind = iens*(no_forecast_periods) + np.arange(0,no_forecast_periods)
        #print('ensemble no ',iens)
        #        d_SIC_lead_time['ensemble'].iloc[ens_ind] = np.tile(iens,no_forecast_periods*len(init_times))
        #Subset our sea ice extent by init_tim, ensemble no., and region
        I_test = extent[itime,iens,:,region_select]
        ##since we're doing 5-day means, our first and last 2 dates aren't included
        ind_select = np.arange(0,no_forecast_periods) 
        #ind_select = 1
        #min_range = ind_select - 2
        #max_range = ind_select + 2
        ##Here's where we actually calculate that 5-day change in SIE
        #delta_extent = I_test[max_range] - I_test[min_range]
        #d_SIC_lead_time['d_SIC (V - I)'].iloc[save_ind] = delta_extent
        ##Now, we get the dates that correspond to our valid date and number of lead days
        forecast_dates = ind_select.astype('timedelta64[D]')
        date_change = pd.Series(init_select).repeat(len(forecast_dates)) + forecast_dates
        d_SIC_lead_time['V (valid date)'].iloc[save_ind] = pd.to_datetime(date_change.values)
        d_SIC_lead_time['V_mon (valid date month)'].iloc[save_ind] = pd.to_datetime(date_change.values).month
        d_SIC_lead_time['V_yr (valid date year)'].iloc[save_ind] = pd.to_datetime(date_change.values).year
        ##We want to save lead time as a time delta, not a date
        d_SIC_lead_time["lead time (days)"].iloc[save_ind] = pd.to_timedelta(forecast_dates).days
        ##This is just for saving files, because Python is 0-indexed but our ensemble no isn't
        ens_no = iens + 1
        ##Save info about our ensemble, region, and raw SIE data
        d_SIC_lead_time['ensemble'].iloc[save_ind] = np.tile(ens_no,len(ind_select))
        d_SIC_lead_time['region'].iloc[save_ind] = np.tile(region_name,len(ind_select))
        d_SIC_lead_time['SIE'].iloc[save_ind] = I_test[ind_select]
        #d_SIC_lead_time
    if itime == 0:
        df_ALL_init = d_SIC_lead_time
    else:        
        df_ALL_init = df_ALL_init.append(d_SIC_lead_time)
    #    
if ireg == 0:
    d_SIC_ALL_ens = df_ALL_init
    #filename_full = filepath_save+'{model_name}_{model_type}_SIE_d_SIE_{d_days}day_change_lead_time_ALL_REGIONS_ALL_ENS.csv'.format(model_name=model_name,
    #               model_type=model_type,d_days=no_day_change)
    #d_SIC_ALL_ens.to_csv(filename_full)
else:
    d_SIC_ALL_ens = d_SIC_ALL_ens.append(df_ALL_init)
    #filename_full = filepath_save+'{model_name}_{model_type}_SIE_d_SIE_{d_days}day_change_lead_time_ALL_REGIONS_ALL_ENS.csv'.format(model_name=model_name,
    #               model_type=model_type,d_days=no_day_change)
    #d_SIC_ALL_ens.to_csv(filename_full)

panArctic
not the right yr
not the right yr
2018-01-01 00:00:00
2018-02-01 00:00:00
2018-03-01 00:00:00
2018-04-01 00:00:00
2018-05-01 00:00:00
2018-06-01 00:00:00
2018-07-01 00:00:00
2018-08-01 00:00:00
2018-09-01 00:00:00
2018-10-01 00:00:00
2018-11-01 00:00:00
2018-12-01 00:00:00
not the right yr
not the right yr
not the right yr
not the right yr
not the right yr
not the right yr
not the right yr
not the right yr
not the right yr
not the right yr
not the right yr


In [27]:
d_SIC_ALL_ens.head()

Unnamed: 0,I (init date),V (valid date),V_mon (valid date month),V_yr (valid date year),SIE,lead time (days),ensemble,region
0,2017-11-01,2017-11-01 00:00:00,11,2017,7.78442,0,1,panArctic
0,2017-11-01,2017-11-02 00:00:00,11,2017,7.83142,1,1,panArctic
0,2017-11-01,2017-11-03 00:00:00,11,2017,7.91983,2,1,panArctic
0,2017-11-01,2017-11-04 00:00:00,11,2017,8.01465,3,1,panArctic
0,2017-11-01,2017-11-05 00:00:00,11,2017,8.112,4,1,panArctic


In [28]:
filepath_save = '/home/disk/sipn/mcmcgraw/data/VRILE/'
filename_full = filepath_save+'panArctic_ONLY_{model_name}_{model_type}_2018_TEST_SIE_ALL_ENS.csv'.format(model_name=model_name,
                       model_type=model_type)
d_SIC_ALL_ens.to_csv(filename_full)

Unnamed: 0,I (init date),V (valid date),V_mon (valid date month),V_yr (valid date year),SIE,lead time (days),ensemble,region
0,2017-11-01,2017-11-01 00:00:00,11,2017,7.78442,0,1,panArctic
0,2017-11-01,2017-11-02 00:00:00,11,2017,7.83142,1,1,panArctic
0,2017-11-01,2017-11-03 00:00:00,11,2017,7.91983,2,1,panArctic
0,2017-11-01,2017-11-04 00:00:00,11,2017,8.01465,3,1,panArctic
0,2017-11-01,2017-11-05 00:00:00,11,2017,8.112,4,1,panArctic


array([ 7.78441742,  7.83142311,  7.91983416,  8.01464667,  8.11200442,
        8.18626259,  8.21850502,  8.31268255,  8.38156717,  8.47486843,
        8.53698182,  8.59216804,  8.6338454 ,  8.69712699,  8.70855732,
        8.73850021,  8.78681377,  8.867601  ,  8.99561318,  9.06292156,
        9.15511915,  9.34019952,  9.55058701,  9.66852088,  9.78445879,
        9.93221879, 10.03715923, 10.17063641, 10.29103297, 10.41197011,
       10.52009789, 10.46663019, 10.5201628 , 10.57581116, 10.61548756,
       10.66532898, 10.79096096, 10.88189781, 10.92219665, 10.99827278,
       11.13999691, 11.25174559, 11.31047796, 11.37584168, 11.44927361,
       11.61838626, 11.72450186, 11.81223004, 11.85305438, 11.96710083,
       12.01646628, 12.07046576, 12.15397045, 12.18767914, 12.25667294,
       12.31608149, 12.3339357 , 12.43014796, 12.49424605, 12.55058884,
       12.59064551, 12.62055647, 12.61961282, 12.65225347, 12.67196553,
       12.72016707, 12.75533076, 12.75847947, 12.77484512, 12.83