# Import ERA-5 Climate data and filter by trap locations to reduce size

In [8]:
import numpy as np
from datetime import datetime
import pandas as pd
import xarray as xr
import flox
import glob
import os

  data = yaml.load(f.read()) or {}


In [4]:
def downsample_df(dataset):
    dataset['t2m'] -= 273.15
    dataset.t2m.attrs['units'] = 'deg C'
    max_daily = dataset.resample(time='D').max(dim='time')
    min_daily = dataset.resample(time='D').min(dim='time')
    mean_daily = dataset.resample(time='D').mean(dim='time')
    
    max_daily = max_daily.rename({'t2m':'max_t2m'})
    min_daily = min_daily.rename({'t2m':'min_t2m'})
    mean_daily = mean_daily.rename({'t2m':'mean_t2m'})
    
    merged_data = xr.merge([max_daily,min_daily,mean_daily])
    
    # faster to do this on the xarray
    merged_data['year'] = merged_data['time'].dt.strftime('%Y')
    merged_data['month'] = merged_data['time'].dt.strftime('%B')
    merged_data['day'] = merged_data['time'].dt.strftime('%d')
    
    df = merged_data.to_dataframe()
    df = df.reset_index()
    
    return(df)

In [5]:
def listdir_nohidden(path):
    return glob.glob(os.path.join(path, '*'))

In [45]:
model_dir = "data/"

In [32]:
state = "Colorado"
state_code = "CO" # "AZ_new"
yr1 = 2000
yr2 = 2009

In [33]:
f1 = xr.open_dataset(model_dir + "t2m/" + state_code+"_t2m_" + str(yr1) + "-" + str(yr2) + ".nc", decode_times = True)

In [34]:
f1

In [35]:
df1 = downsample_df(f1) 

In [36]:
df_all = pd.concat([df1]) # 

In [39]:
df_all.to_csv("downsampled/"+state+"_t2m_" + str(yr1) + "-" + str(yr2) + ".csv")

In [11]:
df_all

Unnamed: 0,time,longitude,latitude,max_t2m,min_t2m,mean_t2m,year,month,day
0,1960-01-01,-128.0,54.00,-8.070312,-13.675690,-11.781507,1960,January,01
1,1960-01-01,-128.0,53.75,-6.402344,-12.259430,-10.078487,1960,January,01
2,1960-01-01,-128.0,53.50,-5.380890,-11.684448,-9.589625,1960,January,01
3,1960-01-01,-128.0,53.25,-4.811218,-11.081665,-8.725226,1960,January,01
4,1960-01-01,-128.0,53.00,-3.859985,-9.491852,-7.812855,1960,January,01
...,...,...,...,...,...,...,...,...,...
3221850,1978-12-31,-104.0,32.50,8.789276,-0.999756,3.131788,1978,December,31
3221851,1978-12-31,-104.0,32.25,8.954681,-0.202484,4.075764,1978,December,31
3221852,1978-12-31,-104.0,32.00,9.472046,0.621613,4.973324,1978,December,31
3221853,1978-12-31,-104.0,31.75,10.195801,1.213898,5.663390,1978,December,31


## Summary statistics

In [40]:
year1 = '2000'
year2 = '2009'

In [41]:
def downsample_temps(filename):

    dataset = xr.open_dataset(filename)

    dataset['t2m'] -= 273.15
    dataset.t2m.attrs['units'] = 'deg C'
    max_daily = dataset.resample(time='D').max(dim='time')
    min_daily = dataset.resample(time='D').min(dim='time')
    mean_daily = dataset.resample(time='D').mean(dim='time')

    max_daily = max_daily.rename({'t2m':'max_t2m'})
    min_daily = min_daily.rename({'t2m':'min_t2m'})
    mean_daily = mean_daily.rename({'t2m':'mean_t2m'})

    merged_data = xr.merge([max_daily,min_daily,mean_daily])

    # faster to do this on the xarray
    merged_data['year'] = merged_data['time'].dt.strftime('%Y')
    merged_data['month'] = merged_data['time'].dt.strftime('%B')
    merged_data['day'] = merged_data['time'].dt.strftime('%d')

    df = merged_data.to_dataframe()
    df = df.reset_index()

    df_means = df.groupby(['latitude','longitude','month','year'])[['min_t2m','max_t2m','mean_t2m']].mean()
    df_means = df_means.reset_index()

    df['gdd'] = (df['max_t2m'] + df['min_t2m'])/2 - 5.6
    df['gdd'] = np.where(df['gdd'] < 0, 0, df['gdd'])

    df['gdd_sum'] = df.groupby(['latitude','longitude']).cumsum()['gdd']

    df['julian'] = pd.DatetimeIndex(df['time']).dayofyear
    hatch_pred = df[df.gdd_sum>= 300].groupby(['latitude','longitude','year']).min('julian')
    hatch_pred['julian'] = hatch_pred['julian'] + 69

    hatch_pred_small = hatch_pred.reset_index()[['latitude','longitude','year','julian']]

    gdd_before = df[df.gdd_sum< 300].groupby(['latitude','longitude','year']).max('julian').reset_index()[['latitude','longitude','year','gdd_sum']].rename(columns = {'gdd_sum' : 'gdd_subtract'})

    season_gdds = pd.merge(hatch_pred_small,df, how = 'left')

    season_gdds = pd.merge(season_gdds,gdd_before,how = 'left')

    season_gdds['gdd_season'] = season_gdds['gdd_sum'] - season_gdds['gdd_subtract']
    season_gdds = season_gdds[['latitude','longitude','year','julian','gdd_season']]

    return(df_means,season_gdds)

In [46]:
filenames = listdir_nohidden(model_dir + "t2m/")

tas_df = pd.DataFrame()
gdd_df = pd.DataFrame()

for file in filenames:

    op = downsample_temps(file)

    tas_df = tas_df.append(op[0])
    gdd_df = gdd_df.append(op[1])

tas_df.to_csv("downsampled/means_t2m_" + year1 + "-" + year2 + "_ERA5.csv")
gdd_df.to_csv("downsampled/gdd_season_" + year1 + "-" + year2 + "_ERA5.csv")


In [47]:
tas_df

Unnamed: 0,latitude,longitude,month,year,min_t2m,max_t2m,mean_t2m
0,37.25,-107.00,April,2000,-3.239170,11.994127,4.100599
1,37.25,-107.00,April,2001,-3.042498,8.344407,2.196847
2,37.25,-107.00,April,2002,-2.296647,13.225080,5.389048
3,37.25,-107.00,April,2003,-4.387143,8.423391,1.675321
4,37.25,-107.00,April,2004,-2.547872,8.415641,2.750572
...,...,...,...,...,...,...,...
11995,39.50,-104.75,September,2005,10.243055,27.135633,17.919464
11996,39.50,-104.75,September,2006,6.220745,21.205975,13.193229
11997,39.50,-104.75,September,2007,10.571645,25.693346,17.569292
11998,39.50,-104.75,September,2008,8.324207,24.327116,15.454497


In [48]:
gdd_df

Unnamed: 0,latitude,longitude,year,julian,gdd_season
0,37.25,-107.00,2000,238,737.132935
1,37.25,-107.00,2001,70,
2,37.25,-107.00,2002,70,
3,37.25,-107.00,2003,70,
4,37.25,-107.00,2004,70,
...,...,...,...,...,...
995,39.50,-104.75,2005,70,
996,39.50,-104.75,2006,70,
997,39.50,-104.75,2007,70,
998,39.50,-104.75,2008,70,
