## Import ERA5 2 m temperature and downsample from hourly to daily and monthly

In [None]:
import numpy as np
from datetime import datetime
import pandas as pd
import xarray as xr
import flox
import glob
import os

In [None]:
def downsample_df(dataset):
    """Downsample 2 m temperature to daily max/min/mean and return a table.

    Parameters
    ----------
    dataset : xarray.Dataset
        Dataset holding a ``t2m`` variable (converted here from Kelvin to
        degrees Celsius by subtracting 273.15). Its time dimension may be
        named either ``time`` or ``valid_time``. The caller's dataset is
        left untouched, so calling this twice on the same object is safe.

    Returns
    -------
    pandas.DataFrame
        The merged daily dataset flattened with ``to_dataframe()`` and
        ``reset_index()``; it carries ``max_t2m``, ``min_t2m`` and
        ``mean_t2m`` plus string columns ``year`` ('%Y'), ``month`` ('%B')
        and ``day`` ('%d').
    """
    # Some files name the time dimension "valid_time". Normalise it so the
    # resample and the reduction below act on one and the same dimension.
    if 'valid_time' in dataset.sizes and 'time' not in dataset.variables:
        dataset = dataset.rename({'valid_time': 'time'})

    # Build a converted copy instead of using "-=" so the caller's dataset is
    # not shifted by another 273.15 every time the notebook cell is re-run.
    celsius = dataset['t2m'] - 273.15
    celsius.attrs['units'] = 'deg C'
    dataset = dataset.assign(t2m=celsius)

    daily = dataset.resample(time='D')
    merged_data = xr.merge([
        daily.max(dim='time').rename({'t2m': 'max_t2m'}),
        daily.min(dim='time').rename({'t2m': 'min_t2m'}),
        daily.mean(dim='time').rename({'t2m': 'mean_t2m'}),
    ])

    # faster to do this on the xarray
    merged_data['year'] = merged_data['time'].dt.strftime('%Y')
    merged_data['month'] = merged_data['time'].dt.strftime('%B')
    merged_data['day'] = merged_data['time'].dt.strftime('%d')

    return merged_data.to_dataframe().reset_index()

In [None]:
def listdir_nohidden(path):
    """Return the non-hidden entries of *path* as a sorted list of paths.

    The ``*`` pattern does not match names that start with a dot, which is
    what filters out hidden files. The result is sorted so that anything
    built by iterating over it (e.g. concatenated tables) has a reproducible
    order; ``glob`` itself returns entries in arbitrary order. *path* is
    escaped so glob metacharacters in the directory name are taken literally.
    """
    pattern = os.path.join(glob.escape(path), '*')
    return sorted(glob.glob(pattern))

In [None]:
model_dir = "/Volumes/My Book/Climate/ERA/gaps/ALL_old2/"

In [None]:
state = "WUS"
state_code = "WUS" # "AZ_new"
yr1 = 1960
yr2 = 1978

In [None]:
f1 = xr.open_dataset(model_dir + state_code+"_t2m_" + str(yr1) + "-" + str(yr2) + ".nc", decode_times = True)

In [None]:
df1 = downsample_df(f1) 

In [None]:
df_all = pd.concat([df1]) # 

In [None]:
df_all.to_csv("downsampled/"+state+"_t2m_" + str(yr1) + "-" + str(yr2) + ".csv")

### Summary statistics: converting daily values to monthly mean, maximum, and minimum temperature at 2 meters

In [None]:
def downsample_temps(filename, base_temp=5.6, hatch_threshold=300, season_days=69):
    """Compute monthly temperature means and a seasonal GDD table for one file.

    Parameters
    ----------
    filename : str
        Path of a NetCDF file readable by ``xarray.open_dataset``. It must
        hold a ``t2m`` variable (shifted here by -273.15 to degrees Celsius)
        on ``latitude``/``longitude`` and a time dimension named ``time`` or
        ``valid_time``.
    base_temp : float, optional
        Value subtracted from the daily (max + min) / 2 to get growing degree
        days (GDD); negative results are set to 0.
    hatch_threshold : float, optional
        Cumulative GDD at which the ``hatch`` day is taken.
    season_days : int, optional
        Number of days after ``hatch`` at which the cumulative GDD is read.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``df_means``: mean of ``min_t2m``, ``max_t2m`` and ``mean_t2m`` per
        latitude, longitude, month and year.
        ``season_gdds``: per latitude, longitude and year, the ``hatch`` day
        of year and ``gdd_season``, the GDD accumulated between the day before
        ``hatch`` and ``hatch + season_days``. ``gdd_season`` is NaN when that
        later day is not present in the file; cell-years that never reach
        ``hatch_threshold`` are absent.
    """
    cell_year_keys = ['latitude', 'longitude', 'year']

    # The context manager releases the file handle; this matters because the
    # function is called once per file in a loop. to_dataframe() pulls the
    # values into memory before the file is closed.
    with xr.open_dataset(filename) as dataset:
        # Accept files whose time dimension is called "valid_time".
        if 'valid_time' in dataset.sizes and 'time' not in dataset.variables:
            dataset = dataset.rename({'valid_time': 'time'})

        dataset['t2m'] -= 273.15
        dataset.t2m.attrs['units'] = 'deg C'

        daily = dataset.resample(time='D')
        merged_data = xr.merge([
            daily.max(dim='time').rename({'t2m': 'max_t2m'}),
            daily.min(dim='time').rename({'t2m': 'min_t2m'}),
            daily.mean(dim='time').rename({'t2m': 'mean_t2m'}),
        ])

        # faster to do this on the xarray
        merged_data['year'] = merged_data['time'].dt.strftime('%Y')
        merged_data['month'] = merged_data['time'].dt.strftime('%B')
        merged_data['day'] = merged_data['time'].dt.strftime('%d')

        df = merged_data.to_dataframe().reset_index()

    df_means = (
        df.groupby(['latitude', 'longitude', 'month', 'year'])[['min_t2m', 'max_t2m', 'mean_t2m']]
        .mean()
        .reset_index()
    )

    # Daily growing degree days, floored at zero.
    df['gdd'] = ((df['max_t2m'] + df['min_t2m']) / 2 - base_temp).clip(lower=0)

    # Select the column *before* cumsum: running cumsum over every column
    # fails on the datetime/string columns under pandas >= 2.0. The running
    # total restarts each year because everything below is evaluated per
    # year; without the reset a multi-year file would start its second year
    # already above the threshold. Single-year files are unaffected.
    df['gdd_sum'] = df.groupby(cell_year_keys)['gdd'].cumsum()

    df['julian'] = pd.DatetimeIndex(df['time']).dayofyear

    # First day of year on which the running total reaches the threshold.
    reached = df[df['gdd_sum'] >= hatch_threshold]
    hatch_pred = reached.groupby(cell_year_keys)['julian'].min().rename('hatch').reset_index()
    # Day of year at which the end-of-season running total is looked up.
    hatch_pred['julian'] = hatch_pred['hatch'] + season_days
    hatch_pred_small = hatch_pred[cell_year_keys + ['julian']]

    # Running total on the last day still below the threshold; that day + 1
    # is the hatch day.
    below = df[df['gdd_sum'] < hatch_threshold]
    gdd_before = (
        below.groupby(cell_year_keys)[['gdd_sum', 'julian']]
        .max()
        .reset_index()
        .rename(columns={'gdd_sum': 'gdd_subtract', 'julian': 'hatch'})
    )
    gdd_before['hatch'] = gdd_before['hatch'] + 1

    # Explicit keys: the implicit "all shared columns" join silently changes
    # meaning if either frame gains a column.
    season_gdds = pd.merge(
        hatch_pred_small,
        df[cell_year_keys + ['julian', 'gdd_sum']],
        how='left',
        on=cell_year_keys + ['julian'],
    )
    season_gdds = pd.merge(season_gdds, gdd_before, how='left', on=cell_year_keys)

    season_gdds['gdd_season'] = season_gdds['gdd_sum'] - season_gdds['gdd_subtract']
    season_gdds = season_gdds[['latitude', 'longitude', 'year', 'hatch', 'gdd_season']]

    return df_means, season_gdds

In [None]:
year1 = '1940'
year2 = '1959'

In [None]:
filenames = listdir_nohidden(model_dir)

tas_df = pd.DataFrame()
gdd_df = pd.DataFrame()

for file in filenames:

    op = downsample_temps(file)

    tas_df = tas_df.append(op[0])
    gdd_df = gdd_df.append(op[1])

tas_df.to_csv("means_t2m_" + year1 + "-" + year2 + "_ERA5.csv")
gdd_df.to_csv("gdd_season_" + year1 + "-" + year2 + "_ERA5.csv")


# Individual file analysis for new data

In [None]:
dataset = xr.open_dataset("/Volumes/My Book/Climate/ERA/gaps/ALL_old2/ALL_old2_t2m_1950.nc")

In [None]:
#dataset = dataset.rename({'valid_time' : 'time'})

In [None]:
dataset['t2m'] -= 273.15
dataset.t2m.attrs['units'] = 'deg C'
max_daily = dataset.resample(time='D').max(dim='time')
min_daily = dataset.resample(time='D').min(dim='time')
mean_daily = dataset.resample(time='D').mean(dim='time')

max_daily = max_daily.rename({'t2m':'max_t2m'})
min_daily = min_daily.rename({'t2m':'min_t2m'})
mean_daily = mean_daily.rename({'t2m':'mean_t2m'})

merged_data = xr.merge([max_daily,min_daily,mean_daily])

# faster to do this on the xarray
merged_data['year'] = merged_data['time'].dt.strftime('%Y')
merged_data['month'] = merged_data['time'].dt.strftime('%B')
merged_data['day'] = merged_data['time'].dt.strftime('%d')

df = merged_data.to_dataframe()
df = df.reset_index()

df_means = df.groupby(['latitude','longitude','month','year'])[['min_t2m','max_t2m','mean_t2m']].mean()
df_means = df_means.reset_index()

df['gdd'] = (df['max_t2m'] + df['min_t2m'])/2 - 5.6
df['gdd'] = np.where(df['gdd'] < 0, 0, df['gdd'])

df['gdd_sum'] = df.groupby(['latitude','longitude']).cumsum()['gdd']

df['julian'] = pd.DatetimeIndex(df['time']).dayofyear
hatch_pred = df[df.gdd_sum>= 300].groupby(['latitude','longitude','year']).min('julian').rename(columns = {'julian' : 'hatch'})
hatch_pred['julian'] = hatch_pred['hatch'] + 69

hatch_pred_small = hatch_pred.reset_index()[['latitude','longitude','year','julian']]

gdd_before = df[df.gdd_sum< 300].groupby(['latitude','longitude','year']).max('julian').reset_index()[['latitude','longitude','year','gdd_sum','julian']].rename(columns = {'gdd_sum' : 'gdd_subtract', 'julian' : 'hatch'})
gdd_before['hatch'] = gdd_before['hatch'] + 1

season_gdds = pd.merge(hatch_pred_small,df, how = 'left')

season_gdds = pd.merge(season_gdds,gdd_before,how = 'left')

season_gdds['gdd_season'] = season_gdds['gdd_sum'] - season_gdds['gdd_subtract']
#season_gdds = season_gdds[['latitude','longitude','year','hatch','julian','gdd_season']]

In [None]:
#df_means.to_csv("/Volumes/My Book/Climate/WUS/downsampled/means_t2m_2024_ERA5.csv")
# Export happens before the column subset in the next cell, so the CSV holds
# every column of the merged table.
# NOTE(review): the output name says 2024 - confirm it matches the file that
# was loaded into `dataset`.
season_gdds.to_csv("/Volumes/My Book/Climate/WUS/downsampled/gdd_season_t2m_2024_ERA5.csv")

In [None]:
# Narrow the in-memory table to the key columns for inspection.
season_gdds = season_gdds[['latitude','longitude','year','hatch','julian','gdd_season']]

In [None]:
# Display the table as the cell output.
season_gdds

## Cold Tolerance

In [None]:
# Egg mortality thresholds: -24 and -28 degrees C
# Larval mortality threshold: 0 degrees C

def cold_tolerance(filename):
    """Count cold days per grid cell and month for one NetCDF file.

    Parameters
    ----------
    filename : str
        Path of a NetCDF file readable by ``xarray.open_dataset``. It must
        hold a ``t2m`` variable (shifted here by -273.15 to degrees Celsius)
        on ``latitude``/``longitude`` and a time dimension named ``time`` or
        ``valid_time``.

    Returns
    -------
    pandas.DataFrame
        One row per latitude, longitude, month and year with:
        ``thresh1`` / ``thresh2`` / ``thresh3`` - number of days whose daily
        minimum is <= -28, <= -24 and <= 0 deg C respectively, and
        ``min_t2m`` - the lowest daily minimum in that month.
    """
    group_keys = ['latitude', 'longitude', 'month', 'year']

    # The context manager releases the file handle; this function is called
    # once per file in a loop. to_dataframe() loads the values before close.
    with xr.open_dataset(filename) as dataset:
        # Rename only when needed, so files that already use "time" work too
        # (an unconditional rename raises on them).
        if 'valid_time' in dataset.sizes and 'time' not in dataset.variables:
            dataset = dataset.rename({'valid_time': 'time'})

        dataset['t2m'] -= 273.15
        dataset.t2m.attrs['units'] = 'deg C'

        min_daily = dataset.resample(time='D').min(dim='time').rename({'t2m': 'min_t2m'})

        # faster to do this on the xarray
        min_daily['year'] = min_daily['time'].dt.strftime('%Y')
        min_daily['month'] = min_daily['time'].dt.strftime('%B')
        min_daily['day'] = min_daily['time'].dt.strftime('%d')

        df = min_daily.to_dataframe().reset_index()

    # 1/0 flags per day so that a plain sum yields a day count.
    df['thresh1'] = np.where(df['min_t2m'] <= -28, 1, 0)
    df['thresh2'] = np.where(df['min_t2m'] <= -24, 1, 0)
    df['thresh3'] = np.where(df['min_t2m'] <= 0, 1, 0)

    # One grouped pass replaces two group-bys joined on implicit keys; the
    # resulting columns and their order are the same.
    cold_df = (
        df.groupby(group_keys)
        .agg(
            thresh1=('thresh1', 'sum'),
            thresh2=('thresh2', 'sum'),
            thresh3=('thresh3', 'sum'),
            min_t2m=('min_t2m', 'min'),
        )
        .reset_index()
    )

    return cold_df

In [None]:
model_dir = "/Volumes/My Book/Climate/ERA/gaps/ALL_recent/"
year1 = '1979'
year2 = '2024'

In [None]:
filenames = listdir_nohidden(model_dir)

tol_df = pd.DataFrame()

for file in filenames:

    op = cold_tolerance(file)

    tol_df = tol_df.append(op)

tol_df.to_csv(model_dir + "../downsampled/cold_tolerance_" + year1 + "-" + year2 + "_ERA5.csv")


In [None]:
tol_df