## Import ERA5 relative humidity and downsample from hourly to daily and monthly resolution

In [1]:
import numpy as np
from datetime import datetime
import pandas as pd
import xarray as xr
import flox
import glob
import os

  data = yaml.load(f.read()) or {}


In [2]:
def listdir_nohidden(path):
    """Return the paths of the non-hidden entries directly inside ``path``.

    The ``*`` wildcard does not match names that start with a dot, so
    hidden files are left out. Each returned item is ``path`` joined with
    the entry name; the order is whatever ``glob`` yields.
    """
    visible_pattern = os.path.join(path, '*')
    return list(glob.iglob(visible_pattern))

In [3]:
def downsample_df(dataset):
    """Downsample a relative-humidity dataset to daily statistics.

    Parameters
    ----------
    dataset : xarray.Dataset
        Must have a ``time`` dimension and a variable named ``r``
        (presumably hourly ERA5 relative humidity -- confirm against the
        input files).

    Returns
    -------
    pandas.DataFrame
        One row per coordinate combination and day, with the dataset
        coordinates turned into ordinary columns plus ``max_rh``,
        ``min_rh``, ``mean_rh`` and the string columns ``year`` (``%Y``),
        ``month`` (full month name, ``%B``) and ``day`` (``%d``).
    """
    # Daily maximum, minimum and mean, each with 'r' renamed so the three
    # results can live side by side in one dataset.
    daily_max = dataset.resample(time='D').max(dim='time').rename({'r':'max_rh'})
    daily_min = dataset.resample(time='D').min(dim='time').rename({'r':'min_rh'})
    daily_mean = dataset.resample(time='D').mean(dim='time').rename({'r':'mean_rh'})

    combined = xr.merge([daily_max, daily_min, daily_mean])

    # Calendar labels are derived on the xarray object because that is
    # faster than doing it after the conversion to a DataFrame.
    for column, fmt in (('year', '%Y'), ('month', '%B'), ('day', '%d')):
        combined[column] = combined['time'].dt.strftime(fmt)

    # reset_index() turns the coordinate index levels into plain columns.
    return combined.to_dataframe().reset_index()

In [4]:
# Root directory of the input data; the NetCDF files are read from its "rh/" subfolder.
model_dir = "data/"

In [5]:
# Run configuration: `state` is used in the output CSV name, `state_code`
# in the input NetCDF name, and yr1/yr2 (the year range) in both.
state = "Colorado"
state_code = "CO" # "AZ_new"
yr1 = 2000
yr2 = 2009

In [6]:
# Opens <model_dir>rh/<state_code>_rh_<yr1>-<yr2>.nc with time decoding enabled.
f1 = xr.open_dataset(model_dir + "rh/" + state_code+"_rh_" + str(yr1) + "-" + str(yr2) + ".nc", decode_times = True)

In [7]:
# Daily max/min/mean relative humidity as a flat DataFrame.
df1 = downsample_df(f1)

In [8]:
# Concatenating a single-element list just yields the rows of df1;
# NOTE(review): presumably a placeholder for stacking several periods -- confirm.
df_all = pd.concat([df1])

In [9]:
# Writes downsampled/<state>_rh_<yr1>-<yr2>.csv.
# NOTE(review): assumes the "downsampled/" directory already exists.
df_all.to_csv("downsampled/"+state+"_rh_" + str(yr1) + "-" + str(yr2) + ".csv")

### Summary statistics: converting from daily to monthly mean, maximum, and minimum relative humidity

In [10]:
# Year range as strings; used only to build the name of the monthly-means
# output CSV written at the end of the notebook.
# NOTE(review): these repeat the integer yr1/yr2 values defined earlier and
# are not derived from them, so the two pairs must be kept in sync by hand.
year1 = '2000'
year2 = '2009'

In [11]:
def downsample_rh(filename):
    """Compute monthly mean relative humidity per grid cell from one file.

    The file is opened with xarray, downsampled to daily max/min/mean of
    the variable ``r``, converted to a DataFrame, and the daily means are
    then averaged per (latitude, longitude, month, year).

    Parameters
    ----------
    filename : str or path-like
        Path to a dataset readable by ``xr.open_dataset``. It must have a
        ``time`` dimension, ``latitude``/``longitude`` coordinates and a
        variable named ``r``.

    Returns
    -------
    pandas.DataFrame
        Columns ``latitude``, ``longitude``, ``month`` (full month name),
        ``year`` (string) and ``mean_rh``.

    Notes
    -----
    Only ``mean_rh`` is aggregated to monthly resolution; the daily
    ``max_rh``/``min_rh`` columns are computed but not carried into the
    result. NOTE(review): confirm whether monthly max/min are wanted too.
    """
    # The context manager closes the underlying file once the data has been
    # materialised into a DataFrame; without it every call leaves a file
    # handle open, which adds up when this runs in a loop over many files.
    with xr.open_dataset(filename) as dataset:
        max_daily = dataset.resample(time='D').max(dim='time')
        min_daily = dataset.resample(time='D').min(dim='time')
        mean_daily = dataset.resample(time='D').mean(dim='time')

        max_daily = max_daily.rename({'r':'max_rh'})
        min_daily = min_daily.rename({'r':'min_rh'})
        mean_daily = mean_daily.rename({'r':'mean_rh'})

        merged_data = xr.merge([max_daily,min_daily,mean_daily])

        # faster to do this on the xarray
        merged_data['year'] = merged_data['time'].dt.strftime('%Y')
        merged_data['month'] = merged_data['time'].dt.strftime('%B')
        merged_data['day'] = merged_data['time'].dt.strftime('%d')

        # to_dataframe() reads the values, so it must happen before the
        # file is closed.
        df = merged_data.to_dataframe()

    df = df.reset_index()

    # Monthly mean of the daily means for each grid cell.
    df_means = df.groupby(['latitude','longitude','month','year'])[['mean_rh']].mean()
    df_means = df_means.reset_index()

    return df_means

In [12]:
# Every non-hidden file in the "rh/" input folder is processed.
filenames = listdir_nohidden(model_dir + 'rh/')

# Build one monthly-mean frame per input file and concatenate them in a
# single step: DataFrame.append was removed in pandas 2.0, and it also
# re-copied the accumulated frame on every iteration.
monthly_frames = [downsample_rh(file) for file in filenames]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame to keep the original behaviour when the folder contains no files.
hurs_df = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

# Writes downsampled/means_rh_<year1>-<year2>_ERA5.csv.
hurs_df.to_csv("downsampled/means_rh_" + year1 + "-" + year2 + "_ERA5.csv")
