## Import ERA5 total precipitation and downsample from hourly to daily and monthly totals

In [None]:
import numpy as np
from datetime import datetime
import pandas as pd
import xarray as xr
import flox
import glob
import os

In [None]:
import matplotlib as plt

In [None]:
def sum_df(dataset):
    """Sum the ``tp`` variable per calendar day and return a flat DataFrame.

    The ``units`` attribute of ``dataset.tp`` is set to ``'m'`` (this
    modifies the object passed in).  The dataset is then resampled to daily
    bins along ``time`` and summed, ``tp`` is renamed to ``sum_tp``, and
    string ``year`` (``%Y``), ``month`` (``%B``, full month name) and ``day``
    (``%d``) variables derived from the daily timestamps are added.

    Parameters
    ----------
    dataset
        Object exposing a ``tp`` variable and a ``time`` dimension
        (presumably an ``xarray.Dataset`` -- confirm against the caller).

    Returns
    -------
    pandas.DataFrame
        The daily sums with the former index levels turned into ordinary
        columns via ``reset_index``.
    """
    dataset.tp.attrs['units'] = 'm'

    daily = dataset.resample(time='D').sum(dim='time').rename({'tp': 'sum_tp'})

    # Building the calendar labels on the xarray object is faster than
    # deriving them after the conversion to pandas.
    stamps = daily['time']
    for label, fmt in (('year', '%Y'), ('month', '%B'), ('day', '%d')):
        daily[label] = stamps.dt.strftime(fmt)

    return daily.to_dataframe().reset_index()

In [None]:
# Root directory of the input data; the cells below read NetCDF files from
# its "pr/" subdirectory.
model_dir = "data/"

In [None]:
# Run parameters used below to build the input and output file names.
state = "Colorado"  # used in the output CSV name
state_code = "CO" # used in the input NetCDF name, e.g. "AZ_new"
yr1 = 2000  # first year in the file-name range
yr2 = 2009  # last year in the file-name range

In [None]:
# Open <model_dir>pr/<state_code>_pr_<yr1>-<yr2>.nc with time decoding enabled.
f1 = xr.open_dataset(
    f"{model_dir}pr/{state_code}_pr_{yr1}-{yr2}.nc",
    decode_times=True,
)

In [None]:
# Daily sums of tp for the opened file, as a flat DataFrame.
df1 = sum_df(f1)

In [None]:
# Concatenation over a one-element list, so df_all holds the same rows as df1
# (presumably a placeholder for stacking further periods -- confirm).
df_all = pd.concat([df1])

In [None]:
# Write the daily table to downsampled/<state>_pr_<yr1>-<yr2>.csv.
df_all.to_csv(f"downsampled/{state}_pr_{yr1}-{yr2}.csv")

### Summary statistics: converting daily to monthly total precipitation

In [None]:
# Year range, kept as strings because it is concatenated directly into the
# output CSV file name below.
year1 = '2000'
year2 = '2009'

In [None]:
def downsample_sums(filename):
    """Return per-grid-cell monthly sums of ``tp`` for one dataset file.

    The file is opened with xarray, ``tp`` is summed per calendar day
    (renamed ``sum_tp``), and the daily sums are then summed again per
    ``latitude`` / ``longitude`` / ``month`` / ``year``.

    Parameters
    ----------
    filename
        Path of a dataset readable by ``xr.open_dataset`` that contains a
        ``tp`` variable on ``time``, ``latitude`` and ``longitude``.

    Returns
    -------
    pandas.DataFrame
        Columns ``latitude``, ``longitude``, ``month`` (full month name),
        ``year`` (four-digit string) and ``sum_tp``.  Despite the "means"
        used in output file names elsewhere, the values are sums.
    """
    # Open inside a context manager so the file handle is released as soon as
    # the data has been materialised in pandas; this function is called once
    # per file in a loop, and unclosed datasets would otherwise pile up.
    with xr.open_dataset(filename) as dataset:
        dataset.tp.attrs['units'] = 'm'
        sum_daily = dataset.resample(time='D').sum(dim='time')
        sum_daily = sum_daily.rename({'tp': 'sum_tp'})

        # faster to do this on the xarray
        sum_daily['year'] = sum_daily['time'].dt.strftime('%Y')
        sum_daily['month'] = sum_daily['time'].dt.strftime('%B')
        sum_daily['day'] = sum_daily['time'].dt.strftime('%d')

        df = sum_daily.to_dataframe().reset_index()

    monthly_sums = df.groupby(['latitude', 'longitude', 'month', 'year'])[['sum_tp']].sum()

    return monthly_sums.reset_index()

In [None]:
def listdir_nohidden(path):
    """Return the non-hidden entries of ``path`` as a sorted list of paths.

    The ``*`` glob pattern does not match names starting with a dot, which is
    what filters out hidden files.  ``glob.glob`` yields entries in arbitrary
    order, so the result is sorted to make downstream processing (and the row
    order of any CSV built from it) reproducible.

    Parameters
    ----------
    path
        Directory to list.

    Returns
    -------
    list of str
        ``path`` joined with each visible entry name, in ascending order.
        Empty if the directory has no visible entries or does not exist.
    """
    return sorted(glob.glob(os.path.join(path, '*')))

In [None]:
filenames = listdir_nohidden(model_dir + "pr/")

# Collect the per-file monthly tables and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and calling it in a loop
# re-copied the accumulated frame on every iteration.
monthly_frames = []

for file in filenames:

    monthly_frames.append(downsample_sums(file))

# pd.concat raises on an empty list, so fall back to an empty frame when the
# directory holds no files (matching what the append-based loop produced).
pr_df = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

pr_df.to_csv("downsampled/means_pr_" + year1 + "-" + year2 + "_ERA5.csv")


# Individual file analysis for new data

In [None]:
# Open the single-file dataset and rename its 'valid_time' axis to 'time',
# the name the resampling code below expects.
dataset = xr.open_dataset(
    "/Volumes/My Book/Climate/WUS/WUS_pr_2023.nc"
).rename({'valid_time': 'time'})

In [None]:
# Display the dataset for inspection.
dataset

In [None]:
# Label tp with units 'm', then sum the values per calendar day and rename
# the summed variable to 'sum_tp'.
dataset.tp.attrs['units'] = 'm'
sum_daily = dataset.resample(time='D').sum(dim='time').rename({'tp': 'sum_tp'})

# Calendar labels are derived on the xarray object because that is faster
# than computing them after the conversion to pandas.
sum_daily = sum_daily.assign(
    year=sum_daily['time'].dt.strftime('%Y'),
    month=sum_daily['time'].dt.strftime('%B'),
    day=sum_daily['time'].dt.strftime('%d'),
)

df = sum_daily.to_dataframe().reset_index()

# Sum of the daily sums per grid cell, month and year.
df_means = (
    df.groupby(['latitude', 'longitude', 'month', 'year'])[['sum_tp']]
    .sum()
    .reset_index()
)



In [None]:
# Write the monthly table to disk.  NOTE: despite "means" in the variable and
# file names, df_means is built with .sum(), so the values are monthly sums.
df_means.to_csv("/Volumes/My Book/Climate/WUS/downsampled/means_pr_2023_DEC_ERA5.csv")

## Precipitation days

In [None]:
def precip_days(filename):
    """Count, per grid cell and month, the days whose summed ``tp`` is >= 0.01.

    ``tp`` is summed per calendar day (renamed ``sum_tp``); a day is flagged
    when its sum is at least 0.01 in the units of ``tp`` (labelled ``'m'``
    here), and the flags are summed per ``latitude`` / ``longitude`` /
    ``month`` / ``year``.

    Parameters
    ----------
    filename
        Path of a dataset readable by ``xr.open_dataset`` that contains a
        ``tp`` variable on ``time``, ``latitude`` and ``longitude``.

    Returns
    -------
    pandas.DataFrame
        Columns ``latitude``, ``longitude``, ``month`` (full month name),
        ``year`` (four-digit string) and ``precip_days``.
    """
    # The context manager releases the file handle once the data is in pandas.
    with xr.open_dataset(filename) as dataset:
        dataset.tp.attrs['units'] = 'm'
        sum_daily = dataset.resample(time='D').sum(dim='time')
        sum_daily = sum_daily.rename({'tp': 'sum_tp'})

        # The labels and the DataFrame must come from the daily sums: the
        # raw dataset has no 'sum_tp' column, so building the frame from it
        # made the threshold test below fail with a KeyError.
        sum_daily['year'] = sum_daily['time'].dt.strftime('%Y')
        sum_daily['month'] = sum_daily['time'].dt.strftime('%B')
        sum_daily['day'] = sum_daily['time'].dt.strftime('%d')

        df = sum_daily.to_dataframe().reset_index()

    df['precip_days'] = np.where(df['sum_tp'] >= 0.01, 1, 0)

    monthly_counts = df.groupby(['latitude', 'longitude', 'month', 'year'])[['precip_days']].sum()

    return monthly_counts.reset_index()



In [None]:
def precip_days2(filename):
    """Count, per grid cell and month, the days whose summed ``tp`` is >= 0.01.

    Variant of the daily count for files whose time axis is called
    ``valid_time`` and which carry ``expver`` and ``number`` coordinates.
    Unlike the resample-based approach, the daily sums are computed in
    pandas by grouping on the string ``year`` / ``month`` / ``day`` labels.

    Parameters
    ----------
    filename
        Path of a dataset readable by ``xr.open_dataset`` with a ``tp``
        variable, a ``valid_time`` axis, ``latitude`` / ``longitude``
        coordinates and ``expver`` / ``number`` coordinates.

    Returns
    -------
    pandas.DataFrame
        Columns ``latitude``, ``longitude``, ``month`` (full month name),
        ``year`` (four-digit string) and ``precip_days``.
    """
    # The context manager releases the file handle once the data is in
    # pandas; this function is called once per file in a loop.
    with xr.open_dataset(filename) as raw:
        dataset = raw.rename({'valid_time': 'time'})

        # Drop the coordinates that are not needed in the output table.
        dataset = dataset.reset_coords(names=["expver", "number"], drop=True)

        # NOTE(review): presumably cast to float32 so coordinates from
        # different files compare equal when grouped -- confirm.
        dataset['latitude'] = dataset['latitude'].astype(np.float32)
        dataset['longitude'] = dataset['longitude'].astype(np.float32)

        dataset = dataset[['latitude', 'longitude', 'time', 'tp']]

        dataset.tp.attrs['units'] = 'm'

        # faster to do this on the xarray
        dataset['year'] = dataset['time'].dt.strftime('%Y')
        dataset['month'] = dataset['time'].dt.strftime('%B')
        dataset['day'] = dataset['time'].dt.strftime('%d')

        df = dataset.to_dataframe().reset_index()

    # Daily sums per grid cell.
    sum_daily = df.groupby(['latitude', 'longitude', 'month', 'year', 'day'])[['tp']].sum()
    sum_daily = sum_daily.reset_index().rename(columns={'tp': 'sum_tp'})

    # Flag each day at or above the threshold, then count flags per month.
    sum_daily['precip_days'] = np.where(sum_daily['sum_tp'] >= 0.01, 1, 0)

    monthly_counts = sum_daily.groupby(['latitude', 'longitude', 'month', 'year'])[['precip_days']].sum()

    return monthly_counts.reset_index()



In [None]:
# Rebind the input directory and year range for the precipitation-day run;
# these replace the values assigned earlier in the notebook.
model_dir = "/Volumes/My Book/Climate/ERA_PR/gaps/ALL_recent2/"
year1 = '2000'
year2 = '2024'

In [None]:
filenames = listdir_nohidden(model_dir)

# Collect the per-file tables and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and calling it in a loop
# re-copied the accumulated frame on every iteration.
monthly_parts = []

for file in filenames:

    op = precip_days2(file)

    monthly_parts.append(op)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# directory holds no files (matching what the append-based loop produced).
precip_days_df = pd.concat(monthly_parts) if monthly_parts else pd.DataFrame()

precip_days_df.to_csv(model_dir + "../downsampled/precip_days_" + year1 + "-" + year2 + "_ERA5.csv")



In [None]:
# Display the combined precipitation-day table for inspection.
precip_days_df