# Import ERA-5 Climate data and filter by trap locations to reduce size

In [1]:
import numpy as np
from datetime import datetime

In [2]:
import pandas as pd

In [None]:
#!pip install pyyaml==5.4.1

In [3]:
import xarray as xr
import flox

  data = yaml.load(f.read()) or {}


In [4]:
state = "PNW"
state_code = "PNW"

In [5]:
f80 = xr.open_dataset("/Volumes/My Book/Climate/ERA_PR/"+state_code+"_PR_1980-1989.nc", decode_times = True)
f90 = xr.open_dataset("/Volumes/My Book/Climate/ERA_PR/"+state_code+"_PR_1990-1999.nc", decode_times = True)
f00 = xr.open_dataset("/Volumes/My Book/Climate/ERA_PR/"+state_code+"_PR_2000-2009.nc", decode_times = True)
f10 = xr.open_dataset("/Volumes/My Book/Climate/ERA_PR/"+state_code+"_PR_2010-2020.nc", decode_times = True)
f20 = xr.open_dataset("/Volumes/My Book/Climate/ERA_PR/"+state_code+"_PR_2021-2023.nc", decode_times = True)


In [6]:
# f80 = xr.open_dataset("/Volumes/My Book/Climate/missing_updates/"+state_code+"_PR_missing1_1980-1989.nc", decode_times = True)
# f90 = xr.open_dataset("/Volumes/My Book/Climate/missing_updates/"+state_code+"_PR_missing1_1990-1999.nc", decode_times = True)
# f00 = xr.open_dataset("/Volumes/My Book/Climate/missing_updates/"+state_code+"_PR_missing1_2000-2009.nc", decode_times = True)
# f10 = xr.open_dataset("/Volumes/My Book/Climate/missing_updates/"+state_code+"_PR_missing1_2010-2020.nc", decode_times = True)

In [None]:
f80.sel(time = '1980-01-01')

In [None]:
print(f80.longitude)
print(f80.latitude)

In [6]:
def sum_df(dataset):
    dataset.tp.attrs['units'] = 'm'
    sum_daily = dataset.resample(time='D').sum(dim='time')
    
    sum_daily = sum_daily.rename({'tp':'sum_tp'})
    
    # faster to do this on the xarray
    sum_daily['year'] = sum_daily['time'].dt.strftime('%Y')
    sum_daily['month'] = sum_daily['time'].dt.strftime('%B')
    sum_daily['day'] = sum_daily['time'].dt.strftime('%d')
    
    df = sum_daily.to_dataframe()
    df = df.reset_index()
    
    return(df)

In [7]:
df80 = sum_df(f80)
df90 = sum_df(f90)
df00 = sum_df(f00)
df10 = sum_df(f10)
df20 = sum_df(f20)

In [8]:
df80.describe

<bound method NDFrame.describe of               time  longitude  latitude        sum_tp  year     month day
0       1979-01-01     -123.0     52.00  6.166883e-04  1979   January  01
1       1979-01-01     -123.0     51.75  4.384825e-04  1979   January  01
2       1979-01-01     -123.0     51.50  4.198188e-04  1979   January  01
3       1979-01-01     -123.0     51.25  4.704427e-04  1979   January  01
4       1979-01-01     -123.0     51.00  5.174857e-04  1979   January  01
...            ...        ...       ...           ...   ...       ...  ..
9390061 1989-12-31     -109.0     43.00  2.556480e-06  1989  December  31
9390062 1989-12-31     -109.0     42.75  2.045184e-06  1989  December  31
9390063 1989-12-31     -109.0     42.50  0.000000e+00  1989  December  31
9390064 1989-12-31     -109.0     42.25  5.112961e-07  1989  December  31
9390065 1989-12-31     -109.0     42.00  1.022592e-06  1989  December  31

[9390066 rows x 7 columns]>

In [9]:
df_all = pd.concat([df80,df90,df00,df10,df20])

In [10]:
df_all.to_csv("/Volumes/My Book/Climate/ERA_PR/downsampled/"+state+"_PR_allyears.csv")

### Filtering by traps in the state

In [13]:
trap_data = pd.read_csv("/Volumes/My Book/Synchrony/_data/ll_info_23.csv")

In [11]:
cali_data = pd.read_csv("/Volumes/My Book/Climate/ERA_PR/downsampled/California_PR_allyears.csv")
colo_data = pd.read_csv("/Volumes/My Book/Climate/ERA_PR/downsampled/Colorado_PR_allyears.csv")
pnw_data = pd.read_csv("/Volumes/My Book/Climate/ERA_PR/downsampled/PNW_PR_allyears.csv")
ari_data = pd.read_csv("/Volumes/My Book/Climate/ERA_PR/downsampled/Arizona_PR_allyears.csv")
newm_data = pd.read_csv("/Volumes/My Book/Climate/ERA_PR/downsampled/NewMexico_PR_allyears.csv")

In [12]:
df_all = pd.concat([cali_data,colo_data,pnw_data,ari_data,newm_data])

In [14]:
state_df = trap_data.reset_index(drop=True)

In [15]:
trap_lats = state_df.lat
trap_lons = state_df.lon

print(trap_lats)

0       34.452258
1       34.429790
2       33.861750
3       33.816588
4       33.812968
          ...    
1771    45.897575
1772    45.861841
1773    45.826436
1774    45.893359
1775    45.617810
Name: lat, Length: 1776, dtype: float64


In [16]:
min(trap_lats)

32.427858

In [17]:
grid_lon = list(set(df_all.longitude))
grid_lat = list(set(df_all.latitude))

In [18]:
lat_coord = []

for t in range(0,len(trap_lats)):
    lat_coord.append(grid_lat[np.where(abs(trap_lats[t]- grid_lat) == min(abs(trap_lats[t]- grid_lat)))[0][0]])
    
lon_coord = []

for t in range(0,len(trap_lats)):
    lon_coord.append(grid_lon[np.where(abs(trap_lons[t]- grid_lon) == min(abs(trap_lons[t]- grid_lon)))[0][0]])

In [19]:
print(min(lat_coord),max(lat_coord))
print(min(lon_coord),max(lon_coord))

[34.5, 34.5, 33.75, 33.75, 33.75, 33.25, 33.25, 34.0, 34.0, 32.75, 32.75, 32.5, 34.5, 34.5, 34.25, 34.5, 50.75, 50.75, 50.75, 50.75, 51.0, 51.0, 51.0, 51.25, 51.25, 51.0, 51.0, 51.0, 51.25, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.25, 51.25, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.25, 51.25, 51.25, 51.25, 51.25, 50.75, 51.25, 51.25, 51.25, 51.5, 51.0, 51.0, 51.0, 50.75, 50.75, 50.75, 50.75, 50.75, 51.0, 51.0, 50.25, 50.5, 50.75, 50.5, 50.25, 50.25, 49.75, 49.5, 49.25, 49.0, 49.25, 49.25, 50.5, 51.0, 51.0, 51.0, 51.0, 50.5, 51.0, 51.0, 50.75, 50.75, 50.75, 49.25, 49.25, 49.25, 49.25, 49.25, 49.25, 49.25, 49.25, 49.25, 49.5, 49.5, 49.5, 50.0, 50.0, 50.75, 50.75, 42.0, 42.0, 41.5, 41.5, 40.25, 40.25, 40.75, 40.75, 40.75, 39.0, 39.0, 39.0, 38.75, 38.5, 38.5, 38.25, 36.25, 39.5, 39.75, 39.75, 35.75, 35.75, 36.0, 39.0, 39.0, 39.0, 39.0, 39.0, 39.0, 38.75, 38.75, 38.75, 39.0, 38.75, 38.75, 38.75, 38.5, 38.75, 38.75, 38.75, 3

In [20]:
coord_df = pd.DataFrame({'lat' : trap_lats, 'lon' : trap_lons,
             'lat_coord' : lat_coord, 'lon_coord': lon_coord})

In [21]:
state_df = pd.merge(state_df,coord_df)

In [22]:
state_df["key"] = state_df["lat_coord"].astype(str) + state_df["lon_coord"].astype(str)

In [23]:
state_keys = list(set(state_df.key))

In [24]:
df_all["key"] = df_all["latitude"].astype(str) + df_all["longitude"].astype(str)

In [25]:
trap_all = df_all[df_all['key'].isin(state_keys)]

In [26]:
trap_all

Unnamed: 0.1,Unnamed: 0,time,longitude,latitude,sum_tp,year,month,day,expver,key
0,0,1979-01-01,-122.00,42.00,1.117587e-08,1979,January,1,,42.0-122.0
38,38,1979-01-01,-121.75,40.75,1.117587e-08,1979,January,1,,40.75-121.75
72,72,1979-01-01,-121.50,40.50,1.117587e-08,1979,January,1,,40.5-121.5
73,73,1979-01-01,-121.50,40.25,1.117587e-08,1979,January,1,,40.25-121.5
99,99,1979-01-01,-121.25,42.00,1.117587e-08,1979,January,1,,42.0-121.25
...,...,...,...,...,...,...,...,...,...,...
1963629,514285,2020-12-31,-105.75,33.00,0.000000e+00,2020,December,31,,33.0-105.75
1963630,514286,2020-12-31,-105.75,32.75,0.000000e+00,2020,December,31,,32.75-105.75
1963632,514288,2020-12-31,-105.50,36.25,0.000000e+00,2020,December,31,,36.25-105.5
1963645,514301,2020-12-31,-105.50,33.00,0.000000e+00,2020,December,31,,33.0-105.5


In [27]:
trap_all.to_csv("/Volumes/My Book/Climate/ERA_PR/downsampled/trap_pr_data_allT.csv")

## Missing oregon data

In [24]:
#trap_all.to_csv("/Volumes/My Book/Climate/ERA_PR/downsampled/trap_pr_data_missing.csv")

In [None]:
state_all.to_csv("/Volumes/My Book/Climate/ERA/downsampled/yearly_aggregates_2009.csv")