In [20]:
import os
import ee
import datetime
import time
import sklearn
import importlib

import geopandas as gp
import pandas as pd
import numpy as np
import rsfuncs as rs
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from pandas.tseries.offsets import MonthEnd
from dateutil.relativedelta import relativedelta
from sklearn import preprocessing

from tqdm import tqdm_notebook as tqdm

ee.Initialize()

# Load shapefile 
shp = gp.read_file('../shape/study_area/c2vsim_sub_18.shp')

# Make EE objects from shapefiles 
area = rs.gdf_to_ee_poly(shp)

# Load Small watersheds shapefile, dissolve, and simplify it slightly 
sw_shp = gp.read_file('../shape/study_area/small_sheds.shp').dissolve().explode()
sw_area = rs.gdf_to_ee_poly(sw_shp)

ro_gdf = gp.GeoDataFrame(pd.concat([shp, sw_shp])).dissolve().explode()
ro_area = rs.gdf_to_ee_poly(ro_gdf)

# Load RS data dict from rsfuncs.py
data = rs.load_data()

# ECMWF
data['ecmwf_r'] = [ee.ImageCollection("ECMWF/ERA5_LAND/MONTHLY") , 'runoff', 1e5, 11132] # mm --> km 


# Set start/end
strstart = '2020-01-01'
strend = '2021-09-30'

startdate = datetime.datetime.strptime(strstart, "%Y-%m-%d")
enddate = datetime.datetime.strptime(strend, "%Y-%m-%d")

print("-------" * 10)
print("Processing Runoff Data")
print("-------" * 10)

# R
# tc_r = rs.calc_monthly_sum(data['tc_r'], startdate, enddate, ro_area)
# ecmwf_r = rs.calc_monthly_sum(data['ecmwf_r'], startdate, enddate, ro_area)

----------------------------------------------------------------------
Processing Runoff Data
----------------------------------------------------------------------


In [22]:
print("-------" * 10)
print("Processing Runoff Data")
print("-------" * 10)

# R
tc_r = rs.calc_monthly_sum(data['tc_r'], startdate, enddate, ro_area)
tc_r.columns = ['tc_r']

ecmwf_r = rs.calc_monthly_sum(data['ecmwf_r'], startdate, enddate, ro_area)
ecmwf_r.columns = ['ecmwf_r']

fldas_ssr = rs.calc_monthly_sum(data['fldas_ssr'], startdate, enddate, ro_area)
fldas_bfr = rs.calc_monthly_sum(data['fldas_bfr'], startdate, enddate, ro_area)
fldas_r = pd.DataFrame(pd.concat([fldas_bfr, fldas_ssr], axis = 1).sum(axis =1))
fldas_r.columns = ['fldas_r']

gldas_ssr = rs.calc_monthly_sum(data['gldas_ssr'], startdate, enddate, ro_area)
gldas_bfr = rs.calc_monthly_sum(data['gldas_bfr'], startdate, enddate, ro_area)
gldas_r = pd.DataFrame(pd.concat([gldas_bfr, gldas_ssr], axis = 1).sum(axis =1))
gldas_r.columns = ['gldas_r']


rdfs = {"r_tc": tc_r, "r_gldas": gldas_r, "r_fldas": fldas_r, "r_ecmwf": ecmwf_r}

----------------------------------------------------------------------
Processing Runoff Data
----------------------------------------------------------------------
processing:


  0%|          | 0/21 [00:00<?, ?it/s]

IDAHO_EPSCOR/TERRACLIMATE/195801


100%|██████████| 21/21 [00:05<00:00,  3.55it/s]
  0%|          | 0/21 [00:00<?, ?it/s]

processing:
ECMWF/ERA5_LAND/MONTHLY/198101


100%|██████████| 21/21 [00:05<00:00,  3.96it/s]
  0%|          | 0/21 [00:00<?, ?it/s]

processing:
NASA/FLDAS/NOAH01/C/GL/M/V001/198201


100%|██████████| 21/21 [00:08<00:00,  2.37it/s]


processing:


  5%|▍         | 1/21 [00:00<00:02,  7.87it/s]

NASA/FLDAS/NOAH01/C/GL/M/V001/198201


100%|██████████| 21/21 [00:05<00:00,  3.70it/s]
  0%|          | 0/21 [00:00<?, ?it/s]

processing:
NASA/GLDAS/V021/NOAH/G025/T3H/A20000101_0300


100%|██████████| 21/21 [00:04<00:00,  4.71it/s]


processing:


  0%|          | 0/21 [00:00<?, ?it/s]

NASA/GLDAS/V021/NOAH/G025/T3H/A20000101_0300


100%|██████████| 21/21 [00:05<00:00,  3.64it/s]


In [26]:
outdir = "../data/wb_variables"

if not os.path.exists(outdir):
    os.mkdir(outdir)

r_outfn = os.path.join(outdir,"runoff.csv")
pd.concat(list(rdfs.values()), axis = 1).to_csv(r_outfn)


Unnamed: 0,tc_r,gldas_r,fldas_r,ecmwf_r
2020-01-31,0.00231,0.00627,0.007636,0.010482
2020-02-29,0.000304,0.005099,0.000362,0.008871
2020-03-31,0.010821,0.007252,0.009284,0.050099
2020-04-30,0.014101,0.007367,0.0055,0.098688
2020-05-31,0.000413,0.00365,0.00054,0.036058
2020-06-30,0.0,0.0028,5.9e-05,0.021578
2020-07-31,0.0,0.002562,4e-05,0.013191
2020-08-31,0.0,0.002395,3.9e-05,0.008463
2020-09-30,0.0,0.002216,6.5e-05,0.005849
2020-10-31,0.0,0.002216,0.000134,0.004399


In [15]:
pd.DataFrame(rdfs)

ValueError: If using all scalar values, you must pass an index

In [None]:
gl_bfr = rs.calc_monthly_sum(data['gldas_bfr'], startdate, enddate, ro_area)
gl_ssr = rs.calc_monthly_sum(data['gldas_ssr'], startdate, enddate, ro_area)


  0%|          | 0/249 [00:00<?, ?it/s]

processing:
NASA/GLDAS/V021/NOAH/G025/T3H/A20000101_0300


100%|██████████| 249/249 [00:54<00:00,  4.59it/s]
  0%|          | 0/249 [00:00<?, ?it/s]

processing:
NASA/GLDAS/V021/NOAH/G025/T3H/A20000101_0300


 60%|██████    | 150/249 [00:33<00:20,  4.81it/s]

In [None]:
gl_bfr.plot()
gl_ssr.plot()