<a href="https://colab.research.google.com/github/sulochandhungel/ReferenceET/blob/master/NLDAS_Extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import ee
# Trigger the authentication flow.
ee.Authenticate()

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://accounts.google.com/o/oauth2/auth?client_id=517222506229-vsmmajv00ul0bs7p89v5m89qs8eb9359.apps.googleusercontent.com&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fearthengine+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdevstorage.full_control&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&response_type=code&code_challenge=cfpbR-INks4kISf15NBR35MCoJgxY5s5ujXImSU_HpM&code_challenge_method=S256

The authorization workflow will generate a code, which you should paste in the box below. 
Enter verification code: 4/ygH4X5eq4pOVva72h-QsfZP5iIeh9ZOFaONhQ7yuPe7OZWBNOSXm3Aw

Successfully saved authorization token.


In [8]:
# Import Earth Engine
import ee

try:
    ee.Initialize()
    print('The Earth Engine package initialized successfully!')
except ee.EEException as e:
    print('The Earth Engine package failed to initialize!')
except:
    print("Unexpected error:", sys.exc_info()[0])
    raise

The Earth Engine package initialized successfully!


In [10]:
import subprocess

try:
    import timezonefinder
except ImportError:
    print('timezonefinder package not installed. Installing ...')
    subprocess.check_call(["python", '-m', 'pip', 'install', 'timezonefinder'])
print ("Done!")

timezonefinder package not installed. Installing ...
Done!


In [37]:
# Function to get NLDAS data
def getNLDAS_data_GMT(reqd_area, GMTyear_, GMTmonth_, GMTday_, GMThour_, GMTminute_, GMTsecond_, var_name, resample_to_elev = True):
    import datetime
    import ee
    ee.Initialize()
    
    def getRH(temp, elev, q):
        pair = elev.expression('101.3 * pow((293 - 0.0065 * b()) / 293, 5.26)')
        es = temp.expression('0.6108 * exp(17.27 * b() / (b() + 237.3))')
        ea = pair.expression('q * pair / (0.622 + 0.378 * q)', {'pair':pair, 'q':q})
        return (ea.divide(es).multiply(ee.Image.constant(100.0)))
    
    selDT = datetime.datetime(GMTyear_, GMTmonth_, GMTday_, GMThour_, GMTminute_, GMTsecond_)
    endDT = (selDT + datetime.timedelta(days=1))
    
    if GMTminute_>=30:
        GMThour_ = GMThour_ + 1
    
    nldas_coll = ee.ImageCollection('FORA0125_H002').filterDate(selDT.strftime('%Y-%m-%d'), endDT.strftime('%Y-%m-%d')).filterBounds(reqd_area.geometry()).filter(ee.Filter.eq('start_hour', GMThour_))
    nldas_img = ee.Image(nldas_coll.first())
    
    rd = ee.Image('USGS/NED').projection() # Getting the data to the highest possible resolution of 10m
    ans_img_coarse = nldas_img.select(var_name).clip(reqd_area.geometry())
    
    if resample_to_elev:
        ans_img = nldas_img.select(var_name).resample('bilinear').reproject(rd).clip(reqd_area.geometry())
    else:
        ans_img = ans_img_coarse
    
    if var_name == "shortwave_radiation":
        ans_img = ans_img.divide(ee.Image.constant(1000.0)) # Get solar radiation in kW m-2 (Slr_Kw_Avg)
        ans_img_coarse = ans_img_coarse.divide(ee.Image.constant(1000.0))
        
    if var_name == "relative_humidity":
        temp = nldas_img.select('temperature')
        elev = ee.Image('USGS/NED')
        q = nldas_img.select('specific_humidity')
        ans_img_coarse = getRH(temp, elev, q).clip(reqd_area.geometry())
        if resample_to_elev:
            ans_img = getRH(temp, elev, q).resample('bilinear').reproject(rd).clip(reqd_area.geometry())
        else:
            ans_img = ans_img_coarse

    
    if var_name == "windspeed":
        wind_u = nldas_img.select("wind_u")
        wind_v = nldas_img.select("wind_v")
        uz = ((wind_u.pow(2)).add(wind_v.pow(2))).sqrt().clip(reqd_area.geometry())
        zw = ee.Image.constant(10) # wind speed measured at 10 m
        ans_img_coarse = uz.expression('uz * 4.87 / log(67.8 * zw - 5.42)', {'uz':uz, 'zw':zw})
        if resample_to_elev:
            uz = ((wind_u.pow(2)).add(wind_v.pow(2))).sqrt().resample('bilinear').reproject(rd).clip(reqd_area.geometry())
            ans_img = uz.expression('uz * 4.87 / log(67.8 * zw - 5.42)', {'uz':uz, 'zw':zw})
        else:
            ans_img = ans_img_coarse
        
    return ({'ans_img':ans_img,\
             'mean_val':list(ans_img_coarse.reduceRegion(ee.Reducer.mean(), reqd_area.geometry()).getInfo().values())[0]})
#print ("Done!")

def getNLDAS_data(reqd_area, year_ = 2015, month_ = 5, day_ = 13,
                   hour_ = 11, minute_ = 45, second_ = 11, var_name = 'shortwave_radiation', resample_to_elev = True):
    import pandas as pd
    from datetime import datetime
    from pytz import timezone

    from timezonefinder import TimezoneFinder

    tf = TimezoneFinder()
    import pytz

    reqd_area_cent = reqd_area.centroid().getInfo().get('geometry').get('coordinates')
    #print (reqd_area_cent)
    tz = pytz.timezone(str(tf.timezone_at(lng = reqd_area_cent[0], lat = reqd_area_cent[1])))

    #gmt_DT = datetime(im_year, im_month, im_day, im_hour, im_minute, im_second)
    loc_DT = datetime(year_, month_, day_, hour_, minute_, second_)
    tzoffset = tz.utcoffset(loc_DT, is_dst=True)
    #print (tzoffset)
    gmt_DT = loc_DT - tzoffset
    #print (gmt_DT)
    #print (loc_DT)
    #loc_DT = gmt_DT + tzoffset
    
    return (getNLDAS_data_GMT(reqd_area = reqd_area, GMTyear_ = gmt_DT.year, GMTmonth_ = gmt_DT.month, GMTday_ = gmt_DT.day,
                   GMThour_ = gmt_DT.hour, GMTminute_ = gmt_DT.minute, GMTsecond_ = gmt_DT.second, var_name = var_name, resample_to_elev = resample_to_elev))

#xx = getNLDAS_data_GMT(reqd_area = reqd_area, GMTyear_ = 2015, GMTmonth_ = 5, GMTday_ = 13,
#                   GMThour_ = 11, GMTminute_ = 45, GMTsecond_ = 11, var_name = 'relative_humidity', resample_to_elev = True)

print (" --- Checking NLDAS data acquistion ----- ")
lng = -118.93611
lat = 46.20527
buff_dist = 1*1000
loc_reqd = ee.Feature(ee.Geometry.Point(lng, lat))
reqd_area = loc_reqd.buffer(buff_dist)

xx = getNLDAS_data(reqd_area= reqd_area, year_ = 2015, month_ = 5, day_ = 13,\
                  hour_ = 11, minute_ = 45, second_ = 11,\
                  var_name = 'shortwave_radiation', resample_to_elev = True)
print (xx)
#getNLDAS_data()
print ("Done!")


 --- Checking NLDAS data acquistion ----- 
{'ans_img': <ee.image.Image object at 0x7f771d55bcf8>, 'mean_val': 0.371481}
Done!


In [39]:
import datetime
DTs = ["2016-04-08 11:43:18", "2016-04-16 11:45:58", "2016-05-02 11:46:04",\
           "2016-05-10 11:43:14", "2016-05-18 11:46:05", "2016-06-19 11:46:14"]
echoans = False

print (DTs)
pd.to_datetime(DTs, format = '%Y-%m-%d %H:%M:%S').rename("TIMESTAMP")
#loc_DTs = [datetime.datetime.strptime(loc_DT, "%Y-%m-%d %H:%M:%S") for loc_DT in DTs] 
#DateTime.year

['2016-04-08 11:43:18', '2016-04-16 11:45:58', '2016-05-02 11:46:04', '2016-05-10 11:43:14', '2016-05-18 11:46:05', '2016-06-19 11:46:14']


DatetimeIndex(['2016-04-08 11:43:18', '2016-04-16 11:45:58',
               '2016-05-02 11:46:04', '2016-05-10 11:43:14',
               '2016-05-18 11:46:05', '2016-06-19 11:46:14'],
              dtype='datetime64[ns]', name='TIMESTAMP', freq=None)

In [41]:
#Get a dataframe of NLDAS data mean
import pandas as pd
import numpy as np
w_data_type = "NLDAS"

if w_data_type == "NLDAS":
    #DateTime = pd.to_datetime(Sat_DTs_CldCov['Date'] , format = '%Y-%m-%d %H:%M:%S').rename('TIMESTAMP')
    DateTime = pd.to_datetime(DTs, format = '%Y-%m-%d %H:%M:%S').rename('TIMESTAMP')
    
    records = pd.DataFrame(range(1, (DateTime.size)+1), columns= ['RECORD'])
    years = DateTime.year.rename('Year')
    months = DateTime.month.rename('Month')
    days = DateTime.day.rename('Day')
    hours = DateTime.hour.rename('Hour')
    minutes = DateTime.minute.rename('Minute')
    seconds = DateTime.second.rename('Second')
    
    
    ws_ms_avg = []
    winddir = []
    slrkw_avg = []
    airtc_avg = []
    rh = []
    rainmmtot  = []
    
    for i in range(0,len(DateTime)):
        print (DateTime[i])
        def NLDAS_common(var_name, reqd_area = reqd_area, echoans = echoans):
            ans = getNLDAS_data(reqd_area, year_ = years[i],
                                month_ = months[i],
                                day_ = days[i],
                                hour_ = hours[i],
                                minute_ = minutes[i],
                                second_ = seconds[i],
                                var_name = var_name, resample_to_elev = True).get('mean_val')
            if echoans:
              print (var_name + " = " + str(ans))
            return (ans)
    
        ws_ms_avg.append(NLDAS_common('windspeed'))
        winddir.append(None)
        slrkw_avg.append(NLDAS_common('shortwave_radiation'))
        airtc_avg.append(NLDAS_common('temperature'))
        rh.append(NLDAS_common('relative_humidity'))
        rainmmtot.append(NLDAS_common('total_precipitation'))
    

    list_of_series = [DateTime,
                      pd.Series(years), pd.Series(months), pd.Series(days),
                      pd.Series(hours), pd.Series(minutes), pd.Series(seconds),
                      pd.Series(ws_ms_avg).rename('WS_ms_Avg'),
                      pd.Series(winddir).rename('WindDir'),
                      pd.Series(slrkw_avg).rename('SlrkW_Avg'),
                      pd.Series(airtc_avg).rename('AirTC_Avg'),
                      pd.Series(rh).rename('RH'),
                      pd.Series(rainmmtot).rename('Rain_mm_Tot')]

    hourly_data = None
    for ser in list_of_series:
        if hourly_data is None:
            hourly_data = pd.DataFrame(ser)
        else:
            hourly_data = pd.concat([hourly_data, ser], axis=1)
    hourly_data = hourly_data.reset_index(drop=True)
    hourly_data = pd.concat([hourly_data, records], axis=1)

    # Create a complete dataframe for all the variables
    w_df = pd.DataFrame().reindex_like(hourly_data).iloc[0:len(loc_DTs)]
    w_df['TIMESTAMP'] = loc_DTs

    w_df1 = pd.concat([hourly_data, w_df]).sort_values(by=['TIMESTAMP']).reset_index(drop=True)
    indsNaNs =  (w_df1[w_df1['RECORD'].isnull()].index) # only those datetimes are interpolated for Satellite datetimes
    #indsNaNs =  (w_df1[w_df1.isnull().any(axis=1)].index) # all NaNs are interpolated 
    #print (indsNaNs)

    w_df1 = w_df1.replace(-9999.000000, np.nan)
    w_df_interp = w_df1.interpolate('linear')

    w_df_4_Sat_DT_NLDAS = w_df_interp.iloc[indsNaNs]
    w_df_4_Sat_DT = w_df_4_Sat_DT_NLDAS
    
    
    print (w_df_4_Sat_DT)
print ("Done!")
#print (DateTime)


2016-04-08 11:43:18
2016-04-16 11:45:58
2016-05-02 11:46:04
2016-05-10 11:43:14
2016-05-18 11:46:05
2016-06-19 11:46:14
             TIMESTAMP    Year  Month  ...         RH  Rain_mm_Tot  RECORD
1  2016-04-08 11:43:18  2016.0    4.0  ...  39.726579          0.0     1.5
3  2016-04-16 11:45:58  2016.0    4.5  ...  34.290618          0.0     2.5
5  2016-05-02 11:46:04  2016.0    5.0  ...  31.943864          0.0     3.5
7  2016-05-10 11:43:14  2016.0    5.0  ...  34.066210          0.0     4.5
9  2016-05-18 11:46:05  2016.0    5.5  ...  32.107814          0.0     5.5
11 2016-06-19 11:46:14  2016.0    6.0  ...  29.954649          0.0     6.0

[6 rows x 14 columns]
Done!
