In [None]:
import sys
import glob
import os
import pandas as pd
import ee
import numpy as np
import geopandas as gp

import matplotlib.pyplot as plt
import rsfuncs as rs

from scipy import stats
from scipy.signal import savgol_filter
from datetime import datetime, timedelta
from datetime import datetime as dt

%load_ext autoreload
%autoreload 2

# Default figure size (width, height in inches) for the plots in this notebook.
plt.rcParams['figure.figsize'] = [18,16]
# Initialise the Earth Engine client before any ee.* object is used below.
ee.Initialize()

In [2]:
# EE functions are in the rsfuncs module. Local functions are here:

In [3]:
def read_sm_file(filename):
    """Read a comma-separated soil moisture text file into a DataFrame.

    Lines starting with ``#`` are treated as comments and skipped. The first
    remaining line is the header and every following line is one data row.
    In the header names, "Soil Moisture Percent" is shortened to "smp" and
    spaces are replaced by underscores.

    Parameters
    ----------
    filename : str or path-like
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame
        One column per header field. All values are left as strings; any
        type conversion is up to the caller.

    Raises
    ------
    IndexError
        If the file contains no non-comment line (i.e. there is no header).
    """
    with open(filename) as f:
        # Keep everything that is not a comment line.
        data = [line for line in f if not line.startswith("#")]

    # Strip the line ending from the header too; otherwise the last column
    # name would carry a trailing "\n".
    header_line = data[0].rstrip("\n")
    headers = [
        name.replace("Soil Moisture Percent", "smp").replace(" ", "_")
        for name in header_line.split(",")
    ]
    rows = [line.strip("\n").split(",") for line in data[1:]]

    return pd.DataFrame(rows, columns=headers)

def get_ims_by_date(ims_list, var, res=10):
    """Convert every image of an Earth Engine list into a 2-D numpy array.

    Parameters
    ----------
    ims_list : ee.List
        Server-side list of images; it is indexed with ``.get(i)`` and
        evaluated once with ``.getInfo()``.
    var : str
        Name of the band to extract from each image.
    res : int or float, optional
        Value forwarded to ``array_from_latlon`` as the reduction scale
        (default 10).

    Returns
    -------
    (list, list)
        ``imlist`` holds one array per image (as returned by
        ``array_from_latlon``); ``imdates`` holds the ``'id'`` entry of each
        image's info dict, in the same order.
        NOTE(review): the id is used as the date label here — confirm that
        the asset ids of the collection actually encode the date.
    """
    imlist = []
    imdates = []

    # One round trip fetches the metadata of every image; it provides both
    # the image count and the per-image 'id' used below.
    ims_info = ims_list.getInfo()
    num_images = len(ims_info)

    for i in range(num_images):

        # Progress report every fifth image.
        if i % 5 == 0:
            print(str((i / num_images)*100)[:5] + " % ")

        latlon = ee.Image.pixelLonLat().addBands(ims_list.get(i))
        imlist.append(array_from_latlon(latlon, var, res))

        # Read the id from the list passed in as `ims_list`; the previous
        # code looked it up on a global named `ims` that this function
        # never defines.
        imdates.append(ims_info[i]['id'])

    return imlist, imdates

def array_from_latlon(latlon_obj, var, res ):
    """Sample an image over the global ``area`` and return it as a 2-D array.

    Parameters
    ----------
    latlon_obj : ee.Image
        Image that carries "latitude" and "longitude" bands in addition to
        the band named ``var``.
    var : str
        Name of the band to extract.
    res : int or float
        Passed to ``reduceRegion`` as ``scale``.

    Returns
    -------
    numpy.ndarray
        Grid built by ``make_np_array`` from the sampled values. If the
        ``var`` band cannot be retrieved, the grid is built from NaN values
        instead.

    Notes
    -----
    The region of interest is read from the module-level variable ``area``,
    which must be defined before this function is called.
    """
    reduced = latlon_obj.reduceRegion(reducer=ee.Reducer.toList(),geometry=area,maxPixels=1e8,scale=res)

    # Coordinates are fetched outside the try block: without them there is
    # nothing to build, and the NaN fallback below needs `lats` to exist.
    lats = np.array((ee.Array(reduced.get("latitude")).getInfo()))
    lons = np.array((ee.Array(reduced.get("longitude")).getInfo()))

    try:
        data = np.array((ee.Array(reduced.get(var)).getInfo()))
    except Exception:
        # Best effort: a missing/unreadable band becomes an all-NaN sample set.
        data = np.full_like(lats, np.nan,dtype=np.float64)

    return make_np_array(data, lats, lons)

def make_np_array(data, lats, lons):
    """Arrange point samples on a regular lat/lon grid.

    Parameters
    ----------
    data : array-like
        One value per sample.
    lats, lons : array-like
        Latitude and longitude of each sample, same length as ``data``.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape (number of unique latitudes, number of
        unique longitudes). Row 0 holds the highest latitude, column 0 the
        lowest longitude. Grid cells without a sample stay 0.

    Raises
    ------
    ValueError
        If ``data``, ``lats`` and ``lons`` do not have the same length, since
        the values could then not be matched to coordinates.
    """
    lons = np.asarray(lons).ravel()
    lats = np.asarray(lats).ravel()
    data = np.asarray(data).ravel()

    if not (len(data) == len(lats) == len(lons)):
        raise ValueError(
            "data, lats and lons must have the same length, got {}, {} and {}".format(
                len(data), len(lats), len(lons)))

    # Unique coordinates define the grid; the inverse indices give, for every
    # sample, its position along each grid axis.
    uniqueLats, lat_idx = np.unique(lats, return_inverse=True)
    uniqueLons, lon_idx = np.unique(lons, return_inverse=True)

    nrows = len(uniqueLats)
    ncols = len(uniqueLons)

    # create an array with dimensions of image
    arr = np.zeros([nrows, ncols], np.float32)

    # Place each value at its own coordinate. Latitude indices are flipped so
    # that the lowest latitude ends up in the last row. Indexing by
    # coordinate (rather than walking the grid with a running counter) keeps
    # value i paired with coordinate i and does not depend on sample order.
    arr[nrows - 1 - lat_idx, lon_idx] = data

    return arr

def filter_date(product,y,m,d):
    """Select the "ppt" band of the images in the day leading up to a date.

    The filter window runs from one day before ``y``-``m``-``d`` up to that
    date; the result is sorted by 'system:time_start' in descending order.

    Parameters
    ----------
    product : ee.ImageCollection
        Collection to filter.
    y, m, d : int
        Year, month and day marking the end of the window.

    Returns
    -------
    The filtered, sorted collection restricted to the "ppt" band.
    """
    window_end = ee.Date.fromYMD(y, m, d)
    window_start = ee.Date.fromYMD(y, m, d).advance(-1, "day")

    newest_first = product.filterDate(window_start, window_end).sort('system:time_start', False)
    return newest_first.select("ppt")

def get_2day_precip(latlon_obj, area):
    """Sum the "ppt" band of an image over a region.

    Parameters
    ----------
    latlon_obj : ee.Image
        Image containing a "ppt" band.
    area : ee.Geometry
        Region over which the band is summed (reduction scale is 10).

    Returns
    -------
    numpy.ndarray
        The summed value wrapped in a numpy array.
    """
    summed = latlon_obj.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=area,
        scale=10,
    )
    ppt_total = ee.Array(summed.get("ppt")).getInfo()
    return np.array(ppt_total)

def get_ndvi(latlon_obj, area):
    """Average the "NDVI" band of an image over a region.

    Parameters
    ----------
    latlon_obj : ee.Image
        Image containing an "NDVI" band.
    area : ee.Geometry
        Region over which the band is averaged (reduction scale is 10).

    Returns
    -------
    numpy.ndarray
        The mean value wrapped in a numpy array.
    """
    averaged = latlon_obj.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=area,
        scale=10,
    )
    ndvi_mean = ee.Array(averaged.get("NDVI")).getInfo()
    return np.array(ndvi_mean)

def round_up_to_odd(f):
    """Return the smallest odd integer that is greater than or equal to ``f``."""
    ceiling = int(np.ceil(f))
    if ceiling % 2:
        # Already odd, nothing to adjust.
        return ceiling
    return ceiling + 1

In [4]:
# Read the site shapefile and keep only sites outside AK, HI, PR and VI.
site_file = gp.read_file("../shape/scan_sites.shp")
excluded_states = ["AK", "HI", "PR", "VI"]
in_excluded_state = site_file['state'].isin(excluded_states)
sites = site_file[~in_excluded_state]

In [5]:
# Load the data provided by the rsfuncs helper module.
# NOTE(review): `data` is not used by any cell visible in this part of the
# notebook — confirm it is still needed.
data = rs.load_data()

In [6]:
# Directory that is searched for the per-site soil moisture files.
data_dir = "../data"

In [57]:
# Per-site result frames keyed by site id. Re-running this cell discards all
# results collected so far.
out_dict = {}

In [58]:
# For each site id: find the soil moisture (sm) file in the data dir and
# calculate psi as psi = a * (sm / 100) ** b (krishna's paper).
# Still to be queried for the whole timeseries (not done in this cell):
# (1) landcover, (2) Sentinel backscatter (Prism P), MODIS / Landsat LAI.

# List the data directory once instead of once per site.
data_files = os.listdir(data_dir)

for idx, row in sites.iterrows():

    # Sites already present in out_dict are skipped, so the cell can be
    # re-run without redoing finished sites.
    if row.id in out_dict:
        print(row.id)
        continue

    print("Processings site no {}".format(row.id))

    # Get the corresponding SCAN data file from data folder.
    # NOTE(review): this is a substring match on the file name, so a short id
    # can also match the file of a longer id that contains it — confirm the
    # naming scheme and tighten the match if necessary.
    site_id = str(row.id)
    matches = [os.path.join(data_dir, x) for x in data_files if site_id in x]
    if not matches:
        # A missing file skips this site instead of aborting the whole loop.
        print("no soil moisture file found for {}".format(row.id))
        continue
    sm_file = matches[0]

    try:
        sm_dat = read_sm_file(sm_file)
        sm_dat['Date'] = pd.to_datetime(sm_dat['Date'], format='%Y%m%d %H:%M')
        sm_dat = sm_dat.set_index('Date')
    except Exception:
        # Unreadable or malformed files are skipped; unlike a bare `except`,
        # this still lets KeyboardInterrupt stop the loop.
        print("no valid soil moisture data for {}".format(row.id))
        continue

    if sm_dat.empty:
        print("no valid soil moisture data for {}".format(row.id))
        continue

    # The file reader returns strings; convert every column to numbers.
    sm_dat = sm_dat.apply(pd.to_numeric)

    # Average over consecutive 12-day windows; windows without data are NaN.
    sm_dat = sm_dat.resample('12D').mean()

    # The last column is taken as root zone and the first as surface soil
    # moisture — this relies on the column order of the input file.
    rzsm = sm_dat.iloc[:, -1].values
    ssm = sm_dat.iloc[:, 0].values

    # params to calculate psi
    a = row.a
    b = row.b

    # RZ and Surface LWP = A * sm ^b (sm is divided by 100 first)
    psi_rz = a * (rzsm / 100) ** b
    psi_s = a * (ssm / 100) ** b

    out_df = pd.DataFrame(
        {'rzsm': rzsm, 'ssm': ssm, 'psi_rz': psi_rz, 'psi_s': psi_s, 'site': row.id},
        index=sm_dat.index,
    )

    out_dict[row.id] = out_df

Processings site no 2057
Processings site no 2078
Processings site no 2177
Processings site no 2113
Processings site no 2174
Processings site no 2055
Processings site no 2173
Processings site no 2180
Processings site no 2114
Processings site no 2178
Processings site no 2181
Processings site no 2182
Processings site no 2176
Processings site no 2056
Processings site no 2179
Processings site no 2115
Processings site no 2175
Processings site no 2053
Processings site no 2083
Processings site no 2091
Processings site no 2090
Processings site no 2085
Processings site no 2030
Processings site no 2084
Processings site no 2026
Processings site no 2214
Processings site no 2215
Processings site no 2189
Processings site no 2190




Processings site no 2187
Processings site no 2183
Processings site no 2191
Processings site no 2192
Processings site no 2185
Processings site no 2184
Processings site no 2218
Processings site no 2149
Processings site no 2217
Processings site no 2186
Processings site no 2219
Processings site no 2197
Processings site no 2017
Processings site no 2051
Processings site no 2012
Processings site no 2009
Processings site no 2027
Processings site no 2013
Processings site no 2031
Processings site no 2068
Processings site no 2148
Processings site no 674
Processings site no 2004
Processings site no 2092
Processings site no 2094
Processings site no 2147
Processings site no 2093
Processings site no 2079
Processings site no 2005
Processings site no 2049




Processings site no 2002
Processings site no 2050
Processings site no 2224
Processings site no 2223
Processings site no 2195
Processings site no 2048
Processings site no 2220
Processings site no 2194
Processings site no 2225
Processings site no 2060
Processings site no 2061
Processings site no 2193
Processings site no 2227
Processings site no 2047
Processings site no 2226
Processings site no 2032
Processings site no 2024
Processings site no 2025
Processings site no 2110
Processings site no 2087
Processings site no 2033
Processings site no 2046
Processings site no 2109
Processings site no 2070
Processings site no 2086
Processings site no 2064
Processings site no 2082
Processings site no 2034
Processings site no 2035
Processings site no 2117
Processings site no 2019
Processings site no 2121
Processings site no 581
Processings site no 2119
Processings site no 2120
Processings site no 808
Processings site no 2118
Processings site no 2008
Processings site no 2020
Processings site no 2111
Pr



Processings site no 2022
Processings site no 2074
Processings site no 2028
Processings site no 2036
Processings site no 2037
Processings site no 2038
Processings site no 2072
Processings site no 2076
Processings site no 2077
Processings site no 2075
Processings site no 2207
Processings site no 2006
Processings site no 2206
Processings site no 2201
Processings site no 2106
Processings site no 2105
Processings site no 2228
no valid soil moisture data for 2228
Processings site no 2016
Processings site no 2104
Processings site no 2199
Processings site no 2200
Processings site no 2203
Processings site no 2204
Processings site no 2202
Processings site no 2205
Processings site no 2138
Processings site no 2135
Processings site no 2151
Processings site no 2136
Processings site no 2161
Processings site no 2150
Processings site no 2125
Processings site no 2167
no valid soil moisture data for 2167
Processings site no 2130
Processings site no 2128
Processings site no 2126
Processings site no 2165
P

In [59]:
# Number of rows, across all sites, that have no missing value in any column.
len(pd.concat(out_dict.values()).dropna())

18085

In [60]:
# Stack the per-site frames into one frame and drop rows with missing values.
t = pd.concat(out_dict.values()).dropna()

In [61]:
# Display the combined frame.
t

Unnamed: 0_level_0,rzsm,ssm,psi_rz,psi_s,site
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-14,43.163889,36.331250,0.247791,1.167014,2057
2015-01-26,42.733333,35.846690,0.271166,1.316780,2057
2015-02-07,42.470357,34.819636,0.286644,1.710174,2057
2015-02-19,44.837786,38.078369,0.175994,0.764979,2057
2015-03-03,47.494035,40.620629,0.104890,0.427808,2057
2015-03-15,43.228125,38.267014,0.244499,0.731728,2057
2015-03-27,45.112937,37.021875,0.166574,0.985226,2057
2015-04-08,46.000000,39.837500,0.139817,0.509654,2057
2015-04-20,44.391103,39.565972,0.192575,0.541982,2057
2015-05-02,39.707639,19.704514,0.524840,286.075766,2057
