In [1]:
import glob
import os
import re
import sys
from datetime import datetime, timedelta

import ee
import geopandas as gp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats

import rsfuncs as rs

%load_ext autoreload
%autoreload 2

# Default size applied to every matplotlib figure that does not pass its own figsize.
plt.rcParams['figure.figsize'] = [18,16]
# Start the Earth Engine client session (assumes credentials are already set up - TODO confirm).
ee.Initialize()

In [2]:
# EE functions are in the rsfuncs module. Local functions are here:

In [3]:
def read_sm_file(filename):
    """Read a comma-separated soil moisture text file into a DataFrame.

    Lines beginning with "#" are treated as comments and skipped, as are blank
    lines. The first remaining line is the header: "Soil Moisture Percent" is
    shortened to "smp" and spaces are replaced by underscores. Every following
    line becomes one row, split on commas.

    Parameters
    ----------
    filename : str or path-like
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame
        One column per header field; all values are left as strings.

    Raises
    ------
    ValueError
        If the file contains no header line (empty or comments only).
    """
    with open(filename) as f:
        # Strip line terminators up front so the last header name and the last
        # field of every row do not carry a trailing "\n" (or "\r\n").
        lines = [line.rstrip("\r\n") for line in f]

    # Keep only real content: no comment lines, no blank lines.
    data = [line for line in lines if line.strip() and not line.startswith("#")]
    if not data:
        raise ValueError("No header line found in {}".format(filename))

    headers = [x.replace("Soil Moisture Percent","smp").replace(" ","_") for x in data[0].split(",")]
    cols = [x.split(",") for x in data[1:]]

    df = pd.DataFrame(cols, columns = headers)

    return df

In [4]:
# Load the SCAN site shapefile and drop the stations whose state is AK, HI or PR.
site_file = gp.read_file("../shape/scan_sites.shp")
# .copy() makes `sites` an independent frame, so adding columns to it later
# does not raise SettingWithCopyWarning or silently write to a temporary view.
sites = site_file.loc[~site_file['state'].isin(["AK", "HI", "PR"])].copy()

In [5]:
# Dataset lookup provided by rsfuncs; other cells index it by product key
# (e.g. data['nlcd'], data['cdl']) and then by position.
data = rs.load_data()

In [6]:
# Folder searched for the per-site soil moisture files (relative to the notebook's working directory).
data_dir = "../data"

In [7]:
# For each site id, find the sm file with the data (in data dir),
# calculate psi as psi = A * SM ^ b (Krishna's paper),
# query the (1) landcover, (2) Sentinel backscatter (PRISM P), MODIS / Landsat LAI for the whole timeseries

# NOTE: only the first site is processed (sites[:1]).
for idx, row in sites[:1].iterrows():

    # Get the corresponding soil moisture file from data folder
    site_id = row.id
    matches = [os.path.join(data_dir,x) for x in os.listdir(data_dir) if site_id in x]
    if not matches:
        # Fail with a message that names the site instead of a bare IndexError.
        raise FileNotFoundError("No soil moisture file containing '{}' found in {}".format(site_id, data_dir))
    sm_file = matches[0]
    sm_dat = read_sm_file(sm_file)

    # Make geom to submit to EE: bounding box of the site geometry buffered by 0.0001 (in the geometry's own units)
    x,y = row.geometry.buffer(0.0001).envelope.exterior.coords.xy
    coords = [list(zip(x,y))]
    area = ee.Geometry.Polygon(coords)

    # start and end date of the soil moisture record (first and last row)
    sm_dat.Date = pd.to_datetime(sm_dat.Date)
    startdate = sm_dat.Date.iloc[0]
    enddate = sm_dat.Date.iloc[-1]

    date = startdate.strftime("%Y-%m-%d")

    # Select the nlcd dataset: [0] image collection, [1] band/variable name, [3] scale passed to reduceRegion
    dataset =  data['nlcd']
    ic = dataset[0]
    var = dataset[1]
    res = dataset[3]

    # First NLCD image within [startdate - 1 year, startdate + 2 years)
    col = ic.filterDate(ee.Date(date).advance(-1, 'years'), ee.Date(date).advance(2, 'years')).first()
    # Land cover is categorical: take the most common class inside the area.
    # Averaging class codes can produce a number that is not a valid class,
    # which would make the .index() lookup below raise ValueError.
    t = col.reduceRegion(ee.Reducer.mode(), area, res).get(var)
    meta = col.getInfo()
    lctype = t.getInfo()

    # Get the landcover type: map the class value to its descriptive name
    lcidx = meta['properties']['landcover_class_values'].index(lctype)
    lctype = meta['properties']['landcover_class_names'][lcidx]
    print(lctype)

    # Get the other datasets. Start with sentinel.

    # Filter by prism (DAILY)

#     prism = rs.get_ims(data['prism'], range(startdate.year, enddate.year),range(1,13), area) 
#     prism = rs.get_ims(data['landsat'], range(startdate.year, enddate.year), range(1,13), area) 

    # Get the NDVI / AGB - Landsat / MODIS

    # params to calculate psi (not used yet)
    a = row.a
    b = row.b
    

Deciduous Forest - Areas dominated by trees generally greater than 5 meters tall, and greater than 20% of total vegetation cover. More than 75 percent of the tree species shed foliage simultaneously in response to seasonal change.


In [None]:
# Translate the NLCD class value held in `lctype` into its descriptive name.
# The lookup is skipped when `lctype` is not one of the class values (for
# example because it was already translated), so re-running this cell no
# longer raises ValueError from list.index().
lc_values = meta['properties']['landcover_class_values']
lc_names = meta['properties']['landcover_class_names']
if lctype in lc_values:
    lcidx = lc_values.index(lctype)
    lctype = lc_names[lcidx]

In [None]:

    # Scratch cell: NLCD and Sentinel-1 queries for the site point.
    # NOTE(review): `pt` must already exist in the kernel when this cell runs.

    # data['nlcd'] is a sequence whose first element is the image collection,
    # so index it before filtering. The Sentinel-1-specific filters that used
    # to be chained here (polarisation, orbit pass, 'VV' band) were removed
    # from this NLCD query. `lc` is not used further in this cell.
    lc = data['nlcd'][0].filterBounds(pt)

    # Sentinel-1 GRD, VV band, ascending passes, sorted oldest first.
    # The collection is now defined before it is filtered; previously an
    # identical filter/sort/toList sequence also ran above this line, where
    # `collection` had not been assigned yet.
    collection = ee.ImageCollection('COPERNICUS/S1_GRD').filterBounds(pt).filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV')).filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING')).select('VV')
    col = collection.filterDate('2014-10-03','2018-10-03')
    t = col.sort('system:time_start')
    ims = t.toList(t.size())
    # %H%M%S = hour, minute, second. The previous "%h%m%s" printed the month twice
    # (abbreviated name, then number) followed by a platform-dependent "%s" field.
    print(datetime.strftime(startdate, "%Y%m%d - %H%M%S"))
    

In [None]:
# First image of the data['cdl'] collection that falls between startdate and enddate
# and intersects `area`, reduced to its 'cropland' band.
t = data['cdl'][0].filterDate(startdate,enddate).select('cropland').filterBounds(area).first()

In [None]:
# List every pixel value of `t` inside `area`, sampled at scale 30.
# No getInfo() is called here, so `t2` is still an Earth Engine object.
t2 = t.reduceRegion(reducer=ee.Reducer.toList(), geometry = area, scale = 30)

In [None]:
# First USDA/NASS/CDL image dated within one month after startdate.
image = ee.ImageCollection("USDA/NASS/CDL").filterDate(ee.Date(startdate), ee.Date(startdate).advance(1, 'month')).first()

In [None]:
# Count how often each pixel value of `image` occurs inside `area` (scale 30).
# NOTE(review): this overwrites the `t2` produced by the toList() cell.
t2 = image.reduceRegion(reducer=ee.Reducer.frequencyHistogram(), geometry = area, scale = 30)

In [None]:
# Same histogram as above, but built from the per-pixel mean of all CDL images
# in the one-month window, and fetched to the client with getInfo().
# NOTE(review): averaging looks questionable if the CDL bands hold categorical
# class codes - confirm whether .first() was intended here.
print(ee.ImageCollection("USDA/NASS/CDL")
      .filterDate(ee.Date(startdate), ee.Date(startdate).advance(1, 'month')).mean()
      .reduceRegion(reducer=ee.Reducer.frequencyHistogram(), geometry = area, scale = 30)
      .getInfo())

In [None]:
# Helpers 

In [None]:
# EE --> Numpy functions

In [None]:
def array_from_latlon(latlon_obj, band="VV"):
    """Download one band of an image over the global `area` as a 2-D array.

    Parameters
    ----------
    latlon_obj : ee.Image
        Image that carries "latitude" and "longitude" bands (as added by
        ee.Image.pixelLonLat()) next to the data band.
    band : str, optional
        Name of the data band to extract. Defaults to "VV".

    Returns
    -------
    numpy.ndarray
        Grid produced by make_np_array from the band values and coordinates.

    Notes
    -----
    Reads the notebook-level `area` geometry and samples at scale=10.
    """
    res = latlon_obj.reduceRegion(reducer=ee.Reducer.toList(),geometry=area,maxPixels=1e8,scale=10)
    # Fetch the whole result dictionary once instead of making three
    # separate blocking getInfo() calls (one per band).
    pixels = res.getInfo()
    data = np.array(pixels[band])
    lats = np.array(pixels["latitude"])
    lons = np.array(pixels["longitude"])
    out = make_np_array(data, lats, lons)
    return out


def array_from_latlon_ppt(latlon_obj):
    """Same as array_from_latlon, but extracts the "ppt" band."""
    return array_from_latlon(latlon_obj, band="ppt")

def make_np_array(data, lats, lons):
    """Arrange per-pixel samples into a 2-D grid with the highest latitude in row 0.

    Each sample is written to the cell addressed by its own latitude and
    longitude, so the result does not depend on the order of the inputs.
    Cells without a sample stay 0.

    Parameters
    ----------
    data, lats, lons : array-like
        Equal-length sequences holding the value, latitude and longitude of
        every pixel.

    Returns
    -------
    numpy.ndarray
        float32 array of shape (number of unique latitudes, number of unique
        longitudes); columns run from lowest to highest longitude.

    Raises
    ------
    ValueError or IndexError
        Raised by NumPy if the three inputs do not have the same length.
    """
    lons = np.asarray(lons).ravel()
    lats = np.asarray(lats).ravel()
    data = np.asarray(data).ravel()

    # Unique coordinates plus, for each sample, the index of its coordinate.
    uniqueLats, lat_idx = np.unique(lats, return_inverse=True)
    uniqueLons, lon_idx = np.unique(lons, return_inverse=True)

    nrows = len(uniqueLats)
    ncols = len(uniqueLons)

    arr = np.zeros([nrows, ncols], np.float32)

    # uniqueLats is ascending, so flip the row index to put north at the top.
    # This replaces a counter-driven loop that read data[counter] after
    # incrementing (dropping the first sample and shifting the rest), never
    # filled the last cell, and failed on grids with a single row or column.
    arr[nrows - 1 - lat_idx.ravel(), lon_idx.ravel()] = data

    return arr

def _parse_image_time(image_id):
    """Return the datetime embedded in an Earth Engine image id.

    Prefers a full "YYYYMMDDTHHMMSS" stamp (first occurrence in the id) and
    falls back to a bare 8-digit "YYYYMMDD" date, which is read as midnight.
    Raises ValueError when the id contains neither.
    """
    stamp = re.search(r"(\d{8})T(\d{6})", image_id)
    if stamp:
        return datetime.strptime(stamp.group(1) + stamp.group(2), '%Y%m%d%H%M%S')
    day = re.search(r"(?<!\d)(\d{8})(?!\d)", image_id)
    if day:
        return datetime.strptime(day.group(1), '%Y%m%d')
    raise ValueError("No date found in image id '{}'".format(image_id))


def _collect_images(ims_list, to_array):
    """Convert every image of an ee.List to an array and collect its timestamp."""
    # Ask the server for the length only, rather than downloading the
    # metadata of every image just to count them.
    num_images = ims_list.size().getInfo()

    imlist = []
    times = []
    # ee.List is 0-indexed: start at 0 so the first image is not skipped.
    for i in range(num_images):
        image = ims_list.get(i)
        latlon = ee.Image.pixelLonLat().addBands(image)
        imlist.append(to_array(latlon))
        # Read the id from the list that was passed in, not from a global.
        times.append(_parse_image_time(image.getInfo()['id']))

    return imlist, times


def get_ims_dates(ims_list):
    """Download each image in `ims_list` via array_from_latlon.

    Parameters
    ----------
    ims_list : ee.List
        List of images, e.g. from ImageCollection.toList().

    Returns
    -------
    (list of numpy.ndarray, list of datetime)
        One array and one timestamp per image, in list order. The timestamp
        is parsed from the image id.
    """
    return _collect_images(ims_list, array_from_latlon)


def get_ims_dates_ppt(ims_list):
    """Same as get_ims_dates, but downloads via array_from_latlon_ppt."""
    return _collect_images(ims_list, array_from_latlon_ppt)


In [None]:
# Find files

In [None]:
# Take the first CSV and the first TXT file found in the current working directory.
# NOTE(review): glob returns matches in arbitrary order, so "first" is not stable.
csvs = glob.glob("*.csv")
txts = glob.glob("*.txt")
# NOTE(review): `site_file` held the sites GeoDataFrame earlier; here it becomes a file name.
site_file = txts[0]
stations_csv = csvs[0]

In [None]:
# Extract soil moisture site data

In [None]:
# Site number = every digit character of the file name, concatenated.
site_no = ''.join(c for c in site_file if c.isdigit())
# NOTE(review): `read_file` and `get_site_lat_lons` are not defined in the visible
# notebook - confirm where they come from (read_sm_file may be what is meant).
# NOTE(review): this rebinds `data`, which earlier held the rs.load_data() result.
data = read_file(site_file)
lat, lon = get_site_lat_lons(site_no,stations_csv)

In [None]:
# Now for each site id
# get the sm data file from "data" folder 
# Transform SM to LWP using a, b coeffs

In [None]:
# For every site name keep the text after its first "(" and drop the final
# character (presumably the closing parenthesis of a "name (id)" label).
site_ids = [name.split('(')[1][:-1] for name in sites.site_name]

In [None]:
# Store the parsed ids as an 'id' column; site_ids was built by iterating sites.site_name, so the order matches.
sites['id'] = site_ids

In [None]:
# Write `sites` to the same path the shapefile was originally read from.
# NOTE(review): this overwrites the source file, and `sites` no longer contains
# the AK/HI/PR rows that were filtered out after loading - confirm this is intended.
sites.to_file("../shape/scan_sites.shp")

In [None]:
# Display the full sites table as the cell output.
sites

In [None]:
# Setup ee Objects

In [None]:
# Radius of the buffer drawn around the site point to form `area`; also printed in the figure titles.
buffer_size = 10 # meters

In [None]:
# Site location as an Earth Engine point (longitude first, then latitude) ...
pt = ee.Geometry.Point([lon, lat])
# ... and the region around it that all reductions in the following cells sample.
area = pt.buffer(buffer_size)

In [None]:
# Set the RS products to be queried, sort from oldest im first

In [None]:
# Sentinel-1 GRD scenes touching the site point, restricted to acquisitions
# that include VV polarisation on ascending passes; only the VV band is kept.
collection = (
    ee.ImageCollection('COPERNICUS/S1_GRD')
    .filterBounds(pt)
    .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))
    .filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING'))
    .select('VV')
)
# Fixed study window.
col = collection.filterDate('2014-10-03','2018-10-03')
# Ascending sort on acquisition time puts the oldest image first.
t = col.sort('system:time_start')
# Materialise as an ee.List so single images can be fetched by index.
ims = t.toList(t.size())

In [None]:
# get the ims and sort out the dates to match

In [None]:
# Number of images in the list. size() is evaluated on the server, so only a
# single integer is transferred instead of the metadata of every image.
ims.size().getInfo()

In [None]:
# One backscatter array and one acquisition datetime per Sentinel-1 image.
s1dat, dates = get_ims_dates(ims)

In [None]:
# Parse the 'Date' column ("YYYYMMDD HH:MM") and make it the index so that
# rows can be looked up by timestamp.
# NOTE(review): both statements modify `data` in place; running this cell a
# second time fails because 'Date' is then the index, not a column.
data['Date'] =  pd.to_datetime(data['Date'], format='%Y%m%d %H:%M')
data.set_index('Date', inplace=True)

In [None]:
# Round each overpass time to the nearest hour and grab the soil moisture row for that hour.
rzsm = []
ssm = []

for i in dates:
    # Truncate to the hour, then add one hour when the minutes are 30 or more.
    start = i.replace(second=0, microsecond=0, minute=0, hour=i.hour)+timedelta(hours=i.minute//30)
    # Builtin float replaces np.float, an alias that was removed in NumPy 1.24.
    df = pd.DataFrame(data.loc[start]).T.astype(float)
    # Last column is stored as rzsm, third-from-last as ssm.
    rzsm.append(df[df.columns[-1]].values)
    ssm.append(df[df.columns[-3]].values)
    

In [None]:
# Each entry collected above is a small array; flatten both lists to plain
# sequences of values. Note the flattened ssm goes to a new name, `ssms`.
rzsm = [value for chunk in rzsm for value in chunk]
ssms = [value for chunk in ssm for value in chunk]

In [None]:
# Mean over all pixels of each downloaded Sentinel-1 array.
mean_sigmas = [np.mean(image_array) for image_array in s1dat]

In [None]:
# Least-squares fit of rzsm (y) against mean backscatter (x); both are forced
# to 1-D arrays with one element per overpass first.
x = np.array(mean_sigmas).reshape(len(mean_sigmas))
y = np.array(rzsm).reshape(len(rzsm))
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)

In [None]:
# Scatter of mean backscatter against soil moisture, with the fitted rzsm line.
plt.figure(figsize=(8,6))

# Raw strings keep the LaTeX "\sigma" intact without relying on Python's
# handling of the invalid escape "\s" (which emits a warning).
plt.title(r'''$\sigma$ vs Soil moisture 
          n_overpasses: {}  
          startdate: {} 
          enddate: {}
          buffer size: {}m'''.format(len(dates), dates[0], dates[-1], buffer_size))

plt.scatter(mean_sigmas, rzsm, label = 'rzsm')
plt.scatter(mean_sigmas, ssm, label = 'ssm')
plt.plot(x, intercept + slope*x, 'blue', label='rzsm y = {}x +{}, $r =$ {}'.format(round(slope,1), round(intercept,1), round(r_value,2)))
# Only one x label is set; an earlier xlabel call was immediately overridden by this one.
plt.xlabel(r"$\sigma$ (dB)")
plt.ylabel("Soil moisture (%)")
plt.legend()
plt.show()

In [None]:
# as a timeseries

In [None]:
# Backscatter (left axis) and rzsm (right axis) against overpass date.
fig, ax1 = plt.subplots(figsize=(15,6))

# Raw strings keep the LaTeX "\sigma" intact without relying on Python's
# handling of the invalid escape "\s" (which emits a warning).
plt.title(r'''$\sigma$ vs Soil moisture Time Series
          n_overpasses: {}  
          startdate: {} 
          enddate: {}
          buffer size: {}m'''.format(len(dates), dates[0], dates[-1], buffer_size))

color = 'tab:red'
ax1.set_xlabel('date')
ax1.set_ylabel(r"$\sigma$ (dB)", color=color)
ax1.plot(dates, mean_sigmas, color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:blue'
ax2.set_ylabel('rzsm', color=color)
ax2.plot(dates,rzsm,color = color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()

plt.show()

In [None]:
# remove the days with high preceding rainfall 

In [None]:
# OREGONSTATE/PRISM/AN81d collection limited to images that intersect `area`.
imageCollection = ee.ImageCollection("OREGONSTATE/PRISM/AN81d").filterBounds(area)

In [None]:
def filter_date(product,y,m,d, days_back=1):
    """Select the "ppt" band of the images dated shortly before a given day.

    Parameters
    ----------
    product : ee.ImageCollection
        Collection to filter.
    y, m, d : int
        Year, month and day of the reference date.
    days_back : int, optional
        Length of the look-back window in days. Defaults to 1, the value
        that was previously hard-coded.

    Returns
    -------
    ee.ImageCollection
        Images dated from `days_back` days before the reference date up to
        the reference date, newest first, "ppt" band only.
        NOTE(review): filterDate treats the end date as exclusive, so with
        days_back=1 the window spans a single day - confirm this matches
        the "2 day" wording used by the callers.
    """
    end = ee.Date.fromYMD(y,m,d)
    start = end.advance(-days_back, "day")
    prod = product.filterDate(start, end).sort('system:time_start', False).select("ppt")
    return prod

def get_2day_precip(latlon_obj):
    """Sum the "ppt" band of an image over the notebook-level `area`.

    The image is reduced with a sum reducer at scale=10 and the result is
    fetched to the client and returned as a NumPy array.
    """
    region_total = latlon_obj.reduceRegion(reducer=ee.Reducer.sum(),geometry=area,scale=10)
    ppt_value = ee.Array(region_total.get("ppt")).getInfo()
    return np.array(ppt_value)

In [None]:
# Precipitation total preceding every overpass date: sum the filtered images
# into one image, then sum that image over the area.
# A comprehension keeps its loop variable local, so the notebook-level `x`
# and `y` arrays used for the regression are no longer overwritten here.
rainfall = [
    get_2day_precip(filter_date(imageCollection, when.year, when.month, when.day).sum())
    for when in dates
]

In [None]:
# Stack the four per-overpass sequences as rows (one row per variable);
# the frame is transposed and given column names in the next cell.
fdf = pd.DataFrame([dates, mean_sigmas, rzsm, rainfall])

In [None]:
# Transpose so that each row is one overpass, then name the columns.
# NOTE(review): re-running this cell transposes the frame back and the
# column assignment then fails - it is not idempotent.
fdf = fdf.T
fdf.columns = (['date', "sigma", "rzsm", "rainfall"])

In [None]:
# Keep only the overpasses whose preceding rainfall total is below 0.1.
mask=(fdf['rainfall'] < 0.1)
asdf=fdf[mask]

In [None]:
# Precipitation (left axis, blue) and rzsm (right axis, red) against overpass date.
precip_color = 'tab:blue'
moisture_color = 'tab:red'

fig, ax1 = plt.subplots(figsize=(15,6))

ax1.set_title('''2 day sum Precip (mm) and Soil moisture (%) Time Series
          n_overpasses: {}  
          startdate: {} 
          enddate: {}
          buffer size: {}m'''.format(len(dates), dates[0], dates[-1], buffer_size))

ax1.plot(dates, rainfall, color=precip_color)
ax1.set_xlabel('date')
ax1.set_ylabel("Precip (mm)", color=precip_color)
ax1.tick_params(axis='y', labelcolor=precip_color)

# Second y-axis sharing the same x-axis.
ax2 = ax1.twinx()
ax2.plot(dates, rzsm, color=moisture_color)
ax2.set_ylabel('rzsm', color=moisture_color)
ax2.tick_params(axis='y', labelcolor=moisture_color)

fig.tight_layout()
plt.show()

In [None]:
# Plot linear fit of sigma vs SM, using only the overpasses kept in `asdf`
x = np.array(asdf['sigma'])
x = x.reshape(x.shape[0]).astype(np.float32)
y = np.array(asdf['rzsm'])
y = y.reshape(y.shape[0]).astype(np.float32)

slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)

plt.figure(figsize=(8,6))

# Raw strings keep the LaTeX "\sigma" intact without relying on Python's
# handling of the invalid escape "\s" (which emits a warning).
plt.title(r'''$\sigma$ vs Soil moisture for overpasses without prior precipitation
          n_overpasses: {}  
          startdate: {} 
          enddate: {}
          buffer size: {}m'''.format(len(asdf), asdf['date'].min(), asdf['date'].max(), buffer_size))

plt.scatter(asdf['sigma'],asdf['rzsm'], label = "rzsm")
# plt.scatter(mean_sigmas, ssm, label = 'ssm')
plt.plot(x, intercept + slope*x, 'blue', label='rzsm y = {}x +{}, $r =$ {}'.format(round(slope,1), round(intercept,1), round(r_value,2)))
# Only one x label is set; an earlier xlabel call was immediately overridden by this one.
plt.xlabel(r"$\sigma$ (dB)")
plt.ylabel("Soil moisture (%)")
plt.legend()
plt.show()

In [None]:
# Let's test with 4 day precip accumulation

# OREGONSTATE/PRISM/AN81d collection limited to images that intersect `area`
# (same expression as the earlier PRISM cell, repeated so this cell runs on its own).
imageCollection = ee.ImageCollection("OREGONSTATE/PRISM/AN81d").filterBounds(area)

def filter_date(product,y,m,d, days_back=4):
    """Select the "ppt" band of the images dated shortly before a given day.

    Parameters
    ----------
    product : ee.ImageCollection
        Collection to filter.
    y, m, d : int
        Year, month and day of the reference date.
    days_back : int, optional
        Length of the look-back window in days. Defaults to 4, the value
        that was previously hard-coded in this cell.

    Returns
    -------
    ee.ImageCollection
        Images dated from `days_back` days before the reference date up to
        the reference date (filterDate treats the end as exclusive), newest
        first, "ppt" band only.
    """
    end = ee.Date.fromYMD(y,m,d)
    start = end.advance(-days_back, "day")
    prod = product.filterDate(start, end).sort('system:time_start', False).select("ppt")
    return prod

def get_2day_precip(latlon_obj):
    """Sum the "ppt" band of an image over the notebook-level `area`.

    The image is reduced with a sum reducer at scale=10 and the result is
    fetched to the client and returned as a NumPy array. The function name is
    kept from the earlier cell; the length of the accumulation window is
    decided by whatever image the caller passes in.
    """
    region_total = latlon_obj.reduceRegion(reducer=ee.Reducer.sum(),geometry=area,scale=10)
    ppt_value = ee.Array(region_total.get("ppt")).getInfo()
    return np.array(ppt_value)

# Precipitation total preceding every overpass date: sum the filtered images
# into one image, then sum that image over the area.
# A comprehension keeps its loop variable local, so the notebook-level `x`
# and `y` arrays used for the regression are no longer overwritten here.
rainfall = [
    get_2day_precip(filter_date(imageCollection, when.year, when.month, when.day).sum())
    for when in dates
]
    

In [None]:
# One row per overpass: the four sequences are stacked as labelled rows and
# transposed, so the labels end up as the column names.
fdf = pd.DataFrame(
    [dates, mean_sigmas, rzsm, rainfall],
    index=['date', "sigma", "rzsm", "rainfall"],
).T
# Keep only the overpasses whose preceding rainfall total is below 0.1.
mask = fdf['rainfall'] < 0.1
asdf = fdf[mask]

In [None]:
# Precipitation total (left axis) and rzsm (right axis) against overpass date.
fig, ax1 = plt.subplots(figsize=(15,6))

plt.title('''4 day sum Precip (mm) and Soil moisture (%) Time Series
          n_overpasses: {}  
          startdate: {} 
          enddate: {}
          buffer size: {}m'''.format(len(dates), dates[0], dates[-1], buffer_size))

color = 'tab:blue'
ax1.set_xlabel('date')
# Label now says 4 days, matching the title and the look-back window used in
# this section (it previously read "3 day").
ax1.set_ylabel("4 day precipitation (mm)", color=color)
ax1.plot(dates, rainfall, color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:red'
ax2.set_ylabel('rzsm', color=color)
ax2.plot(dates,rzsm,color = color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()

plt.show()

In [None]:
# Plot linear fit of sigma vs SM, using only the overpasses kept in `asdf`
x = np.array(asdf['sigma'])
x = x.reshape(x.shape[0]).astype(np.float32)
y = np.array(asdf['rzsm'])
y = y.reshape(y.shape[0]).astype(np.float32)

slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)

plt.figure(figsize=(8,6))

# Raw strings keep the LaTeX "\sigma" intact without relying on Python's
# handling of the invalid escape "\s" (which emits a warning).
plt.title(r'''$\sigma$ vs Soil moisture for overpasses without prior precipitation (4 days)
          n_overpasses: {}  
          startdate: {} 
          enddate: {}
          buffer size: {}m'''.format(len(asdf), asdf['date'].min(), asdf['date'].max(), buffer_size))

plt.scatter(asdf['sigma'],asdf['rzsm'], label = "rzsm")
# plt.scatter(mean_sigmas, ssm, label = 'ssm')
plt.plot(x, intercept + slope*x, 'blue', label='rzsm y = {}x +{}, $r =$ {}'.format(round(slope,1), round(intercept,1), round(r_value,2)))
# Only one x label is set; an earlier xlabel call was immediately overridden by this one.
plt.xlabel(r"$\sigma$ (dB)")
plt.ylabel("Soil moisture (%)")
plt.legend()
plt.show()