## Prepare samples using GEE

### Setup
For each site, extract the full time series of Sentinel-1 and NDVI data from Google Earth Engine (GEE). Note: if an output CSV file already exists, it is assumed to be correct and is not overwritten.

Note: A proxy is configured here because Google services cannot be reached directly from mainland China; you may not need it. Also check the proxy settings in utils.py.

In [1]:
import os
import pandas as pd
import numpy as np
import time
import ee
# Route HTTP and HTTPS traffic through a local proxy.
# Remove or edit these entries if no proxy is required on your network.
os.environ.update({
    'HTTP_PROXY': 'http://127.0.0.1:7897',
    'HTTPS_PROXY': 'http://127.0.0.1:7897',
})

# Authenticate the Google Earth Engine (GEE) account.
ee.Authenticate()
import utils_data_pre

Set the parameters, paths etc.

In [2]:
# Root folder that holds the inputs and receives the outputs -- change it to
# your own location.
HOME_DATA_DIR = r'E:\Demo\data_pre'

# Date range for the Sentinel-1 data.
START_DATE, END_DATE = "2015-01-01", "2022-12-31"

# Date range for the NDVI and weather data: it starts one year before the
# Sentinel-1 range and ends one month after it.
START_DATE_NDVI, END_DATE_NDVI = "2014-01-01", "2023-01-31"

# Global settings and paths.
save_to_disk = False  # do not write temporary files

# Site information extracted by Preprocessing_ISMN_Raw_Data.ipynb.
SM_SITES = os.path.join(HOME_DATA_DIR, "stations.csv")
# Folder with the soil moisture records of the stations.
dir_to_site_sm = os.path.join(HOME_DATA_DIR, "station_sm")
# Folder that receives the per-site output.
dir_to_site_samples = os.path.join(HOME_DATA_DIR, "output")

Read the sites information and determine the grid size

In [3]:
# Grid helper, built from the code 4326 and the grid cell size.
# NOTE(review): the size was originally labelled "km", but it is passed next to
# 4326 (used as 'EPSG:4326', a geographic CRS, further down) -- confirm whether
# 0.05 is in degrees or kilometres.
grid_size = 0.05
pobj = utils_data_pre.grids_4_a_region(4326, grid_size)

# Station table; float_precision="high" selects pandas' high-precision
# converter for the floating-point columns.
sites = pd.read_csv(SM_SITES, float_precision="high")

## A loop to prepare the input data of each site

###### 1 Create the grid polygon covering a site in both EASE2.0 and WGS84
###### 2 Extract Sentinel-1, soil texture, terrain, NDVI, precipitation, temperature, etc. Check the utils for the details.
###### 3 Concatenate all data
###### 4 Extract the ground soil moisture of the site
###### Note: the loop may fail with an "IncompleteRead" error; if it does, just run this cell again (sites whose output file already exists are skipped).

In [4]:
# Build one sample table per site and write it to
# <dir_to_site_samples>/<Site>.csv.
#
# For every row of `sites` the loop:
#   1. skips the site if its output file already exists,
#   2. builds the grid polygon around the site and extracts the samples,
#   3. attaches the station soil moisture to the Sentinel-1 table,
#   4. concatenates everything and writes the CSV.

# DataFrame.to_csv does not create missing folders; create the output folder
# up front so a missing folder cannot fail the write after the extraction.
os.makedirs(dir_to_site_samples, exist_ok=True)

for i, site in sites.iterrows():
    site_csv = '%s.csv' % site.Site
    path_2_site_file = os.path.join(dir_to_site_samples, site_csv)
    # Existing outputs are assumed to be correct and are never overwritten,
    # which also makes the cell safe to re-run after an interruption.
    if os.path.exists(path_2_site_file):
        continue

    # Grid ring around the site and the matching GEE polygon.
    ring_wgs, grid_ring = pobj.get_wgs_grid(site.lon, site.lat)
    polygon_grid = ee.Geometry.Polygon(ring_wgs, 'EPSG:4326', True, 20, False)
    samples, df_S1 = utils_data_pre.samples_4_grid_v1(
        polygon_grid,
        START_DATE, END_DATE,
        START_DATE_NDVI, END_DATE_NDVI,
        ring_wgs, pobj,
    )

    # Include the ground truth of soil moisture. The station 'SM' values are
    # divided by 100 and stored in the 'sm_25' column.
    station_sm = pd.read_csv(os.path.join(dir_to_site_sm, site_csv),
                             parse_dates=['date'])
    # Look the values up by calendar day instead of assigning them by position:
    # a positional assignment raises when either table has repeated days and
    # silently misaligns when the two tables are ordered differently.
    # Several station records on the same day are averaged; with one record
    # per day the value is unchanged.
    sm_by_day = station_sm.groupby(
        station_sm['date'].dt.date, sort=False)['SM'].mean() / 100
    s1_days = df_S1['date'].dt.date
    has_station_obs = s1_days.isin(sm_by_day.index)
    # .to_numpy() keeps the assignment positional within the selected rows, so
    # it does not depend on df_S1 having a unique index.
    df_S1.loc[has_station_obs, 'sm_25'] = (
        s1_days[has_station_obs].map(sm_by_day).to_numpy()
    )

    # Put the extracted samples on the Sentinel-1 index and join column-wise.
    samples = pd.DataFrame(samples, index=df_S1.index)
    samples = pd.concat([df_S1, samples], axis=1)
    samples.to_csv(path_2_site_file)

    # Pause before the next site (presumably to stay within GEE request
    # limits -- confirm).
    time.sleep(10)