# Extract GEE data over all sites

## Setup
For each site, extract the full time series of Sentinel-1 and NDVI data from Google Earth Engine (GEE). Note: if the output CSV files already exist, they are assumed to be correct and are not overwritten.

Setup the proxy

In [1]:
import os
import pandas as pd
import ee
import utils
# Point both proxy environment variables at the local proxy, then initialise
# the Earth Engine client.
os.environ.update(
    dict.fromkeys(('HTTP_PROXY', 'HTTPS_PROXY'), 'http://127.0.0.1:41091')
)
ee.Initialize()

Set the parameters, paths etc.

In [2]:
# Parameters for Sentinel-1 data
START_DATE = "2016-01-01"
END_DATE = "2019-12-31"
S1_PRODUCT = "COPERNICUS/S1_GRD"
ORBIT_PASS_A = 'ASCENDING'
ORBIT_PASS_D = 'DESCENDING'
S1_BANDS = ["VV",
            "VH",
            "angle"]
IN_MOD = 'IW'  # presumably the Sentinel-1 instrument mode -- confirm against the extractor


# Parameters for NDVI data.
# The NDVI window starts one month before START_DATE and ends one month after
# END_DATE (two extra months in total) so that NDVI can be interpolated at the
# edges of the Sentinel-1 period.
START_DATE_NDVI = "2015-12-01"
END_DATE_NDVI = "2020-01-31"
MOD_PRODUCT = "MODIS/006/MOD13Q1"
MYD_PRODUCT = "MODIS/006/MYD13Q1"
NDVI_BANDS = ["NDVI",
              "EVI"]
# Columns that are interpolated onto the Sentinel-1 acquisition dates
interpolate_bands = ['NDVI', 'EVI', 'SMAP']

# Global setups, dir, path
BUFFER = 50  # a buffer of 50 m to have a resolution of around 100 m
save_to_disk = False  # no temporary files

HOME_DIR = r"E:\Zoho WorkDrive (YICODE)\My Folders\TimeSeriesRetrieval\Extension"
# Site information extracted by Preprocessing_ISMN_Raw_Data.ipynb
SM_SITES = os.path.join(HOME_DIR, "site_info.csv")
# SMAP data extracted by "Extract the SMAP soil moisture.ipynb".
# The path components are joined separately: a literal "SMAP\SMAP.csv" contains
# the invalid escape sequence "\S" and hard-codes the Windows separator.
SMAP_FILE = os.path.join(HOME_DIR, "SMAP", "SMAP.csv")
SM_DIR = os.path.join(HOME_DIR, "daily_ave")
INPUT_DIR = os.path.join(HOME_DIR, "input")
# Create the output directory if needed; exist_ok avoids the separate
# exists() check.
os.makedirs(INPUT_DIR, exist_ok=True)
NDVI_DIR = os.path.join(HOME_DIR, "NDVI")
S1_DIR = os.path.join(HOME_DIR, "sentinel1")

Read the sites information

In [3]:
# Load the site table (one row per site) with pandas' high-precision float
# converter.
sites = pd.read_csv(
    filepath_or_buffer=SM_SITES,
    float_precision="high",
)

## A loop to prepare the input data of each site
###### 1 read the SMAP records for the site
###### 2 read the in-situ soil moisture
###### 3 extract the MYD and MOD NDVI over the site and merge the two NDVI series
###### 4 if ascending data was collected over the site, extract the ascending Sentinel-1 data and interpolate the NDVI and SMAP on all Sentinel-1 ascending acquisition dates
###### 5 if descending data was collected over the site, extract the descending Sentinel-1 data and interpolate the NDVI and SMAP on all Sentinel-1 descending acquisition dates
###### 6 concatenate all data and save

###### Note: the loop may report the error "IncompleteRead"; if it does, just run this cell again.

In [9]:
from TimeseriesExtractor import GeeS1TimeseriesExtractor, GeeTimeseriesExtractor


def _merge_on_s1_dates(df_s1, df_ndvi, df_smap, df_gt):
    """Join NDVI, SMAP and in-situ data onto the Sentinel-1 acquisition dates.

    Args:
        df_s1: Sentinel-1 time series for one orbit pass.
        df_ndvi: merged MODIS NDVI/EVI time series.
        df_smap: SMAP series for the site, named 'SMAP'.
        df_gt: in-situ soil moisture measurements.

    Returns:
        A frame restricted to the index of ``df_s1`` that holds the
        Sentinel-1 columns, the interpolated ``interpolate_bands`` columns
        and the in-situ columns, with duplicated column names dropped.
    """
    merged = pd.concat([df_s1, df_ndvi, df_smap], axis=1).sort_index()
    # Fill only gaps that lie between valid observations (no extrapolation).
    # NOTE(review): interpolate() defaults to method='linear', which according
    # to the pandas documentation ignores the index and treats rows as equally
    # spaced -- confirm whether method='time' was intended here.
    merged[interpolate_bands] = merged[interpolate_bands].interpolate(limit_area='inside')
    merged = pd.concat([merged, df_gt], axis=1)
    merged = merged.loc[df_s1.index]  # only keep the dates with S1 observations
    return merged.loc[:, ~merged.columns.duplicated()]  # remove duplicate columns


# Global extractors for MYD13Q1 and MOD13Q1
MYD_Extractor = GeeTimeseriesExtractor(MYD_PRODUCT, NDVI_BANDS, START_DATE_NDVI, END_DATE_NDVI, NDVI_DIR, save_to_disk)
MOD_Extractor = GeeTimeseriesExtractor(MOD_PRODUCT, NDVI_BANDS, START_DATE_NDVI, END_DATE_NDVI, NDVI_DIR, save_to_disk)
# Read all related SMAP soil moisture data
df_SMAP = pd.read_csv(SMAP_FILE, index_col='time', parse_dates=True)

for _, site in sites.iterrows():
    site_name = f'{site.network}_{site.station}'
    output_file = os.path.join(INPUT_DIR, f'{site_name}.csv')

    # Existing outputs are assumed to be correct and are skipped.
    if os.path.exists(output_file):
        print(f'Site: {site_name}, Done before')
        continue
    print(f'Extracting data for site: {site_name}')

    # Read SMAP data: one column per EASE grid cell, named r<row>c<column>
    df_site_SMAP = df_SMAP['r%sc%s' % (site.EASE_row, site.EASE_column)]
    df_site_SMAP = df_site_SMAP.rename('SMAP')

    # Read ground sm measurements
    SM_site_file = os.path.join(SM_DIR, f'{site_name}.csv')
    df_gt = pd.read_csv(SM_site_file, index_col='time', parse_dates=True)

    # Extract NDVI from both MODIS products and average dates present in both
    site_point = ee.Geometry.Point(site.lon, site.lat)
    MYD = MYD_Extractor.download_data(site_point)
    MOD = MOD_Extractor.download_data(site_point)
    df_NDVI = pd.concat([MYD, MOD]).sort_index()
    df_NDVI[NDVI_BANDS] = df_NDVI[NDVI_BANDS].astype('float') / 10000  # Remove the scale of 10000
    df_NDVI = df_NDVI.groupby(level=0).mean()

    # Setup for Sentinel-1 extraction: GEE point geometry with a buffer
    site_geometry = ee.Geometry.Point(site.lon, site.lat).buffer(BUFFER)
    # Empty template that fixes the leading column order of the output file
    df_template = pd.DataFrame(columns=['time', 'Excel_day', 'DoY', 'VV', 'VH', 'angle', 'relative_orbit', 'platform',
       'orbit_pass', 'NDVI', 'EVI', 'SMAP', 'sm', 'ts', 'sm_count']).set_index('time')
    frames = [df_template]

    # For the ascending and then the descending pass: extract the Sentinel-1
    # data (if any image exists) and interpolate NDVI and SMAP on the
    # acquisition dates of that pass.
    for orbit_pass in (ORBIT_PASS_A, ORBIT_PASS_D):
        S1_extractor = GeeS1TimeseriesExtractor(S1_PRODUCT, START_DATE, END_DATE, S1_BANDS, site_geometry, orbit_pass, IN_MOD, S1_DIR, save_to_disk)
        if S1_extractor.image_size > 0:
            df_S1 = S1_extractor.get_and_save_data(site_name)
            frames.append(_merge_on_s1_dates(df_S1, df_NDVI, df_site_SMAP, df_gt))

    # Save data. pd.concat replaces DataFrame.append, which was removed in
    # pandas 2.0.
    df_all = pd.concat(frames).sort_index()
    df_all.to_csv(output_file)

Site: AMMA-CATCH_Banizoumbou, Done before
Site: AMMA-CATCH_Belefoungou-Mid, Done before
Site: AMMA-CATCH_Belefoungou-Top, Done before
Site: AMMA-CATCH_Nalohou-Mid, Done before
Site: AMMA-CATCH_Nalohou-Top, Done before
Site: AMMA-CATCH_Tondikiboro, Done before
Site: AMMA-CATCH_Wankama, Done before
Site: PBO_H2O_Bethlehem, Done before
Site: PBO_H2O_Mafikeng, Done before
Site: PBO_H2O_Newcastle, Done before
Site: PBO_H2O_Sutherland, Done before
Site: PBO_H2O_Upington, Done before
Site: SD_DEM_Demokeya, Done before
Site: TAHMO_CRIG_(Soil_Moisture_Station_1), Done before
Site: TAHMO_CRIG_(Soil_Moisture_Station_2), Done before
Site: TAHMO_CSIR-SARI,_Nyankpala_-_Tamale, Done before
Site: TAHMO_KNUST_Farm,_Kumasi, Done before
Site: CTP_SMTMN_L01, Done before
Site: CTP_SMTMN_L02, Done before
Site: CTP_SMTMN_L03, Done before
Site: CTP_SMTMN_L04_M02, Done before
Site: CTP_SMTMN_L05_M06, Done before
Site: CTP_SMTMN_L06_M10, Done before
Site: CTP_SMTMN_L07_M13, Done before
Site: CTP_SMTMN_L08_M14, 

Site: HOBE_3.03, Done before
Site: HOBE_3.04, Done before
Site: HOBE_3.05, Done before
Site: HOBE_3.06, Done before
Site: HOBE_3.07, Done before
Site: HOBE_3.08, Done before
Site: HOBE_3.09, Done before
Site: HYDROL-NET_PERUGIA_Water_Engineering_Experimental_Field_1, Done before
Site: HYDROL-NET_PERUGIA_Water_Engineering_Experimental_Field_2, Done before
Site: REMEDHUS_Canizal, Done before
Site: REMEDHUS_Carretoro, Done before
Site: REMEDHUS_Casa_Periles, Done before
Site: REMEDHUS_Concejo_del_Monte, Done before
Site: REMEDHUS_El_Coto, Done before
Site: REMEDHUS_El_Tomillar, Done before
Site: REMEDHUS_Granja_g, Done before
Site: REMEDHUS_Guarrati, Done before
Site: REMEDHUS_La_Atalaya, Done before
Site: REMEDHUS_La_Cruz_de_Elias, Done before
Site: REMEDHUS_Las_Arenas, Done before
Site: REMEDHUS_Las_Bodegas, Done before
Site: REMEDHUS_Las_Brozas, Done before
Site: REMEDHUS_Las_Eritas, Done before
Site: REMEDHUS_Las_Tres_Rayas, Done before
Site: REMEDHUS_Las_Vacas, Done before
Site: REME

Site: SCAN_Adams_Ranch_#1, Done before
Site: SCAN_Alcalde, Done before
Site: SCAN_Alkali_Mesa, Done before
Site: SCAN_Allen_Farms, Done before
Site: SCAN_Ames, Done before
Site: SCAN_Ash_Valley, Done before
Site: SCAN_Beasley_Lake, Done before
Site: SCAN_Beaumont, Done before
Site: SCAN_Blue_Creek, Done before
Site: SCAN_Bodie_Hills, Done before
Site: SCAN_Bosque_Seco, Done before
Site: SCAN_Bragg_Farm, Done before
Site: SCAN_Broad_Acres, Done before
Site: SCAN_Buckhorn, Done before
Site: SCAN_Buffalo_Jump, Done before
Site: SCAN_Busby_Farm, Done before
Site: SCAN_Bushland_#1, Done before
Site: SCAN_Cache_Junction, Done before
Site: SCAN_Carver_Farm, Done before
Site: SCAN_Cave_Valley, Done before
Site: SCAN_Centralia_Lake, Done before
Site: SCAN_Charkiln, Done before
Site: SCAN_Chicken_Ridge, Done before
Site: SCAN_Circleville, Done before
Site: SCAN_CMRB_LTAR-MO, Done before
Site: SCAN_Cochora_Ranch, Done before
Site: SCAN_Combate, Done before
Site: SCAN_Conrad_Ag_Rc, Done before
Sit

Site: SNOTEL_BRISTLECONE_TRAIL, Done before
Site: SNOTEL_BROWN_DUCK, Done before
Site: SNOTEL_BROWN_TOP, Done before
Site: SNOTEL_BRUMLEY, Done before
Site: SNOTEL_BUCKBOARD_FLAT, Done before
Site: SNOTEL_BUCK_FLAT, Done before
Site: SNOTEL_BUCKINGHORSE, Done before
Site: SNOTEL_BUCK_PASTURE, Done before
Site: SNOTEL_BUCKSKIN_JOE, Done before
Site: SNOTEL_BUCKSKIN_LOWER, Done before
Site: SNOTEL_BUG_LAKE, Done before
Site: SNOTEL_BURNSIDE_LAKE, Done before
Site: SNOTEL_BURNT_MOUNTAIN, Done before
Site: SNOTEL_BURRO_MOUNTAIN, Done before
Site: SNOTEL_BURTS_MILLER_RANCH, Done before
Site: SNOTEL_BUTTE, Done before
Site: SNOTEL_CAMP_JACKSON, Done before
Site: SNOTEL_CARSON_PASS, Done before
Site: SNOTEL_CASCADE_#2, Done before
Site: SNOTEL_CASCADE_MOUNTAIN, Done before
Site: SNOTEL_CASTLE_VALLEY, Done before
Site: SNOTEL_CAVE_MOUNTAIN, Done before
Site: SNOTEL_CAYUSE_PASS, Done before
Site: SNOTEL_Chalender, Done before
Site: SNOTEL_CHALK_CREEK_#1, Done before
Site: SNOTEL_CHALK_CREEK_#2,

Site: SNOTEL_Panguitch_Lake_RS, Done before
Site: SNOTEL_PARADISE, Done before
Site: SNOTEL_PARK_CONE, Done before
Site: SNOTEL_PARK_CREEK_RIDGE, Done before
Site: SNOTEL_PARK_RESERVOIR, Done before
Site: SNOTEL_PARLEYS_SUMMIT, Done before
Site: SNOTEL_PARRISH_CREEK, Done before
Site: SNOTEL_PAYSON_R.S., Done before
Site: SNOTEL_PHANTOM_VALLEY, Done before
Site: SNOTEL_PICKLE_KEG, Done before
Site: SNOTEL_PIERCE_R.S., Done before
Site: SNOTEL_PINE_CREEK, Done before
Site: SNOTEL_POCKET_CREEK, Done before
Site: SNOTEL_POISON_FLAT, Done before
Site: SNOTEL_Pole_Canyon, Done before
Site: SNOTEL_POLE_CREEK_R.S., Done before
Site: SNOTEL_PORPHYRY_CREEK, Done before
Site: SNOTEL_QUARTZ_MOUNTAIN, Done before
Site: SNOTEL_QUARTZ_PEAK, Done before
Site: SNOTEL_Quemazon, Done before
Site: SNOTEL_RAGGED_MOUNTAIN, Done before
Site: SNOTEL_RAINBOW_CANYON, Done before
Site: SNOTEL_RAINY_PASS, Done before
Site: SNOTEL_Redden_Mine_Lwr, Done before
Site: SNOTEL_RED_PINE_RIDGE, Done before
Site: SNOTEL_

Site: USCRN_Asheville_8_SSW, Done before
Site: USCRN_Austin_33_NW, Done before
Site: USCRN_Avondale_2_N, Done before
Site: USCRN_Baker_5_W, Done before
Site: USCRN_Batesville_8_WNW, Done before
Site: USCRN_Bedford_5_WNW, Done before
Site: USCRN_Blackville_3_W, Done before
Site: USCRN_Bodega_6_WSW, Done before
Site: USCRN_Boulder_14_W, Done before
Site: USCRN_Bowling_Green_21_NNE, Done before
Site: USCRN_Brigham_City_28_WNW, Done before
Site: USCRN_Bronte_11_NNE, Done before
Site: USCRN_Brunswick_23_S, Done before
Site: USCRN_Buffalo_13_ESE, Done before
Site: USCRN_Cape_Charles_5_ENE, Done before
Site: USCRN_Champaign_9_SW, Done before
Site: USCRN_Charlottesville_2_SSE, Done before
Site: USCRN_Chatham_1_SE, Done before
Site: USCRN_Chillicothe_22_ENE, Done before
Site: USCRN_Cortez_8_SE, Done before
Site: USCRN_Corvallis_10_SSW, Done before
Site: USCRN_Coshocton_8_NNE, Done before
Site: USCRN_Crossville_7_NW, Done before
Site: USCRN_Darrington_21_NNE, Done before
Site: USCRN_Denio_52_WSW

## remove sites without ground soil moisture measurements 