In [1]:
import sys
import time
import numpy as np
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
from eeSAR.s1 import s1_collection, s1_timescan
from datetime import datetime as dt, timedelta
import ee
ee.Initialize()

In [10]:
# Sample points for which the timescan, SRTM and landcover values
# have already been extracted; the auxiliary bands get added per row below.
samples_path = '/home/vollrath/Indonesia_sm/samples_all/combined_s1_extract.gpkg'
gdf = gpd.read_file(samples_path)

In [3]:
def addAux(image):
    """Append GLDAS soil-moisture and GPM precipitation bands to ``image``.

    All temporal windows end at the acquisition date of ``image``
    (``image.date()``) and reach back 1, 3, 7 or 30 days.

    Bands added, in this order:
        sm_1           GLDAS ``SoilMoi0_10cm_inst`` image closest in time
                       to the acquisition (within the preceding 30 days)
        sm_3/7/30      per-pixel mean of GLDAS over the last 3/7/30 days
        precipitation  per-pixel mean of GPM ``HQprecipitation`` over the
                       last day
        prec_3/7/30    per-pixel mean of GPM over the last 3/7/30 days

    Args:
        image: an ``ee.Image`` providing ``date()`` and ``geometry()``.

    Returns:
        The input image with the eight bands listed above appended.
    """

    def set_resample(img):
        """Set resampling of the image to bilinear (``resample()`` default)."""
        return img.resample()

    def last_days(collection, end, days):
        """Restrict ``collection`` to the ``days`` days preceding ``end``."""
        return collection.filterDate(end.advance(ee.Number(-days), 'days'), end)

    def mean_image(collection, band_name):
        """Per-pixel mean (sum / count) of ``collection``, renamed."""
        return collection.sum().divide(collection.count()).rename(band_name)

    def addGLDAS(image):
        """Add the GLDAS soil-moisture bands sm_1, sm_3, sm_7 and sm_30."""
        t = image.date()
        fro = t.advance(ee.Number(-30), 'days')

        def add_date_difference(img):
            """Tag ``img`` with its absolute time distance (ms) to ``t``."""
            return img.set(
                'dateDist',
                ee.Number(img.get('system:time_start')).subtract(t.millis()).abs()
            )

        gldas = (
            ee.ImageCollection("NASA/GLDAS/V021/NOAH/G025/T3H")
            .select('SoilMoi0_10cm_inst')
            .filterBounds(image.geometry())
            .map(set_resample)
            .filterDate(fro, t)
            .map(add_date_difference)
        )

        # 'dateDist' is only needed here, to pick the temporally closest image.
        sm_gldas = gldas.sort('dateDist').first().rename('sm_1')

        gldas_3day = mean_image(last_days(gldas, t, 3), 'sm_3')
        gldas_7day = mean_image(last_days(gldas, t, 7), 'sm_7')
        # The collection is already limited to the last 30 days.
        gldas_30day = mean_image(gldas, 'sm_30')

        return (
            image
            .addBands(sm_gldas)
            .addBands(gldas_3day)
            .addBands(gldas_7day)
            .addBands(gldas_30day)
        )

    def addGPM(image):
        """Add the GPM bands precipitation, prec_3, prec_7 and prec_30."""
        t = image.date()
        fro = t.advance(ee.Number(-30), 'days')

        # No 'dateDist' property is set here: the GPM collection is only
        # ever aggregated, never sorted by time distance.
        gpm = (
            ee.ImageCollection('NASA/GPM_L3/IMERG_V06')
            .filterBounds(image.geometry())
            .filterDate(fro, t)
            .select('HQprecipitation')
            .map(set_resample)
        )

        gpm_closest = mean_image(last_days(gpm, t, 1), 'precipitation')
        gpm_3day = mean_image(last_days(gpm, t, 3), 'prec_3')
        gpm_7day = mean_image(last_days(gpm, t, 7), 'prec_7')
        # The collection is already limited to the last 30 days.
        gpm_30day = mean_image(gpm, 'prec_30')

        return (
            image
            .addBands(gpm_closest)
            .addBands(gpm_3day)
            .addBands(gpm_7day)
            .addBands(gpm_30day)
        )

    image = addGLDAS(image)
    image = addGPM(image)
    return image

In [4]:
def get_Aux(i, row, out_dir='/home/vollrath/Indonesia_sm/samples_all'):
    """Sample the auxiliary bands for one point and pickle the result.

    Looks up the Sentinel-1 scene named by ``row['scene_id']``, appends the
    bands produced by ``addAux``, collects all pixel values in a buffer
    around the point and stores one aggregated value per band in a
    one-row DataFrame written to ``{out_dir}/{i}.aux.pickle``.

    Args:
        i: index label of the sample; used as the DataFrame index and in
            the output file name.
        row: record of the sample exposing ``lon``, ``lat`` and
            ``scene_id`` (presumably a pandas Series from
            ``gdf.iterrows()`` -- it must support ``.copy()``).
        out_dir: directory receiving the pickle file. Defaults to the
            location the notebook has always written to.

    Returns:
        None. The result is written to disk.

    Raises:
        KeyError: if a requested band is missing from the sampled data.
    """
    point = ee.Geometry.Point(row.lon, row.lat)

    # get the image
    s1 = (
        ee.ImageCollection('COPERNICUS/S1_GRD_FLOAT')
        .filter(ee.Filter.eq('system:index', row['scene_id']))
        .first()
    )

    image = addAux(s1)

    # Collect every pixel value inside the buffered point, per band.
    image_red = image.reduceRegion(
        reducer=ee.Reducer.toList(),
        geometry=point.buffer(150),
        maxPixels=1e13,
        scale=100,
    )

    # Blocking round trip to the Earth Engine servers.
    data_dict = image_red.getInfo()

    bandlist = [
        #'VV', 'VH', 'VVVH_ratio', 'angle', 'LIA', #'layover', 'shadow', 'no_data_mask',
        'precipitation', 'prec_3', 'prec_7', 'prec_30',
        #'gldas_mean', 'gldas_stddev',
        'sm_1', 'sm_3', 'sm_7', 'sm_30',
        #'elevation', 'aspect', 'slope',
        #'landcover',
        #'kVV_mean', 'kVV_stdDev', 'kVV_p5', 'kVV_p95', 'kVH_mean', 'kVH_stdDev', 'kVH_p5', 'kVH_p95',
        #'VV_mean', 'VV_stdDev', 'VV_p5', 'VV_p95', 'VH_mean', 'VH_stdDev', 'VH_p5', 'VH_p95'
    ]

    # Work on a copy so the caller's record is left untouched.
    row = row.copy()

    for band in bandlist:
        values = data_dict[band]
        if not values:
            # No valid pixel in the buffer: store NaN explicitly instead of
            # relying on np.mean([]) (RuntimeWarning) or crashing in argmax.
            row[band] = np.nan
        elif band == 'landcover':
            # Categorical band: keep the most frequent class.
            row[band] = np.argmax(np.bincount(values))
        else:
            row[band] = np.mean(values)

    df = pd.DataFrame.from_dict({i: row}, orient='index')
    df.to_pickle(f'{out_dir}/{i}.aux.pickle')

In [None]:
from multiprocessing import Process

# Index to (re)start from; rows with a smaller index are skipped.
j = 496
# Seconds a single worker may run before it is killed and retried.
timeout_s = 180
# Upper bound on attempts per row so a persistently failing row cannot
# block the whole run.
max_attempts = 5
# Indices for which no attempt finished with exit code 0.
failed = []

with tqdm(initial=j, total=len(gdf), file=sys.stdout) as pbar:
    for i, row in gdf.iterrows():

        if i < j:
            continue

        # Each row runs in its own process so that a hanging Earth Engine
        # request can be killed after `timeout_s` seconds.
        for attempt in range(max_attempts):
            p1 = Process(target=get_Aux, args=(i, row, ), name='Process')
            p1.start()
            p1.join(timeout=timeout_s)
            if p1.is_alive():
                p1.terminate()
                # Reap the worker so that `exitcode` is actually populated.
                p1.join()
            if p1.exitcode == 0:
                break
        else:
            # Reached only when no attempt broke out of the loop.
            failed.append(i)

        pbar.set_description('processed: %d' % (i))
        pbar.update(1)

if failed:
    print('no result after %d attempts for indices: %s' % (max_attempts, failed))
        
        

        
        

processed: 1495:  51%|█████     | 1496/2928 [2:04:42<1:54:01,  4.78s/it] 