In [7]:
import sys
from pathlib import Path
import time
import numpy as np
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
from eeSAR.s1 import s1_collection, s1_timescan
from datetime import datetime as dt, timedelta
import ee
# Initialise the Earth Engine client library before any ee.* object is used.
ee.Initialize()

In [2]:
# Sample table that already carries the tscan, srtm, landcover, gldas and gpm
# values; the cells below only add the extra bands to it.
aux_samples_file = '/home/vollrath/Indonesia_sm/samples_s1_aux/gdf_s1_aux.gpkg'
gdf = gpd.read_file(aux_samples_file)
len(gdf)

2928

In [3]:
def addExtra(image):
    """Append canopy-height and Hansen Landsat composite bands to an image.

    Parameters
    ----------
    image : ee.Image
        Image whose ``system:index`` carries the acquisition year at
        characters 17-20 (presumably the Sentinel-1 GRD scene naming --
        confirm if other collections are passed in).

    Returns
    -------
    ee.Image
        ``image`` with the additional bands ``canopy_height``, ``B3``, ``B4``,
        ``B5``, ``B7``, ``ndvi``, ``ndmi`` and ``ndbri``.

    Raises
    ------
    KeyError
        If the year parsed from ``system:index`` is not covered by the Hansen
        releases listed below and is not later than the newest one.
    """

    def addGEDI(image):
        """Add the mosaicked GEDI_V27 layer, clipped to the image bounds, as ``canopy_height``."""
        gedi = (
            ee.ImageCollection('users/potapovpeter/GEDI_V27')
            .mosaic()
            .clip(image.geometry().bounds())
            .rename('canopy_height')
        )
        return image.addBands(gedi)

    def addHansen(image):
        """Add the ``last_b*`` composite bands of the matching Hansen release plus three indices."""
        # acquisition year -> Hansen global forest change asset of that year
        hansen_assets = {
            '2012': "UMD/hansen/global_forest_change_2012_v1_0",
            '2013': "UMD/hansen/global_forest_change_2013_v1_1",
            '2014': "UMD/hansen/global_forest_change_2014_v1_2",
            '2015': "UMD/hansen/global_forest_change_2015_v1_3",
            '2016': "UMD/hansen/global_forest_change_2016_v1_4",
            '2017': "UMD/hansen/global_forest_change_2017_v1_5",
            '2018': "UMD/hansen/global_forest_change_2018_v1_6",
            '2019': "UMD/hansen/global_forest_change_2019_v1_7",
        }
        latest_year = max(hansen_assets)

        # NOTE: getInfo() is a blocking client-side request, so this function
        # cannot be used inside ImageCollection.map().
        year = image.getInfo()['properties']['system:index'][17:21]

        # Scenes acquired after the newest release (2020, 2021, ...) fall back
        # to that release instead of failing the dictionary lookup.
        if year.isdigit() and int(year) > int(latest_year):
            year = latest_year

        # only the release that is actually needed is instantiated
        hansen = ee.Image(hansen_assets[year])

        b3 = hansen.select(['last_b30'], ['B3'])
        b4 = hansen.select(['last_b40'], ['B4'])
        b5 = hansen.select(['last_b50'], ['B5'])
        b7 = hansen.select(['last_b70'], ['B7'])

        # normalised differences of B4 against B3, B5 and B7
        ndvi = (b4.subtract(b3)).divide(b4.add(b3)).rename('ndvi')
        ndmi = (b4.subtract(b5)).divide(b4.add(b5)).rename('ndmi')
        ndbri = (b4.subtract(b7)).divide(b4.add(b7)).rename('ndbri')

        enriched = image
        for band in (b3, b4, b5, b7, ndvi, ndmi, ndbri):
            enriched = enriched.addBands(band)
        return enriched

    image = addGEDI(image)
    image = addHansen(image)

    return image

In [4]:
def get_Aux(i, row):
    """Sample the extra bands around one station and pickle the enriched row.

    Parameters
    ----------
    i : hashable
        Index label of the sample; used as the row label of the written frame
        and as the prefix of the pickle file name.
    row : pandas.Series
        Sample record providing ``lon``, ``lat`` and ``scene_id``. The sampled
        band values are written into this object.

    Returns
    -------
    None
        The result is stored as
        ``/home/vollrath/Indonesia_sm/samples_extra/<i>.extra.pickle``.
    """
    # Only the bands contributed by addExtra are extracted here; the S1,
    # precipitation, soil-moisture, terrain, landcover and timescan bands that
    # an earlier version of this list contained are disabled.
    extra_bands = [
        'canopy_height',
        'B7', 'B5', 'B4', 'B3', 'ndvi', 'ndmi', 'ndbri',
    ]

    station = ee.Geometry.Point(row.lon, row.lat)

    # the single Sentinel-1 scene this sample was taken from
    scene = (
        ee.ImageCollection('COPERNICUS/S1_GRD_FLOAT')
        .filter(ee.Filter.eq('system:index', row['scene_id']))
        .first()
    )
    enriched = addExtra(scene)

    # list every pixel value per band inside the buffered point, then pull the
    # result to the client
    sampled = enriched.reduceRegion(
        reducer=ee.Reducer.toList(),
        geometry=station.buffer(150),
        maxPixels=1e13,
        scale=100,
    ).getInfo()

    for band in extra_bands:
        values = sampled[band]
        if band == 'landcover':
            # categorical band: keep the most frequent class
            row[band] = np.argmax(np.bincount(values))
        else:
            row[band] = np.mean(values)

    out_file = f'/home/vollrath/Indonesia_sm/samples_extra/{i}.extra.pickle'
    pd.DataFrame.from_dict({i: row}, orient='index').to_pickle(out_file)

In [5]:
from multiprocessing import Process

j = 0              # resume point: rows whose index is below j are skipped
MAX_ATTEMPTS = 3   # tries per sample before it is recorded as failed
TIMEOUT_S = 180    # seconds a single get_Aux call may take
failed = []        # indices that never finished with exit code 0

# Every sample runs in its own process so that a hanging Earth Engine request
# can be killed after TIMEOUT_S without stalling the whole loop.
with tqdm(initial=j, total=len(gdf), file=sys.stdout) as pbar:
    for i, row in gdf.iterrows():

        if i < j:
            continue

        for attempt in range(MAX_ATTEMPTS):
            p1 = Process(target=get_Aux, args=(i, row), name='Process')
            p1.start()
            p1.join(timeout=TIMEOUT_S)

            if p1.is_alive():
                # timed out: kill the child and wait until it is reaped,
                # exitcode stays None until then
                p1.terminate()
                p1.join()

            ec = p1.exitcode
            if ec == 0:
                break
        else:
            # a non-zero exit code (exception or timeout) on every attempt
            failed.append(i)

        pbar.set_description('processed: %d' % (i))
        pbar.update(1)

if failed:
    print('no output after %d attempts for samples: %s' % (MAX_ATTEMPTS, failed))

processed: 2927: 100%|██████████| 2928/2928 [1:23:51<00:00,  1.72s/it]


In [8]:
# Gather the per-sample pickles written by get_Aux into one DataFrame.
extra_dir = Path('/home/vollrath/Indonesia_sm/samples_extra/')

# sorted so that the row order does not depend on the file system listing
pickle_files = sorted(extra_dir.glob('*.extra.pickle'))
if not pickle_files:
    # without this check `df` would stay undefined (or stale from an earlier run)
    raise FileNotFoundError(f'no *.extra.pickle files found in {extra_dir}')

# read everything first and concatenate once; concatenating inside the loop
# copies the growing frame on every iteration
df = pd.concat([pd.read_pickle(file) for file in pickle_files])

In [9]:
# Keep only samples for which every attribute could be extracted and report
# how many rows were lost.
df_no_nans = df.dropna()
print(len(df))
print(len(df_no_nans))

# lon/lat are handed to ee.Geometry.Point without a projection in get_Aux,
# i.e. they are treated as geographic WGS84 coordinates. Declare that CRS
# here, otherwise the GeoPackage is written without a spatial reference.
# NOTE: this rebinds `gdf`, which previously held the input sample table.
gdf = gpd.GeoDataFrame(
    df_no_nans,
    geometry=gpd.points_from_xy(df_no_nans.lon, df_no_nans.lat),
    crs='EPSG:4326',
)
gdf.to_file('/home/vollrath/Indonesia_sm/samples_extra/gdf_s1_extra.gpkg', driver='GPKG')

2928
2928


In [12]:
# Show the columns of the exported table: the original sample attributes
# followed by the newly added canopy_height, B*, ndvi, ndmi and ndbri bands.
gdf.columns

Index(['station', 'index', 'location', 'province', 'lon', 'lat', 'GWL_max',
       'GWL_min', 'GWL_rata', 'SM_max', 'SM_min', 'SM_rata', 'Total',
       'scene_id', 'ee_time', 'ee_geom', 'orbit_direction', 'acq_date', 'VV',
       'VH', 'VVVH_ratio', 'angle', 'LIA', 'elevation', 'aspect', 'slope',
       'landcover', 'kVV_mean', 'kVV_stdDev', 'kVV_p5', 'kVV_p95', 'kVH_mean',
       'kVH_stdDev', 'kVH_p5', 'kVH_p95', 'VV_mean', 'VV_stdDev', 'VV_p5',
       'VV_p95', 'VH_mean', 'VH_stdDev', 'VH_p5', 'VH_p95', 'precipitation',
       'prec_3', 'prec_7', 'prec_30', 'sm_1', 'sm_3', 'sm_7', 'sm_30',
       'geometry', 'canopy_height', 'B7', 'B5', 'B4', 'B3', 'ndvi', 'ndmi',
       'ndbri'],
      dtype='object')