# Extract Satellite Imagery to Survey Locations

## Setup

In [1]:
import ee
#ee.Authenticate()
ee.Initialize()

In [20]:
import numpy as np
import geetools
from geetools import ui, cloud_mask
import os, datetime
import glob
import config as cf
import pandas as pd
import time
import geopandas as gpd
from datetime import datetime
#import eeconvert

cloud_mask_landsatSR = cloud_mask.landsatSR()
cloud_mask_sentinel2 = cloud_mask.sentinel2()

## Parameters

In [21]:
SURVEY_NAME = 'DHS'
REEXTRACT_IF_FILE_EXISTS = False

## Functions

In [22]:
# https://gis.stackexchange.com/questions/257727/iterate-over-imagecollection-returning-pandas-dataframe-using-earth-engine-pyt
def fc2df(fc):
    # Convert a FeatureCollection into a pandas DataFrame
    # Features is a list of dict with the output
    features = fc.getInfo()['features']

    dictarr = []

    for f in features:
        # Store all attributes in a dict
        attr = f['properties']
        # and treat geometry separately
        attr['geometry'] = f['geometry']  # GeoJSON Feature!
        # attr['geometrytype'] = f['geometry']['type']
        dictarr.append(attr)

    df = gpd.GeoDataFrame(dictarr)
    # Convert GeoJSON features to shape
    df = df.drop(columns=['geometry'])
    return df

def survey_to_fc_buffer(survey_df, buffer_size_urban, buffer_size_rural):
    '''
    Convert pandas dataframe of survey locations to a feature collection. 
    
    Inputs:
        survey_df: pandas dataframe of survey locations. Function assumes 
                   the dataframe contains (1) latitude, (2) longitude and
                   (3) uid variables. Assumes coordinates in WGS84.
    Returns:
        (feature collection)
    '''
    
    POLLUTION_SCALE = 10000
    
    survey_fc_list = []
    
    n_rows = survey_df.shape[0]
    for i in range(0, n_rows):
        survey_df_i = survey_df.iloc[[i]]
        
        #ur = survey_df_i['urban_rural'].iloc[0]
        #if ur == 'U':
        #    buffer_size = buffer_size_urban
        #elif ur == 'R':
        #    buffer_size = buffer_size_rural
        buffer_size = buffer_size_urban

        f_i = ee.Feature(ee.Geometry.Point([survey_df_i['longitude'].iloc[0], 
                                            survey_df_i['latitude'].iloc[0]]), 
                         {'uid': survey_df_i['uid'].iloc[0],
                          'year': str(survey_df_i['year'].iloc[0])})
        
        f_i = f_i.buffer(buffer_size)

        survey_fc_list.append(f_i)
        
    survey_fc = ee.FeatureCollection(survey_fc_list)
    
    return survey_fc

def extract_sat(survey_df, buffer_size_urban, buffer_size_rural, year, satellite, survey_name, file_name):
    '''
    Extract satellite imagery to locations 
    
    Inputs:
        survey_df: pandas dataframe of survey locations. Function assumes 
                   the dataframe contains (1) latitude, (2) longitude and
                   (3) uid variables. Assumes coordinates in WGS84.
    Returns:
        (feature collection)
    '''
    
    POLLUTION_SCALE = 10000 # HERE!
        
    year_start_sp5 = "2018-01-01"
    year_end_sp5 = '2020-12-31'
    
    # Prep worldpop -----------------------------------------------
    if satellite == 'worldpop':
        
        # Scale
        SCALE = 100 
        
        # Year
        year_use = max([2000, year])
                
        year_plus = year_use
        year_minus = year_use
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('WorldPop/GP/100m/pop')\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        # After the reducer computers the sum, it names the value "sum", not population
        BANDS = ['sum']
        
    # Prep worldpop_2020 ---------------------------------------------
    if satellite == 'worldpop2020':
        
        # Scale
        SCALE = 100 
        
        # Year
        year_use = '2020'
        
        year_plus = year_use
        year_minus = year_use
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('WorldPop/GP/100m/pop')\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        # After the reducer computers the sum, it names the value "sum", not population
        BANDS = ['sum']
            
    # Sentinel-5P OFFL AER AI: Offline UV Aerosol Index  -------------------
    if satellite == 'uv_aer':
        
        # Scale
        #SCALE = 1113.2 # takes too long
        #SCALE = 10000
        SCALE = POLLUTION_SCALE
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_AER_AI")\
            .filterDate(year_start_sp5, year_end_sp5)\
            .median()
        
        BANDS = ['absorbing_aerosol_index']
        
    # Sentinel-5P OFFL CO: Offline Carbon Monoxide  -------------------
    if satellite == 'CO':
        
        # Scale
        #SCALE = 1113.2 # takes too long
        #SCALE = 10000
        SCALE = POLLUTION_SCALE
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_CO")\
            .filterDate(year_start_sp5, year_end_sp5)\
            .median()
        
        BANDS = ['CO_column_number_density', 'H2O_column_number_density']
        
    # Sentinel-5P OFFL HCHO: Offline Formaldehyde  -------------------
    if satellite == 'HCHO':
        
        # Scale
        #SCALE = 1113.2 # takes too long
        #SCALE = 10000
        SCALE = POLLUTION_SCALE
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_HCHO")\
            .filterDate(year_start_sp5, year_end_sp5)\
            .median()
        
        BANDS = ['tropospheric_HCHO_column_number_density', 'tropospheric_HCHO_column_number_density_amf']
        
    # Sentinel-5P Nitrogen Dioxide  -----------------------------
    if satellite == 'NO2':
        
        # Scale
        #SCALE = 1113.2 # takes too long
        #SCALE = 10000
        SCALE = POLLUTION_SCALE
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_NO2")\
            .filterDate(year_start_sp5, year_end_sp5)\
            .median()
        
        BANDS = ['NO2_column_number_density', 'tropospheric_NO2_column_number_density',\
                 'stratospheric_NO2_column_number_density', 'NO2_slant_column_number_density']
        
    # Sentinel-5P OFFL O3: Offline Ozone  -------------------
    if satellite == 'ozone':
        
        # Scale
        #SCALE = 1113.2 # takes too long
        #SCALE = 10000
        SCALE = POLLUTION_SCALE
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_O3")\
            .filterDate(year_start_sp5, year_end_sp5)\
            .median()
        
        BANDS = ['O3_column_number_density', 'O3_effective_temperature']
        
    # Sentinel-5P OFFL SO2: Offline Sulphur Dioxide  -------------------
    if satellite == 'SO2':
        
        # Scale
        #SCALE = 1113.2 # takes too long
        #SCALE = 10000
        SCALE = POLLUTION_SCALE
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_SO2")\
            .filterDate(year_start_sp5, year_end_sp5)\
            .median()
        
        BANDS = ['SO2_column_number_density', 'SO2_column_number_density_amf', 'SO2_slant_column_number_density']
        
    # Sentinel-5P OFFL CH4: Offline Methane  -------------------
    if satellite == 'CH4':
        
        # Scale
        #SCALE = 1113.2 # takes too long
        #SCALE = 10000
        SCALE = POLLUTION_SCALE
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_CH4")\
            .filterDate(year_start_sp5, year_end_sp5)\
            .median()
        
        BANDS = ['CH4_column_volume_mixing_ratio_dry_air']
        
    # CSP gHM: Global Human Modification ---------------------------------
    if satellite == 'GlobalHumanModification':
        
        # Scale
        SCALE = 1000 # ok to upscale
                
        image = ee.ImageCollection("CSP/HM/GlobalHumanModification")\
            .median()
        
        # Original name is "gHM", but because only one value, it takes the
        # name of the reducer; we use mean
        BANDS = ['mean']
        
    # WorldClim BIO Variables V1 ---------------------------------
    if satellite == 'worldclim_bio':
        
        # Scale
        SCALE = 1000 # ok to upscale
                
        image = ee.Image('WORLDCLIM/V1/BIO')
        
        BANDS = ['bio01', 'bio02', 'bio03', 'bio04', 'bio05', 'bio06', 'bio07', 'bio08', 'bio09', 'bio10',\
                 'bio11', 'bio12', 'bio13', 'bio14', 'bio15', 'bio16', 'bio17', 'bio18', 'bio19']
        
    # Elevation - SRTM ------------------------------------------
    if satellite == 'elevation':
        
        # Scale
        SCALE = 1000 # ok to upscale
                
        image = ee.Image('USGS/SRTMGL1_003') # CGIAR/SRTM90_V4
        
        # elevation?
        BANDS = ['mean']
        
    # Elevation - SRTM ------------------------------------------
    if satellite == 'slope':
        # https://developers.google.com/earth-engine/datasets/catalog/CGIAR_SRTM90_V4#description
        
        # Scale
        SCALE = 500 # ok to upscale
                
        image_raw = ee.Image('USGS/SRTMGL1_003') # CGIAR/SRTM90_V4
        image_elev = image_raw.select('elevation')
        image = ee.Terrain.slope(image_elev)
                
        # mean?
        BANDS = ['mean']
        
    # Prep l5 ---------------------------------------------------
    if satellite == 'l5':
        
        SCALE = 100 # ok to upscale
        #SCALE = 2000
        
        ### Year
        # (1) landsat 5 starts in March 1984; if year is less than
        #     1985, use 1985 as year (to ensure have year before and after)
        # (2) landsat 5 ends in May 2012; if year is greater than
        #     2011, use 2011 as year
        if year < 1985:
            year_use = 1985
        elif year > 2011:
            year_use = 2011
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        #image = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2')\
        #    .filterDate(year_minus_str, year_plus_str)\
        #    #.map(cloud_mask_landsatSR)\
        #    .median()\
        #    .multiply(0.0001)
        
        image = ee.ImageCollection('LANDSAT/LT05/C02/T1_L2')\
            .filterDate(year_minus_str, year_plus_str)\
            .median()\
            .multiply(0.0001)

        # https://www.linkedin.com/pulse/ndvi-ndbi-ndwi-calculation-using-landsat-7-8-tek-bahadur-kshetri
        ndvi = image.normalizedDifference(['B4', 'B3']).rename('NDVI');
        ndbi = image.normalizedDifference(['B5', 'B4']).rename('NDBI');
        image = image.addBands(ndvi)
        image = image.addBands(ndbi)
        
        bu = image.select('NDBI').subtract(image.select('NDVI')).rename('BU')
        image = image.addBands(bu)
        
        BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'NDVI', 'NDBI', 'BU']
        
    # Prep l7 ---------------------------------------------------
    if (satellite == 'l7') | (satellite == 'l7_sdspace'):
        
        SCALE = 100 # ok to upscale
        #SCALE = 2000
        
        # Year
        # landsat 7 starts in May 1999; if year is less than
        # 2000, use 2000 as year (to ensure have year before and after)
        if year < 2000:
            year_use = 2000
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('LANDSAT/LE07/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .median()\
            .multiply(0.0001)

        # https://www.linkedin.com/pulse/ndvi-ndbi-ndwi-calculation-using-landsat-7-8-tek-bahadur-kshetri
        ndvi = image.normalizedDifference(['B4', 'B3']).rename('NDVI');
        ndbi = image.normalizedDifference(['B5', 'B4']).rename('NDBI');
        image = image.addBands(ndvi)
        image = image.addBands(ndbi)
        
        bu = image.select('NDBI').subtract(image.select('NDVI')).rename('BU')
        image = image.addBands(bu)
        
        BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'NDVI', 'NDBI', 'BU']
        
    # Prep l7 ---------------------------------------------------
    if satellite == 'l7_sdtime':
        
        SCALE = 100 # ok to upscale
        #SCALE = 2000
        
        # Year
        # landsat 7 starts in May 1999; if year is less than
        # 2000, use 2000 as year (to ensure have year before and after)
        if year < 2000:
            year_use = 2000
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('LANDSAT/LE07/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .reduce(ee.Reducer.stdDev())
        
        BANDS = ['B1_stdDev', 'B2_stdDev', 'B3_stdDev', 'B4_stdDev', 'B5_stdDev', 'B6_stdDev', 'B7_stdDev']
                
    # Prep l8 ---------------------------------------------------
    if (satellite == 'l8') | (satellite == 'l8_sdspace'):
        
        SCALE = 100 # ok to upscale
        #SCALE = 2000
        
        # Year
        # landsat 8 starts in April 2013; if year is less than
        # 2014, use 2014 as year (to ensure have year before and after)
        if year < 2014:
            year_use = 2014
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .median()\
            .multiply(0.0001)

        # https://www.linkedin.com/pulse/ndvi-ndbi-ndwi-calculation-using-landsat-7-8-tek-bahadur-kshetri
        ndvi = image.normalizedDifference(['B5', 'B4']).rename('NDVI');
        ndbi = image.normalizedDifference(['B6', 'B5']).rename('NDBI');
        image = image.addBands(ndvi)
        image = image.addBands(ndbi)
        
        bu = image.select('NDBI').subtract(image.select('NDVI')).rename('BU')
        image = image.addBands(bu)
        
        BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B10', 'B11', 'NDVI', 'NDBI', 'BU']
        
    # Prep l8 ---------------------------------------------------
    if satellite == 'l8_sdtime':
        
        SCALE = 100 # ok to upscale
        #SCALE = 2000
        
        # Year
        # landsat 8 starts in April 2013; if year is less than
        # 2014, use 2014 as year (to ensure have year before and after)
        if year < 2014:
            year_use = 2014
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .reduce(ee.Reducer.stdDev())
        
        BANDS = ['B1_stdDev', 'B2_stdDev', 'B3_stdDev', 'B4_stdDev', 'B5_stdDev', 'B6_stdDev', 'B7_stdDev', 'B10_stdDev', 'B11_stdDev']
                
    # Prep s2 ---------------------------------------------------
    if satellite == 's2':
        
        SCALE = 100 # ok to upscale
        
        # Year
        # sentinel starts in March 2017; juse use 2018
        year_use = 2018
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-12-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('COPERNICUS/S2_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_sentinel2)\
            .median()\
            .multiply(0.0001)
        
        ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI');
        image = image.addBands(ndvi)
        
        BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12', 'AOT', 'NDVI']

        image = image.select(BANDS) 
      
    # Prep SAR Median - HH/HV DESC ---------------------------------------------------
    # https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S1_GRD
    if 's1_sar' in satellite:
        
        SCALE = 10 # ok to upscale
    
        ## H/V
        if 'hh' in satellite:
            HV_VAR = 'HH'
            
        if 'hv' in satellite:
            HV_VAR = 'HV'
            
        if 'vv' in satellite:
            HV_VAR = 'VV'
            
        if 'vh' in satellite:
            HV_VAR = 'VH'
            
        if 'vdiv' in satellite:
            HV_VAR = 'VV_DIV_VH'
            
        ## A/D
        if 'desc' in satellite:
            AD_VAR = 'DESCENDING'
            
        if 'asc' in satellite:
            AD_VAR = 'ASCENDING'
            
        ## Year
        year_use = 2018
            
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        ## Image
        if HV_VAR == 'VV_DIV_VH':
            
            image_vv = ee.ImageCollection('COPERNICUS/S1_GRD')\
                .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))\
                .filter(ee.Filter.eq('instrumentMode', 'IW'))\
                .filter(ee.Filter.eq('orbitProperties_pass', AD_VAR))\
                .select('VV')\
                .filterDate(year_minus_str, year_plus_str)\
                .mean()

            image_vh = ee.ImageCollection('COPERNICUS/S1_GRD')\
                .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH'))\
                .filter(ee.Filter.eq('instrumentMode', 'IW'))\
                .filter(ee.Filter.eq('orbitProperties_pass', AD_VAR))\
                .select('VH')\
                .filterDate(year_minus_str, year_plus_str)\
                .mean()

            image = image_vv.divide(image_vh)
        
        else: 
            image = ee.ImageCollection('COPERNICUS/S1_GRD')\
                .filter(ee.Filter.listContains('transmitterReceiverPolarisation', HV_VAR))\
                .filter(ee.Filter.eq('instrumentMode', 'IW'))\
                .filter(ee.Filter.eq('orbitProperties_pass', AD_VAR))\
                .select(HV_VAR)\
                .filterDate(year_minus_str, year_plus_str)\
                .mean()

        ## Mean / Std Dev
        #if 'mean' in satellite:
        #    image = image.mean()
            
        #if 'stddev' in satellite:
        #    image = image.reduce(ee.Reducer.stdDev())
            
        BANDS = ['mean']
                

    # Prep drought ---------------------------------------------------
    if satellite == 'gridmet_drought':
        
        SCALE = 5000 

        year_minus_str = str(year) + '-01-01'
        year_plus_str = str(year) + '-12-31'
        
        image = ee.ImageCollection("GRIDMET/DROUGHT")\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        BANDS = ['pdsi', 'z', 'eddi1y', 'eddi2y', 'eddi5y']
        
    # Prep AOD ------------------------------------------------------
    if satellite == 'aod':
        
        SCALE = 1000 
        
        year_use = max([2001, year])

        year_minus_str = str(year_use) + '-01-01'
        year_plus_str = str(year_use) + '-12-31'
        
        image = ee.ImageCollection("MODIS/006/MCD19A2_GRANULES")\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        BANDS = ['Optical_Depth_047', 'Optical_Depth_055']
    
    # Prep drought ---------------------------------------------------
    if satellite == 'ecmwf_weather':
        
        SCALE = 1000 

        # Data available until July 2020
        year_use = min([2019, year])
        
        year_minus_str = str(year_use) + '-01-01'
        year_plus_str = str(year_use) + '-12-31'
        
        image = ee.ImageCollection("ECMWF/ERA5/DAILY")\
            .filterDate(year_minus_str, year_plus_str)\
            .mean()
        
        BANDS = ['mean_2m_air_temperature', 
                 'minimum_2m_air_temperature', 
                 'maximum_2m_air_temperature',
                 'total_precipitation']
        
    # Prep drought ---------------------------------------------------
    if satellite == 'ecmwf_weather_q1':
        
        SCALE = 1000 
        
        # Data available until July 2020
        year_use = min([2019, year])

        year_minus_str = str(year_use) + '-01-01'
        year_plus_str = str(year_use) + '-03-31'
        
        image = ee.ImageCollection("ECMWF/ERA5/DAILY")\
            .filterDate(year_minus_str, year_plus_str)\
            .mean()
        
        BANDS = ['mean_2m_air_temperature', 
                 'minimum_2m_air_temperature', 
                 'maximum_2m_air_temperature',
                 'total_precipitation']
        
    # Prep drought ---------------------------------------------------
    if satellite == 'ecmwf_weather_q2':
        
        SCALE = 1000 
        
        # Data available until July 2020
        year_use = min([2019, year])

        year_minus_str = str(year_use) + '-04-01'
        year_plus_str = str(year_use) + '-06-30'
        
        image = ee.ImageCollection("ECMWF/ERA5/DAILY")\
            .filterDate(year_minus_str, year_plus_str)\
            .mean()
        
        BANDS = ['mean_2m_air_temperature', 
                 'minimum_2m_air_temperature', 
                 'maximum_2m_air_temperature',
                 'total_precipitation']
        
    # Prep drought ---------------------------------------------------
    if satellite == 'ecmwf_weather_q3':
        
        SCALE = 1000 
        
        # Data available until July 2020
        year_use = min([2019, year])

        year_minus_str = str(year_use) + '-07-01'
        year_plus_str = str(year_use) + '-09-30'
        
        image = ee.ImageCollection("ECMWF/ERA5/DAILY")\
            .filterDate(year_minus_str, year_plus_str)\
            .mean()
        
        BANDS = ['mean_2m_air_temperature', 
                 'minimum_2m_air_temperature', 
                 'maximum_2m_air_temperature',
                 'total_precipitation']
        
    # Prep drought ---------------------------------------------------
    if satellite == 'ecmwf_weather_q4':
        
        SCALE = 1000 
        
        # Data available until July 2020
        year_use = min([2019, year])

        year_minus_str = str(year_use) + '-10-01'
        year_plus_str = str(year_use) + '-12-31'
        
        image = ee.ImageCollection("ECMWF/ERA5/DAILY")\
            .filterDate(year_minus_str, year_plus_str)\
            .mean()
        
        BANDS = ['mean_2m_air_temperature', 
                 'minimum_2m_air_temperature', 
                 'maximum_2m_air_temperature',
                 'total_precipitation']
    
    # Prep viirs ---------------------------------------------------
    if (satellite == 'viirs') | (satellite == 'viirs_sdspace'):
        
        SCALE = 500 
        
        # Year
        # VIIRS starts in April 2012; if year is less than
        # 2013, use 2013 as year (to ensure have year before and after)
        if year < 2013:
            year_use = 2013
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG')\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        BANDS = ['avg_rad']
        
    # Prep viirs ---------------------------------------------------
    # https://gis.stackexchange.com/questions/344626/gee-pixel-based-sd-over-time-series-sentinel-2-ndvi
    if satellite == 'viirs_sdtime':
        
        SCALE = 500 
        
        # Year
        # VIIRS starts in April 2012; if year is less than
        # 2013, use 2013 as year (to ensure have year before and after)
        if year < 2013:
            year_use = 2013
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG')\
            .filterDate(year_minus_str, year_plus_str)\
            .reduce(ee.Reducer.stdDev())
        
        BANDS = ['avg_rad_stdDev']
        
    # Prep viirs181920 ---------------------------------------------------
    if satellite == 'viirs181920':
        
        SCALE = 500 
        
        # Year
        # VIIRS starts in April 2012; if year is less than
        # 2013, use 2013 as year (to ensure have year before and after)
        year_use = 2019
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG')\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        BANDS = ['avg_rad']
        
    # Prep DMSP ---------------------------------------------------
    if satellite == 'dmsp':
        
        SCALE = 1000 
        
        # Year
        # DMSP-OLS starts in 2013; if year is more than
        # 2012, use 2012 as year (to ensure have year before and after)
        if year > 2012:
            year_use = 2012
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('NOAA/DMSP-OLS/NIGHTTIME_LIGHTS')\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        BANDS = ['stable_lights', 'avg_lights_x_pct']
    
    # Prep Survey ---------------------------------------------------
    survey_fc = survey_to_fc_buffer(survey_df, buffer_size_urban, buffer_size_rural)
        
    # Extract Values ---------------------------------------------------
    if (satellite == 'worldpop') | (satellite == 'worldpop2020'):
        vals = image.reduceRegions(collection = survey_fc,
                                   reducer = ee.Reducer.sum(),
                                   scale = SCALE,
                                   tileScale = 8)
    elif (satellite == 'viirs_sdspace') | (satellite == 'l7_sdspace') | (satellite == 'l8_sdspace') | ( ('s1_sar' in satellite) & ('stddev' in satellite) ):
        vals = image.reduceRegions(collection = survey_fc,
                           reducer = ee.Reducer.stdDev(),
                           scale = SCALE,
                           tileScale = 8)   
    elif (satellite == 'NO2') | (satellite == 'uv_aer') | (satellite == 'CO') | (satellite == 'HCHO') | (satellite == 'ozone') | (satellite == 'SO2') | (satellite == 'CH4'):
        
        #vals = survey_fc.map(lambda feature: ee.Feature(None, image.reduceRegion(
        #    reducer=ee.Reducer.mean(),
        #    geometry=feature.geometry(),
        #    scale=SCALE,
        #    bestEffort = True
        #)))
        
        vals = image.reduceRegions(collection = survey_fc,
                                   reducer = ee.Reducer.mean(),
                                   scale = SCALE) 
        
    else:
        
        vals = image.reduceRegions(collection = survey_fc,
                                   reducer = ee.Reducer.mean(),
                                   scale = SCALE,
                                   tileScale = 8) 
        

        
    # OLD =============
    # Survey dataset that only contains the uid variable
    #survey_df = survey_df[['uid']]
            
    #for band_i in BANDS:
    #    survey_df[satellite + '_' + band_i] = vals.aggregate_array(band_i).getInfo()
        
    # NEW =============
    #df_out = fc2df(vals)
    #print(df_out)
    #df_out = pd.DataFrame()
    
    bands_to_export = BANDS.copy()
    bands_to_export.append('uid')
    bands_to_export.append('year')
    #print(bands_to_export)
    
    task = ee.batch.Export.table.toDrive(collection=vals, 
                                         folder='satellite_data_from_gee_' + survey_name.lower(), 
                                         description=file_name, 
                                         fileFormat='CSV',
                                         selectors = bands_to_export)
    
    #task = ee.batch.Export.table.toDrive(collection=vals, 
    #                                     folder='gee_outputs_dhs', 
    #                                     description=file_name, 
    #                                     fileFormat='CSV',
    #                                     selectors = bands_to_export)
     #selectors=props
    task.start()
    #ee.batch.data.startProcessing(mytask.id, mytask.config)
    
    if False:
        time_elapsed = 0
        while task.active():
            if((time_elapsed % 60) == 0):
                print('Polling for task (id: {}).'.format(task.id))
            time.sleep(5)
            time_elapsed = time_elapsed + 5
        
    return task

def extract_satellite_in_chunks(survey_df, buffer_size_urban, buffer_size_rural, satellite, file_name, year, survey_name):
    
    vals_df_list = []
    
    for chunk_i in list(np.unique(survey_df.chunk_id)):

        survey_df_i = survey_df[survey_df['chunk_id'] == chunk_i]
        vals_i_df = extract_sat(survey_df_i, buffer_size_urban, buffer_size_rural, satellite, file_name, year, chunk_i, survey_name)

        vals_df_list.append(vals_i_df)
    
    return vals_df_list

def extract_satellite_by_year(survey_df, buffer_size_urban, buffer_size_rural, satellite, file_name, survey_name):
    
    vals_df_list = []
    
    for year_i in list(np.unique(survey_df.year)):

        survey_df_i = survey_df[survey_df['year'] == year_i]
        vals_i_df = extract_satellite_in_chunks(survey_df_i, buffer_size_urban, buffer_size_rural, satellite, file_name, year_i, survey_name)

        vals_df_list.append(vals_i_df)
    
    return vals_df_list

def chunk_ids(total_length, chunk_size):
    n_numbers = np.ceil(total_length / chunk_size)
    n_numbers = int(n_numbers)
    
    chunk_ids = list(range(0,n_numbers)) * chunk_size
    chunk_ids.sort()
    chunk_ids = chunk_ids[:total_length]
    
    return chunk_ids

## Load/Prep Survey Data

In [23]:
survey_df = pd.read_csv(os.path.join(cf.DROPBOX_DIRECTORY, 'Data', SURVEY_NAME, 'FinalData', 'Individual Datasets', 'survey_socioeconomic.csv'))
survey_df = survey_df[['uid', 'year', 'latitude', 'longitude', 'most_recent_survey']] # urban_rural
survey_df = survey_df.sort_values('year')
#survey_df = survey_df.head(1000)
#survey_df = survey_df[survey_df.uid != 'IA201400180012']

survey_years = list(survey_df.year.unique())

#CHUNK_SIZE = 1000
#survey_df['chunk_id'] = 0chunk_ids(survey_df.shape[0], CHUNK_SIZE)

In [24]:
print(survey_df.head())
print(survey_df.shape)

                  uid  year   latitude  longitude  most_recent_survey
3540   BF199900000002  1998  12.515376  -1.690597               False
76814  TG199800000137  1998   6.235644   1.472263               False
76815  TG199800000138  1998   6.770265   1.512131               False
76816  TG199800000139  1998   6.708370   1.491649               False
19776  GH199800000130  1998   5.653637   0.024108               False
(82227, 5)


## If re-extract, delete existing files

In [25]:
if REEXTRACT_IF_FILE_EXISTS:
    print("Deleting existing files from Google Drive")

    ## Path with files
    OUT_PATH = os.path.join(cf.GOOGLEDRIVE_DIRECTORY, 
                            'Data', 
                             SURVEY_NAME, 
                             'FinalData', 
                             'Individual Datasets',
                             'satellite_data_from_gee_' + SURVEY_NAME.lower())

    ## Grab csv files
    files_to_rm = [x for x in os.listdir(OUT_PATH) if '.csv' in x]

    ## Delete files
    for file_i in files_to_rm:

        path_i = os.path.join(OUT_PATH, file_i)
        os.remove(path_i)

## List of files already extracted

In [26]:
## Path with files
OUT_PATH = os.path.join(cf.GOOGLEDRIVE_DIRECTORY, 
                        'Data', 
                         SURVEY_NAME, 
                         'FinalData', 
                         'Individual Datasets',
                         'satellite_data_from_gee_' + SURVEY_NAME.lower())

## Grab csv files
files_extracted = [x for x in os.listdir(OUT_PATH) if '.csv' in x]

len(files_extracted)

1544

In [27]:
#if (file_name_i_csv not in files_extracted):

In [28]:
file_name_i_csv

'gee_s1_sar_vdiv_asc_stddev_ubuff2500_rbuff2500_2020_16.csv'

In [29]:
# file_name_i_csv not in files_extracted

## Extract Values

In [30]:
# "file": check if file exists
# "data": check processed data

how_check_processed = 'file' 

In [31]:
to_extract = ['elevation', 
              'slope',
              #'viirs_750',
              'viirs_1120',
              #'viirs_1250',
              #'viirs_1500',
              #'viirs_2000',
              'viirs_2500',
              'viirs_3360',
              #'viirs_5000',
              #'viirs181920_750',
              'viirs181920_1120',
              #'viirs181920_1250',
              #'viirs181920_1500',
              #'viirs181920_2000',
              #'viirs181920_2500',
              'viirs181920_3360',
              #'viirs181920_5000',
              'viirs_sdtime_2500',
              'viirs_sdspace_2500',
              'GlobalHumanModification',
              #'worldpop_750',
              #'worldpop_1500',
              'worldpop_2000',
              #'worldpop_2500',
              'worldpop_5000',
              'worldpop_10000',
              #'worldpop2020_750',
              #'worldpop2020_1500',
              'worldpop2020_2000',
              #'worldpop2020_2500',
              'worldpop2020_5000',
              'worldpop2020_10000',
              'l8',
              'l7',
              'l8_sdtime',
              'l7_sdtime',
              'l8_sdspace',
              'l7_sdspace',
              'aod',
              'ecmwf_weather',
              'ecmwf_weather_q1',
              'ecmwf_weather_q2',
              'ecmwf_weather_q3',
              'ecmwf_weather_q4',
             's1_sar_h_med', 
              's1_sar_h_stddev']

to_extract = ['s1_sar_hh_desc_mean',
             's1_sar_hh_desc_stddev',
             's1_sar_hh_asc_mean',
             's1_sar_hh_asc_stddev',
             's1_sar_vv_desc_mean',
             's1_sar_vv_desc_stddev',
             's1_sar_vv_asc_mean',
             's1_sar_vv_asc_stddev',
             's1_sar_hv_desc_mean',
             's1_sar_hv_desc_stddev',
             's1_sar_hv_asc_mean',
             's1_sar_hv_asc_stddev',
             's1_sar_vh_desc_mean',
             's1_sar_vh_desc_stddev',
             's1_sar_vh_asc_mean',
             's1_sar_vh_asc_stddev']

to_extract = ['s1_sar_vv_desc_mean',
             's1_sar_vv_desc_stddev',
             's1_sar_vv_asc_mean',
             's1_sar_vv_asc_stddev',
             's1_sar_vh_desc_mean',
             's1_sar_vh_desc_stddev',
             's1_sar_vh_asc_mean',
             's1_sar_vh_asc_stddev',
             's1_sar_vdiv_desc_mean',
             's1_sar_vdiv_desc_stddev',
             's1_sar_vdiv_asc_mean',
             's1_sar_vdiv_asc_stddev']

tasks_all = []

# Loop over satellites ------------------------------
for name in to_extract:
    print(name)
        
    sat = name
    
    if name in ['NO2', 'uv_aer', 'CO', 'HCHO', 'ozone', 'SO2', 'CH4', 'l8', 'l7', 'l5', 'l7_sdtime', 'l8_sdtime', 'l7_sdspace', 'l8_sdspace', 'aod', 'GlobalHumanModification', 'elevation', 'slope']:
        if SURVEY_NAME == "DHS":
            buffer_u = 2500
            buffer_r = 2500
            
        if SURVEY_NAME == "PAK_POINTS":
            buffer_u = 1500
            buffer_r = 1500
            
        if SURVEY_NAME == "PAK_CITY_POINTS":
            buffer_u = 750
            buffer_r = 750
            
    if 's1_sar' in name:
        buffer_u = 2500
        buffer_r = 2500
                    
    if name in ['ecmwf_weather',
                'ecmwf_weather_q1', 'ecmwf_weather_q2', 'ecmwf_weather_q3', 'ecmwf_weather_q4']:
        # 27km radius
        buffer_u = 10000
        buffer_r = 10000
        
    if name == 'viirs181920_750':
        sat = 'viirs'
        buffer_u = 750
        buffer_r = 750
        
    if name == 'viirs181920_1120':
        sat = 'viirs'
        buffer_u = 1120
        buffer_r = 1120
                
    if name == 'viirs181920_1250':
        sat = 'viirs'
        buffer_u = 1250
        buffer_r = 1250
        
    if name == 'viirs181920_1500':
        sat = 'viirs'
        buffer_u = 1500
        buffer_r = 1500
        
    if name == 'viirs181920_2000':
        sat = 'viirs'
        buffer_u = 2000
        buffer_r = 2000
        
    if name == 'viirs181920_2500':
        sat = 'viirs'
        buffer_u = 2500
        buffer_r = 2500
        
    if name == 'viirs181920_3360':
        sat = 'viirs'
        buffer_u = 3360
        buffer_r = 3360
        
    if name == 'viirs181920_5000':
        sat = 'viirs'
        buffer_u = 5000
        buffer_r = 5000

    if name == 'viirs_750':
        sat = 'viirs'
        buffer_u = 750
        buffer_r = 750
            
    if name == 'viirs_1120':
        sat = 'viirs'
        buffer_u = 1120
        buffer_r = 1120
            
    if name == 'viirs_1250':
        sat = 'viirs'
        buffer_u = 1250
        buffer_r = 1250
        
    if name == 'viirs_1500':
        sat = 'viirs'
        buffer_u = 1500
        buffer_r = 1500
            
    if name == 'viirs_2000':
        sat = 'viirs'
        buffer_u = 2000
        buffer_r = 2000
        
    if name == 'viirs_2500':
        sat = 'viirs'
        buffer_u = 2500
        buffer_r = 2500
        
    if name == 'viirs_3360':
        sat = 'viirs'
        buffer_u = 3360
        buffer_r = 3360
        
    if name == 'viirs_5000':
        sat = 'viirs'
        buffer_u = 5000
        buffer_r = 5000
        
    if name == 'viirs_sdtime_2500':
        sat = 'viirs_sdtime'
        buffer_u = 2500
        buffer_r = 2500
        
    if name == 'viirs_sdspace_2500':
        sat = 'viirs_sdspace'
        buffer_u = 2500
        buffer_r = 2500
        
    if name == 'worldpop_750':
        sat = 'worldpop'
        buffer_u = 750
        buffer_r = 750
        
    if name == 'worldpop_1500':
        sat = 'worldpop'
        buffer_u = 1500
        buffer_r = 1500
        
    if name == 'worldpop_2000':
        sat = 'worldpop'
        buffer_u = 2000
        buffer_r = 2000
        
    if name == 'worldpop_2500':
        sat = 'worldpop'
        buffer_u = 2500
        buffer_r = 2500
        
    if name == 'worldpop_5000':
        sat = 'worldpop'
        buffer_u = 5000
        buffer_r = 5000
        
    if name == 'worldpop_10000':
        sat = 'worldpop'
        buffer_u = 10000
        buffer_r = 10000
       
    if name == 'worldpop2020_750':
        sat = 'worldpop2020'
        buffer_u = 750
        buffer_r = 750
    
    if name == 'worldpop2020_1500':
        sat = 'worldpop2020'
        buffer_u = 1500
        buffer_r = 1500
        
    if name == 'worldpop2020_2000':
        sat = 'worldpop2020'
        buffer_u = 2000
        buffer_r = 2000
        
    if name == 'worldpop2020_2500':
        sat = 'worldpop2020'
        buffer_u = 2500
        buffer_r = 2500
        
    if name == 'worldpop2020_5000':
        sat = 'worldpop2020'
        buffer_u = 5000
        buffer_r = 5000
        
    if name == 'worldpop2020_10000':
        sat = 'worldpop2020'
        buffer_u = 10000
        buffer_r = 10000
        
    survey_df_use = survey_df.copy()
    
    # Define Chunk Size ---------------------------------
    CHUNK_SIZE = 5000
    
    if sat in ['NO2', 'uv_aer', 'CO', 'HCHO', 'ozone', 'SO2', 'CH4']:
        CHUNK_SIZE = 1
        
    if sat in ['NO2', 'uv_aer', 'CO', 'HCHO', 'ozone', 'SO2', 'CH4']:
        survey_df_use = survey_df_use[survey_df_use.most_recent_survey == True]
        
    ## Check to see if file exists    
    if (how_check_processed == 'file'):
        
        survey_df_use['chunk_id'] = chunk_ids(survey_df_use.shape[0], CHUNK_SIZE)

        # Loop over satellites ------------------------------

        # LOOP OVER YEARS AND CHUNKS TO EXTRACT DATA 
        for year_i in survey_df_use['year'].unique():

            survey_df_year = survey_df_use[survey_df_use['year'] == year_i]

            # LOOP OVER CHUNKS
            for chunk_id_i in survey_df_year['chunk_id'].unique():
                survey_df_year_i = survey_df_year[survey_df_year['chunk_id'] == chunk_id_i]

                # TODO: CHANGE BACK - changes to "gee_small_" for pollution stuff due to issues.
                file_name_i = 'gee_' + name + '_ubuff' + str(buffer_u) + '_rbuff' + str(buffer_r) + '_' + str(year_i) + '_' + str(chunk_id_i)    
                file_name_i_csv = file_name_i + '.csv'

                # ONLY EXTRACT DATA IF NOT ALREADY EXTRACTED

                ## Check against file name
                if (file_name_i_csv not in files_extracted):

                    task_i = extract_sat(survey_df = survey_df_year_i, 
                                         buffer_size_urban = buffer_u, 
                                         buffer_size_rural = buffer_r, 
                                         year = year_i,
                                         satellite = sat, 
                                         survey_name = SURVEY_NAME,
                                         file_name = file_name_i)

                    tasks_all.append(task_i)
                    
    if (how_check_processed == 'data'):
        
        survey_df_use_copy = survey_df_use.copy()

        ## Root name
        fname_root = 'gee_' + sat + '_ubuff' + str(buffer_u) + '_rbuff' + str(buffer_r)

        ## Make dataframe
        all_filenames = [i for i in glob.glob('*.{}'.format('.csv'))]
        sat_files     = glob.glob(OUT_PATH + '/' + fname_root + '*')
        processed_df  = pd.concat([pd.read_csv(f) for f in sat_files])

        ## Merge and subset to not processed
        processed_df = processed_df[['uid', 'year']]
        processed_df['already_scraped'] = 1

        survey_df_use_copy = survey_df_use_copy.merge(processed_df, on=['uid', 'year'], how='left')

        survey_df_ntprcsd = survey_df_use_copy[survey_df_use_copy.already_scraped.isnull()]
        
        print(survey_df_ntprcsd.shape[0])

        if (survey_df_ntprcsd.shape[0] > 0):
        
            ## Add chunks
            survey_df_ntprcsd['chunk_id'] = chunk_ids(survey_df_ntprcsd.shape[0], CHUNK_SIZE)

            # Loop over satellites ------------------------------

            # LOOP OVER YEARS AND CHUNKS TO EXTRACT DATA 
            for year_i in survey_df_ntprcsd['year'].unique():

                survey_df_year = survey_df_ntprcsd[survey_df_ntprcsd['year'] == year_i]

                # LOOP OVER CHUNKS
                for chunk_id_i in survey_df_year['chunk_id'].unique():
                    survey_df_year_i = survey_df_year[survey_df_year['chunk_id'] == chunk_id_i]

                    # TODO: CHANGE BACK - changes to "gee_small_" for pollution stuff due to issues.
                    now = datetime.now()
                    dt_string = now.strftime("%d%m%Y%H%M%S")

                    file_name_i = 'gee_' + name + '_ubuff' + str(buffer_u) + '_rbuff' + str(buffer_r) + '_' + str(year_i) + '_' + str(chunk_id_i) + '_' + str(dt_string)    
                    file_name_i_csv = file_name_i + '.csv'

                    # Extract data
                    
                    task_i = extract_sat(survey_df = survey_df_year_i, 
                                         buffer_size_urban = buffer_u, 
                                         buffer_size_rural = buffer_r, 
                                         year = year_i,
                                         satellite = sat, 
                                         survey_name = SURVEY_NAME,
                                         file_name = file_name_i)

                    tasks_all.append(task_i)
                    
                    time.sleep(1.1)

s1_sar_vv_desc_mean
s1_sar_vv_desc_stddev
s1_sar_vv_asc_mean
s1_sar_vv_asc_stddev
s1_sar_vh_desc_mean
s1_sar_vh_desc_stddev
s1_sar_vh_asc_mean
s1_sar_vh_asc_stddev
s1_sar_vdiv_desc_mean
gee_s1_sar_vdiv_desc_mean_ubuff2500_rbuff2500_2015_9.csv
gee_s1_sar_vdiv_desc_mean_ubuff2500_rbuff2500_2015_10.csv
gee_s1_sar_vdiv_desc_mean_ubuff2500_rbuff2500_2015_11.csv
gee_s1_sar_vdiv_desc_mean_ubuff2500_rbuff2500_2015_12.csv
gee_s1_sar_vdiv_desc_mean_ubuff2500_rbuff2500_2017_14.csv
s1_sar_vdiv_desc_stddev
gee_s1_sar_vdiv_desc_stddev_ubuff2500_rbuff2500_2002_1.csv
gee_s1_sar_vdiv_desc_stddev_ubuff2500_rbuff2500_2003_2.csv
gee_s1_sar_vdiv_desc_stddev_ubuff2500_rbuff2500_2005_2.csv
gee_s1_sar_vdiv_desc_stddev_ubuff2500_rbuff2500_2008_3.csv
gee_s1_sar_vdiv_desc_stddev_ubuff2500_rbuff2500_2010_5.csv
gee_s1_sar_vdiv_desc_stddev_ubuff2500_rbuff2500_2011_5.csv
gee_s1_sar_vdiv_desc_stddev_ubuff2500_rbuff2500_2014_6.csv
gee_s1_sar_vdiv_desc_stddev_ubuff2500_rbuff2500_2015_7.csv
gee_s1_sar_vdiv_desc_stddev_u

In [12]:
to_extract = ['CH4', 'SO2', 'ozone', 'HCHO', 'CO', 'uv_aer', 'NO2']

In [13]:
'CH4' in to_extract

True

In [14]:
survey_df

Unnamed: 0,uid,year,latitude,longitude,most_recent_survey
3540,BF199900000002,1998,12.515376,-1.690597,False
76814,TG199800000137,1998,6.235644,1.472263,False
76815,TG199800000138,1998,6.770265,1.512131,False
76816,TG199800000139,1998,6.708370,1.491649,False
19776,GH199800000130,1998,5.653637,0.024108,False
...,...,...,...,...,...
55944,KE202000002154,2020,-3.998680,39.624774,True
55943,KE202000002144,2020,-4.034569,39.686857,True
55942,KE202000002140,2020,-4.012838,39.695890,True
55940,KE202000002121,2020,-1.306749,36.893506,True


In [None]:
survey_df

In [None]:
#survey_fc1 = survey_df[survey_df.uid.isin(['TG199800000508', 'TG199800000509', 'TG199800000510', 'TG199800000511'])]
#survey_fc1

In [None]:
SCALE = 10

In [None]:
#survey_fc = survey_to_fc_buffer(survey_fc1, 2500, 2500)
survey_fc = survey_to_fc_buffer(survey_df.head(), 2500, 2500)

In [None]:
SCALE = 100 # ok to upscale
#SCALE = 2000

# Year
# Starts in late 2014
#if year < 2017:
#    year_use = 2017
#else:
#    year_use = year

year_use = 2018

year_plus = year_use + 0
year_minus = year_use - 0

year_minus_str = str(year_minus) + '-01-01'
year_plus_str = str(year_plus) + '-12-31'

image = ee.ImageCollection('COPERNICUS/S1_GRD')\
    .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'HV'))\
    .filter(ee.Filter.eq('resolution_meters', 10))\
    .filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING'))\
    .select('HV')\
    .filterDate(year_minus_str, year_plus_str)\
    .reduce(ee.Reducer.stdDev())


#.reduce(ee.Reducer.stdDev())
#BANDS = ['VV', 'VH']   

In [None]:
vals = image.reduceRegions(collection = survey_fc,
                           reducer = ee.Reducer.mean(),
                           scale = SCALE) 

In [None]:
vals.getInfo()

In [None]:
bands_to_export = BANDS.copy()
bands_to_export.append('uid')
bands_to_export.append('year')
#print(bands_to_export)

bands_to_export

In [None]:
task = ee.batch.Export.table.toDrive(collection=vals, 
                                     folder='satellite_data_from_gee_' + 'dhs'.lower(), 
                                     description='TEST', 
                                     fileFormat='CSV',
                                     selectors = bands_to_export)

In [None]:
task.start()

In [47]:
import ee

# Initialize the Earth Engine API
ee.Initialize()

# Define the study area using a geometry
study_area = ee.Geometry.Polygon([
    [[-122.49443054199219, 37.73132798754766],
     [-122.49443054199219, 37.62568842838177],
     [-122.3291015625, 37.62568842838177],
     [-122.3291015625, 37.73132798754766]]
])

# Create a list of points within the study area
points = ee.List.sequence(0, 10).map(lambda i: ee.Feature(study_area.centroid(10), {'name': i}))

# Convert the list of points to a FeatureCollection
points_fc = ee.FeatureCollection(points)

# Load Sentinel-1 VV and VH images
s1_vv = (ee.ImageCollection('COPERNICUS/S1_GRD')
         .filter(ee.Filter.eq('instrumentMode', 'IW'))
         .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))
         .filter(ee.Filter.eq('orbitProperties_pass', 'DESCENDING'))
         .select('VV'))

s1_vh = (ee.ImageCollection('COPERNICUS/S1_GRD')
         .filter(ee.Filter.eq('instrumentMode', 'IW'))
         .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH'))
         .filter(ee.Filter.eq('orbitProperties_pass', 'DESCENDING'))
         .select('VH'))

# Extract values at the points for each image in the collections
s1_vv_values = s1_vv.map(lambda image: image.reduceRegions(
    collection=points_fc,
    reducer=ee.Reducer.mean(),
    scale=10
))

s1_vh_values = s1_vh.map(lambda image: image.reduceRegions(
    collection=points_fc,
    reducer=ee.Reducer.mean(),
    scale=10
))

# Flatten the collections of point features to a single list
s1_vv_list = s1_vv_values.flatten()
s1_vh_list = s1_vh_values.flatten()

# Print the list of point features
print(s1_vv_list.getInfo())
print(s1_vh_list.getInfo())

EEException: Collection query aborted after accumulating over 5000 elements.

In [93]:
if 'hh' in 's1_sar_hh_asc_med':
    VAR = 'HH'
    
if 'HV' in 's1_sar_hh_asc_med':
    VAR = 'HV'

True