## Setup

In [1]:
import ee
ee.Authenticate()
ee.Initialize()

Enter verification code:  4/1AX4XfWhQYf1dLGd3h9ALHD2LaY3ahYs7aKIHbzwVCYhjkHOxXUbvOoUQjsc



Successfully saved authorization token.


In [2]:
import numpy as np
import geetools
from geetools import ui, cloud_mask
import os, datetime
import config as cf
import pandas as pd
import eeconvert
import time
import geopandas as gpd

cloud_mask_landsatSR = cloud_mask.landsatSR()
cloud_mask_sentinel2 = cloud_mask.sentinel2()

  data = yaml.load(f.read()) or {}
  PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)
  defaults = yaml.load(f)


In [3]:
SURVEY_NAME = 'DHS'

## Functions

In [116]:
# https://gis.stackexchange.com/questions/257727/iterate-over-imagecollection-returning-pandas-dataframe-using-earth-engine-pyt
def fc2df(fc):
    # Convert a FeatureCollection into a pandas DataFrame
    # Features is a list of dict with the output
    features = fc.getInfo()['features']

    dictarr = []

    for f in features:
        # Store all attributes in a dict
        attr = f['properties']
        # and treat geometry separately
        attr['geometry'] = f['geometry']  # GeoJSON Feature!
        # attr['geometrytype'] = f['geometry']['type']
        dictarr.append(attr)

    df = gpd.GeoDataFrame(dictarr)
    # Convert GeoJSON features to shape
    df = df.drop(columns=['geometry'])
    return df

def survey_to_fc_buffer(survey_df, buffer_size_urban, buffer_size_rural):
    '''
    Convert pandas dataframe of survey locations to a feature collection. 
    
    Inputs:
        survey_df: pandas dataframe of survey locations. Function assumes 
                   the dataframe contains (1) latitude, (2) longitude and
                   (3) uid variables. Assumes coordinates in WGS84.
    Returns:
        (feature collection)
    '''
    
    survey_fc_list = []
    
    n_rows = survey_df.shape[0]
    for i in range(0, n_rows):
        survey_df_i = survey_df.iloc[[i]]
        
        ur = survey_df_i['urban_rural'].iloc[0]
        if ur == 'U':
            buffer_size = buffer_size_urban
        elif ur == 'R':
            buffer_size = buffer_size_rural

        f_i = ee.Feature(ee.Geometry.Point([survey_df_i['longitude'].iloc[0], 
                                            survey_df_i['latitude'].iloc[0]]), 
                         {'uid': survey_df_i['uid'].iloc[0]})
        
        f_i = f_i.buffer(buffer_size)

        survey_fc_list.append(f_i)
        
    survey_fc = ee.FeatureCollection(survey_fc_list)
    
    return survey_fc

def extract_sat(survey_df, buffer_size_urban, buffer_size_rural, satellite, year, chunk):
    '''
    Extract satellite imagery to locations 
    
    Inputs:
        survey_df: pandas dataframe of survey locations. Function assumes 
                   the dataframe contains (1) latitude, (2) longitude and
                   (3) uid variables. Assumes coordinates in WGS84.
    Returns:
        (feature collection)
    '''
    
    #print(survey_df.uid)
    
    year_start_sp5 = "2018-01-01"
    year_end_sp5 = '2020-12-31'
    
    # Prep l7 ---------------------------------------------------
    if satellite == 'worldpop':
        
        # Scale
        SCALE = 100 
        
        # Year
        year_use = year
        
        year_plus = year_use
        year_minus = year_use
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('WorldPop/GP/100m/pop')\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        # After the reducer computers the sum, it names the value "sum", not population
        BANDS = ['sum']
    
    # Prep l7 ---------------------------------------------------
    if satellite == 'l7':
        
        # Scale
        SCALE = 100 # ok to upscale
        
        # Year
        year_use = year
        
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('LANDSAT/LC07/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .median()\
            .multiply(0.0001)

        ndvi = image.normalizedDifference(['B4', 'B3']).rename('NDVI');
        image = image.addBands(ndvi)
        
        BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
        
    # Sentinel-5P OFFL AER AI: Offline UV Aerosol Index  -------------------
    if satellite == 'uv_aer':
        
        # Scale
        SCALE = 1000 # ok to upscale
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_AER_AI")\
            .median()
        
        BANDS = ['absorbing_aerosol_index']
        
    # Sentinel-5P OFFL CO: Offline Carbon Monoxide  -------------------
    if satellite == 'CO':
        
        # Scale
        SCALE = 1000 # ok to upscale
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_CO")\
            .median()
        
        BANDS = ['CO_column_number_density', 'H2O_column_number_density']
        
    # Sentinel-5P OFFL HCHO: Offline Formaldehyde  -------------------
    if satellite == 'HCHO':
        
        # Scale
        SCALE = 1000 # ok to upscale
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_HCHO")\
            .median()
        
        BANDS = ['tropospheric_HCHO_column_number_density', 'tropospheric_HCHO_column_number_density_amf']
        
    # Sentinel-5P Nitrogen Dioxide  -----------------------------
    if satellite == 'NO2':
        
        # Scale
        SCALE = 1000 # ok to upscale
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_NO2")\
            .filterDate(year_start_sp5, year_end_sp5)\
            .median()
        
        BANDS = ['NO2_column_number_density', 'tropospheric_NO2_column_number_density',\
                 'stratospheric_NO2_column_number_density', 'NO2_slant_column_number_density']
        
    # Sentinel-5P OFFL O3: Offline Ozone  -------------------
    if satellite == 'ozone':
        
        # Scale
        SCALE = 1000 # ok to upscale
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_O3")\
            .median()
        
        BANDS = ['O3_column_number_density', 'O3_effective_temperature']
        
    # Sentinel-5P OFFL SO2: Offline Sulphur Dioxide  -------------------
    if satellite == 'SO2':
        
        # Scale
        SCALE = 1000 # ok to upscale
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_SO2")\
            .median()
        
        BANDS = ['SO2_column_number_density', 'SO2_column_number_density_amf', 'SO2_slant_column_number_density']
        
    # Sentinel-5P OFFL CH4: Offline Methane  -------------------
    if satellite == 'CH4':
        
        # Scale
        SCALE = 1000 # ok to upscale
        
        # Starts in 2018; take all years
        image = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_CH4")\
            .median()
        
        BANDS = ['CH4_column_volume_mixing_ratio_dry_air']
        
    # CSP gHM: Global Human Modification ---------------------------------
    if satellite == 'GlobalHumanModification':
        
        # Scale
        SCALE = 1000 # ok to upscale
                
        image = ee.ImageCollection("CSP/HM/GlobalHumanModification")\
            .median()
        
        # Original name is "gHM", but because only one value, it takes the
        # name of the reducer; we use mean
        BANDS = ['mean']
        
    # WorldClim BIO Variables V1 ---------------------------------
    if satellite == 'worldclim_bio':
        
        # Scale
        SCALE = 1000 # ok to upscale
                
        image = ee.Image('WORLDCLIM/V1/BIO')
        
        BANDS = ['bio01', 'bio02', 'bio03', 'bio04', 'bio05', 'bio06', 'bio07', 'bio08', 'bio09', 'bio10',\
                 'bio11', 'bio12', 'bio13', 'bio14', 'bio15', 'bio16', 'bio17', 'bio18', 'bio19']
        
    # Elevation - SRTM ------------------------------------------
    if satellite == 'elevation':
        
        # Scale
        SCALE = 1000 # ok to upscale
                
        image = ee.Image('USGS/SRTMGL1_003') # CGIAR/SRTM90_V4
        
        # elevation?
        BANDS = ['mean']
        
    # Elevation - SRTM ------------------------------------------
    if satellite == 'slope':
        # https://developers.google.com/earth-engine/datasets/catalog/CGIAR_SRTM90_V4#description
        
        # Scale
        SCALE = 500 # ok to upscale
                
        image_raw = ee.Image('USGS/SRTMGL1_003') # CGIAR/SRTM90_V4
        image_elev = image_raw.select('elevation')
        image = ee.Terrain.slope(image_elev)
                
        # mean?
        BANDS = ['mean']
        
    # Prep l8 ---------------------------------------------------
    if satellite == 'l8':
        
        SCALE = 100 # ok to upscale
        #SCALE = 2000
        
        # Year
        # landsat 8 starts in April 2013; if year is less than
        # 2014, use 2014 as year (to ensure have year before and after)
        if year < 2014:
            year_use = 2014
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .median()\
            .multiply(0.0001)

        # https://www.linkedin.com/pulse/ndvi-ndbi-ndwi-calculation-using-landsat-7-8-tek-bahadur-kshetri
        ndvi = image.normalizedDifference(['B5', 'B4']).rename('NDVI');
        ndbi = image.normalizedDifference(['B6', 'B5']).rename('NDBI');
        image = image.addBands(ndvi)
        image = image.addBands(ndbi)
        
        bu = image.select('NDBI').subtract(image.select('NDVI')).rename('BU')
        image = image.addBands(bu)
        
        BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B10', 'B11', 'NDVI', 'NDBI', 'BU']
        #BANDS = ['NDVI']
        
    # Prep s2 ---------------------------------------------------
    if satellite == 's2':
        
        SCALE = 100 # ok to upscale
        
        # Year
        # sentinel starts in March 2017; juse use 2018
        year_use = 2018
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-12-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('COPERNICUS/S2_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_sentinel2)\
            .median()\
            .multiply(0.0001)
        
        ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI');
        image = image.addBands(ndvi)
        
        BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12', 'AOT', 'NDVI']

        image = image.select(BANDS) 
        
    # Prep drought ---------------------------------------------------
    if satellite == 'gridmet_drought':
        
        SCALE = 5000 

        year_minus_str = str(year) + '-01-01'
        year_plus_str = str(year) + '-12-31'
        
        image = ee.ImageCollection("GRIDMET/DROUGHT")\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        BANDS = ['pdsi', 'z', 'eddi1y', 'eddi2y', 'eddi5y']
    
    # Prep viirs ---------------------------------------------------
    if satellite == 'viirs':
        
        SCALE = 500 
        
        # Year
        # VIIRS starts in April 2012; if year is less than
        # 2013, use 2013 as year (to ensure have year before and after)
        if year < 2013:
            year_use = 2013
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG')\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        BANDS = ['avg_rad']
        
    # Prep DMSP ---------------------------------------------------
    if satellite == 'dmsp':
        
        SCALE = 1000 
        
        # Year
        # DMSP-OLS starts in 2013; if year is more than
        # 2012, use 2012 as year (to ensure have year before and after)
        if year > 2012:
            year_use = 2012
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('NOAA/DMSP-OLS/NIGHTTIME_LIGHTS')\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        BANDS = ['stable_lights', 'avg_lights_x_pct']
    
    # Prep Survey ---------------------------------------------------
    survey_fc = survey_to_fc_buffer(survey_df, buffer_size_urban, buffer_size_rural)
    
    # Extract Values ---------------------------------------------------
    if satellite == 'worldpop':
        vals = image.reduceRegions(collection = survey_fc,
                                   reducer = ee.Reducer.sum(),
                                   scale = SCALE,
                                   tileScale = 8)
    else:
        vals = image.reduceRegions(collection = survey_fc,
                                   reducer = ee.Reducer.mean(),
                                   scale = SCALE,
                                   tileScale = 8)

    # OLD =============
    # Survey dataset that only contains the uid variable
    #survey_df = survey_df[['uid']]
            
    #for band_i in BANDS:
    #    survey_df[satellite + '_' + band_i] = vals.aggregate_array(band_i).getInfo()
        
    # NEW =============
    #df_out = fc2df(vals)
    #print(df_out)
    #df_out = pd.DataFrame()
    
    bands_to_export = BANDS.copy()
    bands_to_export.append('uid')
    #print(bands_to_export)
    
    task = ee.batch.Export.table.toDrive(collection=vals, 
                                         folder='satellite_data_from_gee_dhs', 
                                         description=satellite + "_ubuff" + str(buffer_size_urban) + '_rbuff' + str(buffer_size_rural) + "_" + str(year) + '_' + str(chunk), 
                                         fileFormat='CSV',
                                         selectors = bands_to_export)
    # selectors=props
    task.start()
    #ee.batch.data.startProcessing(mytask.id, mytask.config)
    
    if False:
        time_elapsed = 0
        while task.active():
            if((time_elapsed % 60) == 0):
                print('Polling for task (id: {}).'.format(task.id))
            time.sleep(5)
            time_elapsed = time_elapsed + 5
        
    return task

def extract_satellite_in_chunks(survey_df, buffer_size_urban, buffer_size_rural, satellite, year):
    
    vals_df_list = []
    
    for chunk_i in list(np.unique(survey_df.chunk_id)):
        #print(chunk_i)
        #time.sleep(5)

        survey_df_i = survey_df[survey_df['chunk_id'] == chunk_i]
        #print(survey_df_i.shape)
        vals_i_df = extract_sat(survey_df_i, buffer_size_urban, buffer_size_rural, satellite, year, chunk_i)

        vals_df_list.append(vals_i_df)

    #vals_df = pd.concat(vals_df_list)
    
    return vals_df_list

def extract_satellite_by_year(survey_df, buffer_size_urban, buffer_size_rural, satellite):
    
    vals_df_list = []
    
    for year_i in list(np.unique(survey_df.year)):
        #print(year_i)
        #time.sleep(5)

        survey_df_i = survey_df[survey_df['year'] == year_i]
        vals_i_df = extract_satellite_in_chunks(survey_df_i, buffer_size_urban, buffer_size_rural, satellite, year_i)

        vals_df_list.append(vals_i_df)

    #vals_df = pd.concat(vals_df_list)
    
    return vals_df_list

def chunk_ids(total_length, chunk_size):
    n_numbers = np.ceil(total_length / chunk_size)
    n_numbers = int(n_numbers)
    
    chunk_ids = list(range(0,n_numbers)) * chunk_size
    chunk_ids.sort()
    chunk_ids = chunk_ids[:total_length]
    
    return chunk_ids

## Load/Prep Survey Data

In [113]:
survey_df = pd.read_csv(os.path.join(cf.SECURE_DATA_DIRECTORY, 'Data', SURVEY_NAME, 'FinalData - PII', 'GPS_uid_crosswalk.csv'))
survey_df = survey_df.sort_values('year')
#survey_df = survey_df[survey_df.uid != 'IA201400180012']

CHUNK_SIZE = 10000
survey_years = list(survey_df.year.unique())
survey_df['chunk_id'] = chunk_ids(survey_df.shape[0], CHUNK_SIZE)

## TESTING

In [168]:
survey_df_i = survey_df[survey_df.year == 2010]
survey_df_i = survey_df_i[survey_df_i.chunk_id == 1]
#survey_df_i = survey_df_i.iloc[2001:2010]

In [169]:
survey_fc_i = survey_to_fc_buffer(survey_df_i, 10000, 10000)

In [170]:
# Scale
SCALE = 1000 # ok to upscale

satellite = 'worldclim'
image = ee.Image('WORLDCLIM/V1/BIO')

BANDS = ['bio01', 'bio02', 'bio03', 'bio04', 'bio05', 'bio06', 'bio07', 'bio08', 'bio09', 'bio10',\
         'bio11', 'bio12', 'bio13', 'bio14', 'bio15', 'bio16', 'bio17', 'bio18', 'bio19']

In [171]:
vals = image.reduceRegions(collection = survey_fc_i,
                           reducer = ee.Reducer.mean(),
                           scale = SCALE,
                           tileScale = 8)

# Survey dataset that only contains the uid variable
survey_df_i = survey_df_i[['uid']]

a_uid = vals.aggregate_array('uid').getInfo()
a_bio01 = vals.aggregate_array('bio01').getInfo()
#for band_i in BANDS:
#    survey_df_i[satellite + '_' + band_i] = 


In [174]:
print(len(a_uid))
print(len(a_bio01))

3781
3768


In [189]:
# https://gis.stackexchange.com/questions/257727/iterate-over-imagecollection-returning-pandas-dataframe-using-earth-engine-pyt
def fc2df(fc):
    # Convert a FeatureCollection into a pandas DataFrame
    # Features is a list of dict with the output
    features = fc.getInfo()['features']

    dictarr = []

    for f in features:
        # Store all attributes in a dict
        attr = f['properties']
        # and treat geometry separately
        attr['geometry'] = f['geometry']  # GeoJSON Feature!
        # attr['geometrytype'] = f['geometry']['type']
        dictarr.append(attr)

    df = gpd.GeoDataFrame(dictarr)
    # Convert GeoJSON features to shape
    df = df.drop(columns=['geometry'])
    return df

In [190]:
vals_df = fc2df(vals)
vals_df.head()

Unnamed: 0,bio01,bio02,bio03,bio04,bio05,bio06,bio07,bio08,bio09,bio10,bio11,bio12,bio13,bio14,bio15,bio16,bio17,bio18,bio19,uid
0,259.44136,95.948508,84.245615,383.436229,313.885636,201.002703,112.882933,260.791517,253.903253,262.767437,253.58526,2820.033976,319.113157,130.495857,25.729098,907.313554,453.955298,721.140015,547.723125,CO201000004201
1,259.379068,96.630702,84.635908,375.786338,314.028793,200.853644,113.17515,260.603392,253.973545,262.620191,253.625346,2797.763529,317.720213,130.091674,26.197656,903.347646,446.844742,712.946625,541.175439,CO201000004203
2,259.387229,96.051129,84.314433,382.172891,313.860583,200.940873,112.91971,260.721807,253.862092,262.685453,253.545269,2817.273641,318.392661,130.218978,25.86506,906.723706,452.349605,721.071915,546.611677,CO201000004204
3,259.407052,96.274818,84.422709,379.940554,313.95059,200.911668,113.038922,260.686096,253.921213,262.688083,253.592746,2809.602807,318.227797,130.228514,25.980193,905.383259,450.293707,718.551271,544.573983,CO201000004205
4,259.410129,96.205329,84.384316,380.677006,313.933108,200.925036,113.008072,260.700783,253.911041,262.699576,253.585206,2811.925865,318.326715,130.25028,25.940061,905.795099,450.966076,719.314179,545.224981,CO201000004202


In [187]:
vals_df_na = vals_df[vals_df['bio19'].isna()]

In [105]:
elev_task

[[<Task 2C55P3X7PVVBH2XAREARRH35 EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2006_5 (UNSUBMITTED)>],
 [<Task TIYWYXGH4IW4BG5Q3V5P4BYZ EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2008_0 (UNSUBMITTED)>,
  <Task Z4TWRXDG2IZSPQWYZJESYLYU EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2008_5 (UNSUBMITTED)>],
 [<Task 3UDO7NS6EPUITD57NEE2O2C6 EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2009_1 (UNSUBMITTED)>],
 [<Task CGJGO5TKGYOJDIWRC5Z4WLW2 EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2010_0 (UNSUBMITTED)>,
  <Task Q24Q447W7EEYAY3ERTHNOCBM EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2010_1 (UNSUBMITTED)>],
 [<Task WY652APR6LXRGBDI3YVL5LCX EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2011_0 (UNSUBMITTED)>,
  <Task XCWWL6OYRTQ53MK7F5TPFZQD EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2011_1 (UNSUBMITTED)>,
  <Task Y7CV53BRG55CSJOHZ2QSZ3RE EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2011_5 (UNSUBMITTED)>],
 [<Task 5QIF6HPEWOLRI7BXFL6AAEWM EXPORT_FEATURES: elevation_ubuff500

## Extract Values

In [124]:
tasks_all = []
for sat in ['elevation', 
            'slope',
            'viirs_2000',
            'viirs_2500',
            'viirs_5000',
            'gridmet_drought'
            'GlobalHumanModification',
            'worldpop',
            'l8',
            'NO2', 'uv_aer', 'CO', 'HCHO', 'ozone', 'SO2', 'CH4']:
    
    if sat in ['NO2', 'uv_aer', 'CO', 'HCHO', 'ozone', 'SO2', 'CH4', 'l8']:
        buffer_u = 2500
        buffer_r = 2500
    
    if sat in ['elevation', 'slope', 'gridmet_drought']:
        buffer_u = 5000
        buffer_r = 5000
        
    if sat in ['worldclim_bio', 'GlobalHumanModification']:
        buffer_u = 10000
        buffer_r = 10000
        
    if sat == 'viirs_2000':
        sat = 'viirs'
        buffer_u = 2000
        buffer_r = 2000
        
    if sat == 'viirs_2500':
        sat = 'viirs'
        buffer_u = 2500
        buffer_r = 2500
        
    if sat == 'viirs_5000':
        sat = 'viirs'
        buffer_u = 5000
        buffer_r = 5000
        
    if sat == 'worldclim_bio':
        buffer_u = 5000
        buffer_r = 5000
    
    # Can check in dropbox, cleaned data if already processed
    
    tasks_i = extract_satellite_by_year(survey_df, buffer_u, buffer_r, sat)
    tasks_all.append(tasks_i)

2006
(270, 8)
2008
(1583, 8)


KeyboardInterrupt: 

In [None]:
tasks_all

In [88]:
elev_task = extract_satellite_by_year(survey_df, 5000, 5000, 'elevation')

2006
5
(270, 8)
['mean', 'uid']
2008
0
(998, 8)
['mean', 'uid']
5
(585, 8)
['mean', 'uid']
2009
1
(312, 8)
['mean', 'uid']
2010
0
(4322, 8)
['mean', 'uid']
1
(1087, 8)
['mean', 'uid']
2011
0
(341, 8)
['mean', 'uid']
1
(1128, 8)
['mean', 'uid']
5
(609, 8)
['mean', 'uid']
2012
1
(332, 8)
['mean', 'uid']
4
(215, 8)
['mean', 'uid']
5
(341, 8)
['mean', 'uid']
2013
0
(492, 8)
['mean', 'uid']
1
(524, 8)
['mean', 'uid']
5
(550, 8)
['mean', 'uid']
6
(330, 8)
['mean', 'uid']
2014
1
(3093, 8)
['mean', 'uid']
4
(2196, 8)
['mean', 'uid']
5
(1396, 8)
['mean', 'uid']
6
(119, 8)
['mean', 'uid']
2015
0
(938, 8)
['mean', 'uid']
1
(1771, 8)
['mean', 'uid']
2
(10000, 8)
['mean', 'uid']
3
(10000, 8)
['mean', 'uid']
4
(6619, 8)
['mean', 'uid']
5
(1291, 8)
['mean', 'uid']
6
(1008, 8)
['mean', 'uid']
2016
0
(552, 8)
['mean', 'uid']
1
(1072, 8)
['mean', 'uid']
5
(383, 8)
['mean', 'uid']
6
(1886, 8)
['mean', 'uid']
2017
0
(1927, 8)
['mean', 'uid']
4
(970, 8)
['mean', 'uid']
5
(1773, 8)
['mean', 'uid']
6
(365, 8

In [114]:
extract_satellite_by_year(survey_df, 5000, 5000, 'slope')

2006
0
(270, 8)
['mean', 'uid']
2008
0
(1583, 8)
['mean', 'uid']
2009
0
(312, 8)
['mean', 'uid']
2010
0
(5409, 8)
['mean', 'uid']
2011
0
(2078, 8)
['mean', 'uid']
2012
0
(348, 8)
['mean', 'uid']
1
(540, 8)
['mean', 'uid']
2013
1
(1896, 8)
['mean', 'uid']
2014
1
(6804, 8)
['mean', 'uid']
2015
1
(760, 8)
['mean', 'uid']
2
(10000, 8)
['mean', 'uid']
3
(10000, 8)
['mean', 'uid']
4
(10000, 8)
['mean', 'uid']
5
(868, 8)
['mean', 'uid']
2016
5
(3893, 8)
['mean', 'uid']
2017
5
(5035, 8)
['mean', 'uid']
2018
5
(204, 8)
['mean', 'uid']
6
(2872, 8)
['mean', 'uid']
2019
6
(1372, 8)
['mean', 'uid']


[[<Task UXEEQE3OOXVKEBFJMF7KH6N6 EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2006_0 (UNSUBMITTED)>],
 [<Task 4YC6L56IRLSF7QTHK5PGHPP7 EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2008_0 (UNSUBMITTED)>],
 [<Task UIKGCVXYPVQMUHRUEDOPSYKZ EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2009_0 (UNSUBMITTED)>],
 [<Task EITEFOZAZVYZJGEEATIHTAUL EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2010_0 (UNSUBMITTED)>],
 [<Task XPKSSAWPLYBW27UID5JMOTEK EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2011_0 (UNSUBMITTED)>],
 [<Task WNDO6BXDAXYME246KNNAIFD5 EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2012_0 (UNSUBMITTED)>,
  <Task 3TBEIZWRE7VGNB7NPRTUTWDY EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2012_1 (UNSUBMITTED)>],
 [<Task HLSQVLQV75YOL3FRTNEZBBOY EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2013_1 (UNSUBMITTED)>],
 [<Task WMPWBDCAKWLSEL2HMKTNAQRA EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2014_1 (UNSUBMITTED)>],
 [<Task 6PUBTXL6Q2HEKTXUXGRFXECR EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2015_1 (UNSUBMITTED)>,
  

In [83]:
extract_satellite_by_year(survey_df, 15000, 15000, 'gridmet_drought')

2006
5
(270, 8)
['pdsi', 'z', 'eddi1y', 'eddi2y', 'eddi5y', 'uid']
Polling for task (id: MOLPW23HKLVMQTCYVP54STX5).
2008
0
(998, 8)
['pdsi', 'z', 'eddi1y', 'eddi2y', 'eddi5y', 'uid']
Polling for task (id: RDYKDELKEE3RLJKD3Y3HBURI).


KeyboardInterrupt: 

In [115]:
extract_satellite_by_year(survey_df, 5000, 5000, 'NO2')

2006
0
(270, 8)
['NO2_column_number_density', 'tropospheric_NO2_column_number_density', 'stratospheric_NO2_column_number_density', 'NO2_slant_column_number_density', 'uid']
2008
0
(1583, 8)
['NO2_column_number_density', 'tropospheric_NO2_column_number_density', 'stratospheric_NO2_column_number_density', 'NO2_slant_column_number_density', 'uid']
2009
0
(312, 8)
['NO2_column_number_density', 'tropospheric_NO2_column_number_density', 'stratospheric_NO2_column_number_density', 'NO2_slant_column_number_density', 'uid']
2010
0
(5409, 8)
['NO2_column_number_density', 'tropospheric_NO2_column_number_density', 'stratospheric_NO2_column_number_density', 'NO2_slant_column_number_density', 'uid']
2011
0
(2078, 8)
['NO2_column_number_density', 'tropospheric_NO2_column_number_density', 'stratospheric_NO2_column_number_density', 'NO2_slant_column_number_density', 'uid']
2012
0
(348, 8)
['NO2_column_number_density', 'tropospheric_NO2_column_number_density', 'stratospheric_NO2_column_number_density', '

[[<Task 5NSSZOJAKOIY7SD4G3YWM4UV EXPORT_FEATURES: NO2_ubuff5000_rbuff5000_2006_0 (UNSUBMITTED)>],
 [<Task UXB7FCS3HBYCKQYQSAP74JIT EXPORT_FEATURES: NO2_ubuff5000_rbuff5000_2008_0 (UNSUBMITTED)>],
 [<Task DRMTAFVL6K7ZBL6QA7LO2PTM EXPORT_FEATURES: NO2_ubuff5000_rbuff5000_2009_0 (UNSUBMITTED)>],
 [<Task HZ3PKBERTB7U2VQQQSUZAOQV EXPORT_FEATURES: NO2_ubuff5000_rbuff5000_2010_0 (UNSUBMITTED)>],
 [<Task 5NG6KLGOCIGLJJUAFV776KVZ EXPORT_FEATURES: NO2_ubuff5000_rbuff5000_2011_0 (UNSUBMITTED)>],
 [<Task YWAV3FPA62ONVKTQ3SMHRBAR EXPORT_FEATURES: NO2_ubuff5000_rbuff5000_2012_0 (UNSUBMITTED)>,
  <Task UAMW5VMCIL7U6TIG5EI2O427 EXPORT_FEATURES: NO2_ubuff5000_rbuff5000_2012_1 (UNSUBMITTED)>],
 [<Task 6GRRPLEEEJZ23QVCKXO43NXN EXPORT_FEATURES: NO2_ubuff5000_rbuff5000_2013_1 (UNSUBMITTED)>],
 [<Task DAHIPDW4WWXM6XAQ4SRF2O6M EXPORT_FEATURES: NO2_ubuff5000_rbuff5000_2014_1 (UNSUBMITTED)>],
 [<Task NVIJBJXLRD7KAXIJQDB4E4NX EXPORT_FEATURES: NO2_ubuff5000_rbuff5000_2015_1 (UNSUBMITTED)>,
  <Task VEWTSE6SIMKYZV

In [68]:
121 % 60

1

## Elevation

In [56]:
elev_df = extract_satellite_by_year(survey_df, 5000, 5000, 'elevation')

#elev_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
#                            'Data', 
#                            SURVEY_NAME, 
#                            'FinalData', 
#                            'Individual Datasets',
#                            'survey_elevation_df.csv'),
#              index = False)

#print(elev_df.head())

2006
0
(270, 8)
['mean', 'uid']
Polling for task (id: T4VH6Q6VCT4DUDPYKG7KEMGB).
Polling for task (id: T4VH6Q6VCT4DUDPYKG7KEMGB).
Polling for task (id: T4VH6Q6VCT4DUDPYKG7KEMGB).
Polling for task (id: T4VH6Q6VCT4DUDPYKG7KEMGB).
2008
0
(1583, 8)
['mean', 'uid']
Polling for task (id: RK5EB2MRR7HEJ4ZJLWL4KY3J).
Polling for task (id: RK5EB2MRR7HEJ4ZJLWL4KY3J).
Polling for task (id: RK5EB2MRR7HEJ4ZJLWL4KY3J).
Polling for task (id: RK5EB2MRR7HEJ4ZJLWL4KY3J).
Polling for task (id: RK5EB2MRR7HEJ4ZJLWL4KY3J).
2009
0
(312, 8)
['mean', 'uid']
Polling for task (id: B4AQE62FYVT4LJGKMTKRZYI7).
Polling for task (id: B4AQE62FYVT4LJGKMTKRZYI7).
Polling for task (id: B4AQE62FYVT4LJGKMTKRZYI7).
Polling for task (id: B4AQE62FYVT4LJGKMTKRZYI7).
Polling for task (id: B4AQE62FYVT4LJGKMTKRZYI7).
2010
0
(5409, 8)
['mean', 'uid']
Polling for task (id: 736JS3HF73XAMZOEYUJJ4S67).
Polling for task (id: 736JS3HF73XAMZOEYUJJ4S67).
Polling for task (id: 736JS3HF73XAMZOEYUJJ4S67).
2011
0
(2078, 8)
['mean', 'uid']
Poll

EEException: Request payload size exceeds the limit: 10485760 bytes.

## Slope

In [14]:
slope_df = extract_satellite_by_year(survey_df, 5000, 5000, 'slope')

slope_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_slope_df.csv'),
              index = False)

print(slope_df.head())

2017
0
(10, 8)
       mean             uid
0  6.236195  AL201700000001
1  6.801845  AL201700000002
2  6.675056  AL201700000003
3  6.530170  AL201700000004
4  6.209261  AL201700000005


## Gridmet - Drought

In [8]:
gm_drought_df = extract_satellite_by_year(survey_df, 10000, 10000, 'gridmet_drought')

gm_drought_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_gm_drought_df.csv'),
              index = False)

print(gm_drought_df.head())

2006
11
(270, 8)
2008
0
(998, 8)
10
(585, 8)
2009
3
(312, 8)
2010
0
(541, 8)
1
(3781, 8)
2
(1087, 8)
2011
1
(341, 8)
3
(1128, 8)
10
(609, 8)
2012
2
(332, 8)
9
(215, 8)
10
(341, 8)
2013
0
(44, 8)
1
(448, 8)
2
(524, 8)
11
(550, 8)
12
(330, 8)
2014
2
(2240, 8)
3
(853, 8)
9
(2196, 8)
10
(399, 8)
11
(997, 8)
12
(119, 8)
2015
0
(938, 8)
3
(1771, 8)
4
(5000, 8)
5
(5000, 8)
6
(5000, 8)
7


KeyboardInterrupt: 

## Sentinel-5P Variables

In [83]:
#uv_aer
#CO
#HCHO
#NO2
#ozone
#SO2
#CH4

s5p_df = extract_satellite_by_year(survey_df.head(2), 5000, 5000, 'NO2')

s5p_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_s5p_uv_aer_df.csv'),
              index = False)

print(s5p_df.head())

2017
0
(2, 8)


KeyboardInterrupt: 

## World Clim Bio Variables

In [205]:
worldclim_df = extract_satellite_by_year(survey_df, 10000, 10000, 'worldclim_bio')

worldclim_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_worldclim_df.csv'),
              index = False)

print(worldclim_df.head())

2006
11
(270, 8)
2008
0
(998, 8)
10
(585, 8)
2009
3
(312, 8)
2010
0
(541, 8)
1
(3781, 8)
2
(1087, 8)
2011
1
(341, 8)
3
(1128, 8)
10
(609, 8)
2012
2
(332, 8)
9
(215, 8)
10
(341, 8)
2013
0
(44, 8)
1
(448, 8)
2
(524, 8)
11
(550, 8)
12
(330, 8)
2014
2
(2240, 8)
3
(853, 8)
9
(2196, 8)
10
(399, 8)
11
(997, 8)
12
(119, 8)
2015
0
(938, 8)
3
(1771, 8)
4
(5000, 8)
5
(5000, 8)
6
(5000, 8)
7
(5000, 8)
8
(5000, 8)
9
(1619, 8)
10
(1291, 8)
12
(1008, 8)
2016
0
(552, 8)
2
(622, 8)
3
(450, 8)
11
(383, 8)
12
(1886, 8)
2017
0
(1927, 8)
9
(970, 8)
11
(1773, 8)
12
(365, 8)
2018
1
(430, 8)
3
(401, 8)
10
(1454, 8)
11
(256, 8)
12
(535, 8)
2019
2
(195, 8)
3
(85, 8)
10
(321, 8)
11
(771, 8)
        bio01       bio02      bio03        bio04       bio05      bio06  \
0  170.811130  115.916246  59.834220  2694.423538  251.432632  59.241559   
1  171.376298  114.405584  59.772021  2671.341833  251.600364  61.803976   
2  172.315939  117.750960  59.699284  2745.070907  254.026933  58.229312   
3  177.955772  117.5189

## CSP gHM: Global Human Modification

In [49]:
glmod_df = extract_satellite_by_year(survey_df, 10000, 10000, 'GlobalHumanModification')

glmod_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_globalhumanmod_df.csv'),
              index = False)

print(glmod_df.head())

2017
0
(5, 8)
              uid  GlobalHumanModification_mean
0  AL201700000001                      0.545600
1  AL201700000002                      0.546220
2  AL201700000003                      0.537419
3  AL201700000004                      0.545217
4  AL201700000005                      0.546357


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


## Extract Sentinel

In [12]:
if False:
    val_s2_df = extract_satellite_by_year(survey_df, 2000, 2000, 's2')

    val_s2_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                                'Data', 
                                SURVEY_NAME, 
                                'FinalData', 
                                'Individual Datasets',
                               'survey_s2.csv'),
                  index = False)

    val_s2_df.head()

## Extract Landsat 8

In [301]:
#survey_df['chunk_id'] = range(0, survey_df.shape[0])

In [302]:
val_l8_df = extract_satellite_by_year(survey_df, 3000, 3000, 'l8')

val_l8_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_l8.csv'),
              index = False)

2012
29
(314, 8)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2014
29
(611, 8)
2015
0
(328, 8)
1
(1000, 8)
2
(1000, 8)
3
(1000, 8)
4
(1000, 8)
5
(1000, 8)
6
(1000, 8)
7
(1000, 8)
8
(1000, 8)
9
(1000, 8)
10
(1000, 8)
11
(1000, 8)
12
(1000, 8)
13
(1000, 8)
14
(1000, 8)
15
(1000, 8)
16
(1000, 8)
17
(1000, 8)
18
(1000, 8)
19
(1000, 8)
20
(1000, 8)
21
(1000, 8)
22
(1000, 8)
23
(1000, 8)
24
(1000, 8)
25
(1000, 8)
26
(1000, 8)
27
(1000, 8)
28
(1000, 8)
29
(75, 8)
30
(430, 8)
2016
30
(383, 8)
32
(49, 8)
33
(406, 8)
2017
0
(672, 8)
30
(187, 8)
31
(1000, 8)
32
(951, 8)


## Extract Landsat 7

In [None]:
val_l7_df = extract_satellite_by_year(survey_df, 2000, 5000, 'l7')

val_l7_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_l7.csv'),
              index = False)

val_l7_df.head()

## Extract VIIRS

In [14]:
val_viirs_df = extract_satellite_by_year(survey_df, 2000, 2000, 'viirs')

val_viirs_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_viirs_2km.csv'),
              index = False)

val_viirs_df.head()

2006
118
(270, 8)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2008
6
(94, 8)
7
(500, 8)
8
(404, 8)
102
(439, 8)
103
(146, 8)
2009
32
(161, 8)
33
(151, 8)
2010
4
(175, 8)
5
(366, 8)
12
(281, 8)
13
(500, 8)
14
(500, 8)
15
(500, 8)
16
(500, 8)
17
(500, 8)
18
(500, 8)
19
(500, 8)
20
(500, 8)
21
(500, 8)
22
(87, 8)
2011
10
(52, 8)
11
(289, 8)
33
(349, 8)
34
(500, 8)
35
(279, 8)
106
(235, 8)
107
(374, 8)
2012
28
(332, 8)
99
(215, 8)
100
(341, 8)
2013
9
(44, 8)
10
(448, 8)
22
(413, 8)
23
(111, 8)
110
(244, 8)
111
(306, 8)
120
(330, 8)
2014
23
(389, 8)
24
(500, 8)
25
(500, 8)
26
(428, 8)
28
(118, 8)
29
(305, 8)
30
(14, 8)
31
(500, 8)
32
(339, 8)
95
(411, 8)
96
(500, 8)
97
(500, 8)
98
(500, 8)
99
(285, 8)
101
(338, 8)
102
(61, 8)
115
(38, 8)
116
(454, 8)
118
(5, 8)
119
(500, 8)
120
(119, 8)
2015
1
(285, 8)
2
(500, 8)
3
(153, 8)
36
(271, 8)
37
(500, 8)
38
(500, 8)
39
(500, 8)
40
(500, 8)
41
(500, 8)
42
(500, 8)
43
(500, 8)
44
(500, 8)
45
(500, 8)
46
(500, 8)
47
(500, 8)
48
(500, 8)
49
(500, 8)
50
(500, 8)
51
(500, 8)
52
(500, 8)
53
(500, 8)
54
(500, 8)
55


Unnamed: 0,uid,viirs_avg_rad
59226,SZ200600000246,1.079012
59227,SZ200600000088,0.121461
59228,SZ200600000190,0.265568
59229,SZ200600000003,3.605093
59230,SZ200600000004,2.345308


In [16]:
val_viirs_df = extract_satellite_by_year(survey_df, 2500, 2500, 'viirs')

val_viirs_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_viirs_2_5km.csv'),
              index = False)

val_viirs_df.head()

2006
118
(270, 8)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2008
6
(94, 8)
7
(500, 8)
8
(404, 8)
102
(439, 8)
103
(146, 8)
2009
32
(161, 8)
33
(151, 8)
2010
4
(175, 8)
5
(366, 8)
12
(281, 8)
13
(500, 8)
14
(500, 8)
15
(500, 8)
16
(500, 8)
17
(500, 8)
18
(500, 8)
19
(500, 8)
20
(500, 8)
21
(500, 8)
22
(87, 8)
2011
10
(52, 8)
11
(289, 8)
33
(349, 8)
34
(500, 8)
35
(279, 8)
106
(235, 8)
107
(374, 8)
2012
28
(332, 8)
99
(215, 8)
100
(341, 8)
2013
9
(44, 8)
10
(448, 8)
22
(413, 8)
23
(111, 8)
110
(244, 8)
111
(306, 8)
120
(330, 8)
2014
23
(389, 8)
24
(500, 8)
25
(500, 8)
26
(428, 8)
28
(118, 8)
29
(305, 8)
30
(14, 8)
31
(500, 8)
32
(339, 8)
95
(411, 8)
96
(500, 8)
97
(500, 8)
98
(500, 8)
99
(285, 8)
101
(338, 8)
102
(61, 8)
115
(38, 8)
116
(454, 8)
118
(5, 8)
119
(500, 8)
120
(119, 8)
2015
1
(285, 8)
2
(500, 8)
3
(153, 8)
36
(271, 8)
37
(500, 8)
38
(500, 8)
39
(500, 8)
40
(500, 8)
41
(500, 8)
42
(500, 8)
43
(500, 8)
44
(500, 8)
45
(500, 8)
46
(500, 8)
47
(500, 8)
48
(500, 8)
49
(500, 8)
50
(500, 8)
51
(500, 8)
52
(500, 8)
53
(500, 8)
54
(500, 8)
55


Unnamed: 0,uid,viirs_avg_rad
59226,SZ200600000246,1.519595
59227,SZ200600000088,0.134142
59228,SZ200600000190,0.273733
59229,SZ200600000003,3.965875
59230,SZ200600000004,2.193325


In [63]:
val_viirs_df = extract_satellite_by_year(survey_df, 5000, 5000, 'viirs')

val_viirs_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_viirs_5km.csv'),
              index = False)

val_viirs_df.head()

2006
11
(270, 8)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2008
0
(998, 8)
10
(585, 8)
2009
3
(312, 8)
2010
0
(541, 8)
1
(3781, 8)
2
(1087, 8)
2011
1
(341, 8)
3
(1128, 8)
10
(609, 8)
2012
2
(332, 8)
9
(215, 8)
10
(341, 8)
2013
0
(44, 8)
1
(448, 8)
2
(524, 8)
11
(550, 8)
12
(330, 8)
2014
2
(2240, 8)
3
(853, 8)
9
(2196, 8)
10
(399, 8)
11
(997, 8)
12
(119, 8)
2015
0
(938, 8)
3
(1771, 8)
4
(5000, 8)
5
(5000, 8)
6
(5000, 8)
7
(5000, 8)
8
(5000, 8)
9
(1619, 8)
10
(1291, 8)
12
(1008, 8)
2016
0
(552, 8)
2
(622, 8)
3
(450, 8)
11
(383, 8)
12
(1886, 8)
2017
0
(1927, 8)
9
(970, 8)
11
(1773, 8)
12
(365, 8)
2018
1
(430, 8)
3
(401, 8)
10
(1454, 8)
11
(256, 8)
12
(535, 8)
2019
2
(195, 8)
3
(85, 8)
10
(321, 8)
11
(771, 8)


Unnamed: 0,uid,viirs_avg_rad
59226,SZ200600000246,2.468443
59227,SZ200600000088,0.175569
59228,SZ200600000190,0.945488
59229,SZ200600000003,3.031036
59230,SZ200600000004,2.606591


In [None]:
val_viirs_df = extract_satellite_by_year(survey_df, 10000, 10000, 'viirs')

val_viirs_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_viirs_10km.csv'),
              index = False)

val_viirs_df.head()

## Extract DMSP-OLS

In [None]:
val_dmsp_df = extract_satellite_by_year(survey_df, 2000, 5000, 'dmsp')

val_dmsp_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_dmsp.csv'),
              index = False)

val_dmsp_df.head()

## Extract WorldPop - 5km

In [8]:
val_wp_df = extract_satellite_by_year(survey_df, 5000, 5000, 'worldpop')

val_wp_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_worldpop_5km.csv'),
              index = False)

val_wp_df.head()

2012
29
(314, 8)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2014
29
(611, 8)
2015
0
(328, 8)
1
(1000, 8)
2
(1000, 8)
3
(1000, 8)
4
(1000, 8)
5
(1000, 8)
6
(1000, 8)
7
(1000, 8)
8
(1000, 8)
9
(1000, 8)
10
(1000, 8)
11
(1000, 8)
12
(1000, 8)
13
(1000, 8)
14
(1000, 8)
15
(1000, 8)
16
(1000, 8)
17
(1000, 8)
18
(1000, 8)
19
(1000, 8)
20
(1000, 8)
21
(1000, 8)
22
(1000, 8)
23
(1000, 8)
24
(1000, 8)
25
(1000, 8)
26
(1000, 8)
27
(1000, 8)
28
(1000, 8)
29
(75, 8)
30
(430, 8)
2016
30
(383, 8)
32
(49, 8)
33
(406, 8)
2017
0
(672, 8)
30
(187, 8)
31
(1000, 8)
32
(951, 8)


Unnamed: 0,uid,worldpop_sum
29676,KY201200000105,3712.264753
29677,KY201200000106,8294.951383
29678,KY201200000107,2691.943851
29679,KY201200000108,15902.517072
29680,KY201200000109,16447.350292


## Extract WorldPop - 10km

In [7]:
val_wp_df = extract_satellite_by_year(survey_df, 10000, 10000, 'worldpop')

val_wp_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_worldpop_10km.csv'),
              index = False)

val_wp_df.head()

2012
29
(314, 8)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2014
29
(611, 8)
2015
0
(328, 8)
1
(1000, 8)
2
(1000, 8)
3
(1000, 8)
4
(1000, 8)
5
(1000, 8)
6
(1000, 8)
7
(1000, 8)
8
(1000, 8)
9
(1000, 8)
10
(1000, 8)
11
(1000, 8)
12
(1000, 8)
13
(1000, 8)
14
(1000, 8)
15
(1000, 8)
16
(1000, 8)
17
(1000, 8)
18
(1000, 8)
19
(1000, 8)
20
(1000, 8)
21
(1000, 8)
22
(1000, 8)
23
(1000, 8)
24
(1000, 8)
25
(1000, 8)
26
(1000, 8)
27
(1000, 8)
28
(1000, 8)
29
(75, 8)
30
(430, 8)
2016
30
(383, 8)
32
(49, 8)
33
(406, 8)
2017
0
(672, 8)
30
(187, 8)
31
(1000, 8)
32
(951, 8)


Unnamed: 0,uid,worldpop_sum
29676,KY201200000105,15985.919922
29677,KY201200000106,69026.916904
29678,KY201200000107,11360.145576
29679,KY201200000108,45420.083288
29680,KY201200000109,37266.66632


In [None]:
#import folium
#from folium import plugins

#mapid = survey_i_fc.getMapId()

#map = folium.Map(location=[80.773137, 7.873592])
#folium.TileLayer(
#    tiles=mapid['tile_fetcher'].url_format,
#    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
#    overlay=True,
#    name='border',
#  ).add_to(map)

#map.add_child(folium.LayerControl())


In [None]:
# DEBUGGING

In [None]:
buffer_size_urban = 5000
buffer_size_rural = 5000
satellite = 'worldpop'

survey_df_i = survey_df.head(20)

# Scale
SCALE = 100 

# Year
year_use = 2015

year_plus = year_use
year_minus = year_use

year_minus_str = str(year_minus) + '-01-01'
year_plus_str = str(year_plus) + '-12-31'

image = ee.ImageCollection('WorldPop/GP/100m/pop')\
    .filterDate(year_minus_str, year_plus_str)\
    .median()

BANDS = ['population']

#
survey_fc = survey_to_fc_buffer(survey_df_i, buffer_size_urban, buffer_size_rural)


# Extract Values ---------------------------------------------------
#print(survey_fc.size().getInfo())

vals = image.reduceRegions(collection = survey_fc,
                           reducer = ee.Reducer.sum(),
                           scale = SCALE,
                           tileScale = 8)

#survey_df = survey_df[['uid']]

a = vals.aggregate_array('sum').getInfo()
print(a)

if False:

    #print(BANDS)
    for band_i in BANDS:
        #print(band_i)
        #a = vals.aggregate_array(band_i).getInfo()
        #print(len(a))
        survey_df_i[satellite + '_' + band_i] = vals.aggregate_array(band_i).getInfo()

    #val_wp_df = extract_satellite_by_year(survey_df, 5000, 5000, 'worldpop')

In [None]:
# DEBUGGING SENTINEL!!!
if False:
    buffer_size_urban = 2000
    buffer_size_rural = 2000

    survey_df_i = survey_df[survey_df['chunk_id'] == 1]
    survey_df_i.shape
    SCALE = 100 # ok to upscale

    # Year
    # sentinel starts in March 2017; juse use 2018
    year_use = 2018

    year_plus = year_use + 1
    year_minus = year_use - 1

    year_minus_str = str(year_minus) + '-01-01'
    year_plus_str = str(year_plus) + '-12-31'

    image = ee.ImageCollection('COPERNICUS/S2_SR')\
        .filterDate(year_minus_str, year_plus_str)\
        .map(cloud_mask_sentinel2)\
        .median()\
        .multiply(0.0001)

    ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI');
    image = image.addBands(ndvi)

    BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12', 'AOT', 'NDVI']

    image = image.select(BANDS)  

    # Prep Survey ---------------------------------------------------
    survey_fc = survey_to_fc_buffer(survey_df_i, buffer_size_urban, buffer_size_rural)

    # Extract Values ---------------------------------------------------
    vals = image.reduceRegions(collection = survey_fc,
                               reducer = ee.Reducer.mean(),
                               scale = SCALE,
                               tileScale = 8)

    a = eeconvert.fcToDf(vals)
    #band_i = 'B1'
    #a = vals.aggregate_array(band_i)
    a = ee.Feature(vals.first()).select(['B1'])
    print(a.getInfo())