In [29]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [87]:
# Imports
import ee
import pandas as pd
import gee_utils
import numpy as np

import math

In [10]:
# Trigger the authentication flow.
ee.Authenticate()

Enter verification code: 4/1AWtgzh6VjkptUy4T_7CNL7KGxZfpg2hxvbFLNtAyxRhkLKsDC8Aqi1qXH30

Successfully saved authorization token.


In [60]:
# Initialize the library.
ee.Initialize()

In [84]:
# Export parameters
EXPORT = 'drive'
BUCKET = None

# Directory setup
LSMS_EXPORT_FOLDER = 'pollution_lsms_tfrecords_raw'
LSMS_CSV_PATH = '../data/pollution_lsms_clusters.csv'

In [85]:
df_lsms = pd.read_csv(LSMS_CSV_PATH)

# df_lsms_eth = df_lsms_eth.head(1)
print(df_lsms)

       country  consumption  density        lat        lon
0     ethiopia     1.663807        1   8.970294  36.525711
1     ethiopia     3.794884        1   7.006350  40.995022
2     ethiopia     7.641642        0   9.612397  41.871620
3     ethiopia     9.291895        1  14.477154  39.540272
4     ethiopia     2.056143        0   8.368388  33.434830
...        ...          ...      ...        ...        ...
2995    malawi     1.242456        1 -13.946321  33.186604
2996    malawi     0.615833        1 -15.804177  35.793425
2997    malawi     7.533418        0 -13.982346  33.806385
2998    malawi     3.969582        1 -15.403323  34.682855
2999    malawi    13.591838        0 -15.744234  35.098483

[3000 rows x 5 columns]


In [88]:
# For handling dataframe with multiple countries only, the hacky way
countries = ['ethiopia', 'nigeria', 'malawi']
df_lsms_to_download = pd.DataFrame(columns=['country', 'consumption', 'density', 'lat', 'lon'])
for c in countries:
    df_country_lsms = df_lsms.loc[df_lsms['country'] == c]
    # Sample 1000 locations from each country
    df_country_lsms_samples = df_country_lsms.sample(n=1000)
    df_lsms_to_download = df_lsms_to_download.append(df_country_lsms_samples)

df_lsms = df_lsms_to_download
df_lsms.reset_index(drop=True, inplace=True)

consumption = df_lsms['consumption'].values.tolist()
print(f'Mean: {np.mean(consumption)}')
print(f'STD: {np.std(consumption)}')

# Uncomment to save
# df_lsms.to_csv('ENTER_NAME_lsms_clusters.csv', index=False)

Mean: 3.9194884489386665
STD: 3.818122106900523


  df_lsms_to_download = df_lsms_to_download.append(df_country_lsms_samples)
  df_lsms_to_download = df_lsms_to_download.append(df_country_lsms_samples)
  df_lsms_to_download = df_lsms_to_download.append(df_country_lsms_samples)


In [80]:
MS_BANDS = ['BLUE', 'GREEN', 'RED', 'NIR', 'SW_IR1', 'SW_IR2', 'TEMP']

PROJECTION = 'EPSG:3857'
SCALE = 30              
EXPORT_TILE_RADIUS = 127  # image dimension = (2*EXPORT_TILE_RADIUS) + 1 = 255px
CHUNK_SIZE = 25         # set to a small number (<= 50) if Google Earth Engine reports memory errors

In [81]:
def export_images(df, country, export_folder, chunk_size):
    num_chunks = int(math.ceil(len(df) / chunk_size))
    tasks = {}

    for i in range(num_chunks):
        chunk_slice = slice(i * chunk_size, (i+1) * chunk_size - 1)
        fc = gee_utils.create_fc(df.loc[chunk_slice, :])

        roi = fc.geometry()
        composite_img = gee_utils.create_ms_img(roi).select(MS_BANDS)

        # Add relevant bands
        composite_img = gee_utils.add_latlon_band(composite_img)
        composite_img = gee_utils.add_nl_band(composite_img)
        composite_img = gee_utils.add_deltatemp_band(composite_img)
        composite_img = gee_utils.add_pollution_band(composite_img)
                
        fname = f'{country}_{i:02d}'
        tasks[(export_folder, country, i)] = gee_utils.make_tiles(
            img=composite_img, scale=SCALE, ksize=EXPORT_TILE_RADIUS,
            points=fc, export='drive',
            prefix=export_folder, fname=fname,
            bucket=None)
    return tasks

In [82]:
#Change country param when downloading imagery
new_task = export_images(df_lsms, country='ethiopia', export_folder=LSMS_EXPORT_FOLDER, chunk_size=CHUNK_SIZE)