In [29]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Imports
import ee
import pandas as pd
import gee_utils
import numpy as np

import math

In [10]:
# Trigger the authentication flow.
# Interactive step: it prompts for a verification code and, per the recorded
# output, saves an authorization token afterwards.
# NOTE(review): the prompt echoes the pasted code into the cell output, so clear
# this cell's output before sharing or committing the notebook.
ee.Authenticate()

Enter verification code: <redacted>

Successfully saved authorization token.


In [4]:
# Initialize the library.
# Run once per kernel session, before the export cells further down.
# NOTE(review): presumably picks up the token saved by ee.Authenticate() — confirm.
ee.Initialize()

In [21]:
# Export parameters
# NOTE(review): presumably the export destination ('drive' = Google Drive) and the
# storage bucket used for non-Drive exports — confirm against gee_utils.make_tiles.
EXPORT = 'drive'
BUCKET = None

# Directory setup
LSMS_EXPORT_FOLDER = 'mwi_lsms_tfrecords_raw'  # handed to export_images as its export_folder
LSMS_CSV_PATH = '../data/mwi_lsms_clusters.csv'  # cluster table read with pd.read_csv

In [22]:
# Load the LSMS cluster table.
df_lsms = pd.read_csv(LSMS_CSV_PATH)

# Fail early, with a readable message, if a column this notebook works with is absent
# (otherwise the failure surfaces later as a KeyError inside another cell).
required_columns = ['country', 'consumption', 'density', 'lat', 'lon']
missing_columns = [col for col in required_columns if col not in df_lsms.columns]
if missing_columns:
    raise ValueError(f'{LSMS_CSV_PATH} is missing expected columns: {missing_columns}')

# Uncomment to smoke-test the rest of the notebook on a single row.
# df_lsms = df_lsms.head(1)
print(df_lsms)

      country  consumption  density        lat        lon
0         mwi     0.215783        1 -15.965693  35.392825
1         mwi     0.226473        1 -16.392512  34.890469
2         mwi     0.245305        1 -16.226333  34.824941
3         mwi     0.249300        1 -17.095150  35.217213
4         mwi     0.256492        1 -14.846169  35.500090
...       ...          ...      ...        ...        ...
12439     mwi    79.715353        0 -14.023146  33.749996
12440     mwi    99.133412        0 -15.385744  35.347682
12441     mwi   106.256862        0 -15.390320  35.315213
12442     mwi   812.808083        1 -14.082816  34.905699
12443     mwi  1601.080498        0 -15.804976  34.994405

[12444 rows x 5 columns]


In [None]:
# PLEASE READ: optional cell — run it only when df_lsms holds several countries.
# It replaces df_lsms with a fixed-size random sample drawn from each listed country.
# NOTE(review): the names below must match the values stored in the 'country' column
# (the Malawi-only CSV printed above uses the code 'mwi') — confirm before running.
countries = ['ethiopia', 'nigeria', 'malawi']
SAMPLES_PER_COUNTRY = 1000
SAMPLE_SEED = 0  # fixed seed so the same clusters are drawn on every run

country_samples = []
for c in countries:
    df_country_lsms = df_lsms.loc[df_lsms['country'] == c]
    if df_country_lsms.empty:
        print(f'Skipping {c}: no rows with this country value')
        continue
    # Cap the request so a country with fewer rows does not make sample() raise.
    n_samples = min(SAMPLES_PER_COUNTRY, len(df_country_lsms))
    if n_samples < SAMPLES_PER_COUNTRY:
        print(f'{c}: only {n_samples} rows available, sampling all of them')
    country_samples.append(df_country_lsms.sample(n=n_samples, random_state=SAMPLE_SEED))

if not country_samples:
    raise ValueError(f'None of {countries} appear in the country column; df_lsms left unchanged')

# Assemble the result with a single concat: DataFrame.append was removed in
# pandas 2.0, and growing a frame inside a loop copies it on every iteration.
# ignore_index=True gives the fresh 0..n-1 index the old reset_index produced.
df_lsms_to_download = pd.concat(country_samples, ignore_index=True)
df_lsms = df_lsms_to_download

consumption = df_lsms['consumption'].values.tolist()
print(f'Mean: {np.mean(consumption)}')
print(f'STD: {np.std(consumption)}')

# Uncomment to save
# df_lsms.to_csv('ENTER_NAME_lsms_clusters.csv', index=False)

In [24]:
# Bands kept from the composite built by gee_utils.create_ms_img (see export_images).
MS_BANDS = ['BLUE', 'GREEN', 'RED', 'NIR', 'SW_IR1', 'SW_IR2', 'TEMP']

# NOTE(review): PROJECTION is not referenced by any cell shown in this notebook —
# confirm whether it is still needed.
PROJECTION = 'EPSG:3857'
SCALE = 30                # passed to gee_utils.make_tiles as `scale`; presumably metres per pixel — TODO confirm
EXPORT_TILE_RADIUS = 127  # passed to make_tiles as `ksize`; image dimension = (2*EXPORT_TILE_RADIUS) + 1 = 255px
CHUNK_SIZE = 25         # set to a small number (<= 50) if Google Earth Engine reports memory errors

In [25]:
def export_images(df, country, export_folder, chunk_size, export=None, bucket=None):
    """Export image tiles for every row of ``df``, ``chunk_size`` rows at a time.

    For each chunk a feature collection is built with ``gee_utils.create_fc``,
    a composite restricted to ``MS_BANDS`` is created over its geometry, the
    lat/lon, nightlights, delta-temperature and pollution bands are added via
    the matching ``gee_utils.add_*_band`` helpers, and the result is handed to
    ``gee_utils.make_tiles``.

    Args:
        df: pandas DataFrame of locations; rows are taken by position, so any
            index (not only a fresh 0..n-1 one) works.
        country: label used in the task keys and as the file-name prefix.
        export_folder: passed to ``make_tiles`` as ``prefix``.
        chunk_size: number of rows per export call; must be >= 1.
        export: passed to ``make_tiles`` as ``export``. ``None`` (default)
            falls back to the notebook-level ``EXPORT`` setting.
        bucket: passed to ``make_tiles`` as ``bucket``. ``None`` (default)
            falls back to the notebook-level ``BUCKET`` setting.

    Returns:
        dict mapping ``(export_folder, country, chunk_index)`` to whatever
        ``gee_utils.make_tiles`` returned for that chunk (presumably the
        Earth Engine export task — confirm in gee_utils).

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f'chunk_size must be >= 1, got {chunk_size}')

    # Honour the notebook-wide export settings unless the caller overrides them.
    if export is None:
        export = EXPORT
    if bucket is None:
        bucket = BUCKET

    tasks = {}
    for i, start in enumerate(range(0, len(df), chunk_size)):
        # Positional slicing: label-based .loc slicing only lines up with the
        # chunk boundaries when the index happens to be exactly 0..n-1.
        fc = gee_utils.create_fc(df.iloc[start:start + chunk_size])

        roi = fc.geometry()
        composite_img = gee_utils.create_ms_img(roi).select(MS_BANDS)

        # Add relevant bands
        composite_img = gee_utils.add_latlon_band(composite_img)
        composite_img = gee_utils.add_nl_band(composite_img)
        composite_img = gee_utils.add_deltatemp_band(composite_img)
        composite_img = gee_utils.add_pollution_band(composite_img)

        fname = f'{country}_{i:02d}'
        tasks[(export_folder, country, i)] = gee_utils.make_tiles(
            img=composite_img, scale=SCALE, ksize=EXPORT_TILE_RADIUS,
            points=fc, export=export,
            prefix=export_folder, fname=fname,
            bucket=bucket)
    return tasks

In [27]:
# Change the `country` argument to match the imagery being downloaded.
# new_task receives the dict of export tasks returned by export_images.
new_task = export_images(
    df_lsms,
    country='malawi',
    export_folder=LSMS_EXPORT_FOLDER,
    chunk_size=CHUNK_SIZE,
)