In [29]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [30]:
# Imports
import ee
import pandas as pd
import gee_utils

import math

In [10]:
# Trigger the authentication flow.
ee.Authenticate()

Enter verification code: 4/1AWtgzh6VjkptUy4T_7CNL7KGxZfpg2hxvbFLNtAyxRhkLKsDC8Aqi1qXH30

Successfully saved authorization token.


In [31]:
# Initialize the library.
ee.Initialize()

In [39]:
# Export parameters
EXPORT = 'drive'
BUCKET = None

# Directory setup
LSMS_EXPORT_FOLDER = 'pollution_lsms_tfrecords_raw'
LSMS_CSV_PATH = '../data/lsms_clusters.csv'

In [40]:
df_lsms = pd.read_csv(LSMS_CSV_PATH)

# df_lsms_eth = df_lsms_eth.head(1)
print(df_lsms)

        country  consumption  density        lat        lon
0      ethiopia     0.061442        1  14.029943  38.541546
1      ethiopia     0.065432        1  14.029943  38.541546
2      ethiopia     0.112242        1   9.328209  42.142937
3      ethiopia     0.123466        1   7.879094  37.580292
4      ethiopia     0.214926        1   6.375622  42.246292
...         ...          ...      ...        ...        ...
23928    malawi    79.715353        0 -14.023146  33.749996
23929    malawi    99.133412        0 -15.385744  35.347682
23930    malawi   106.256862        0 -15.390320  35.315213
23931    malawi   812.808083        1 -14.082816  34.905699
23932    malawi  1601.080500        0 -15.804976  34.994405

[23933 rows x 5 columns]


In [52]:
# For handling dataframe with multiple countries only
countries = ['ethiopia', 'nigeria', 'malawi']
df_lsms_to_download = pd.DataFrame(columns=['country', 'consumption', 'density', 'lat', 'lon'])
for c in countries:
    df_country_lsms = df_lsms.loc[df_lsms['country'] == c]
    df_country_lsms_samples = df_country_lsms.sample(n=1000)
    df_lsms_to_download = df_lsms_to_download.append(df_country_lsms_samples)

print(df_lsms_to_download)

        country  consumption density        lat        lon
1410   ethiopia     1.778661       0   9.610875  41.868992
4335   ethiopia     4.648180       0   9.611356  41.851933
975    ethiopia     1.447522       1   4.010758  41.765862
4839   ethiopia     5.558277       1   8.029937  41.950932
3940   ethiopia     4.078467       0   8.978465  38.712986
...         ...          ...     ...        ...        ...
20990    malawi     2.878867       1 -13.557205  32.897253
18361    malawi     1.927002       1 -13.362015  34.264738
22820    malawi     4.904841       0 -13.981185  33.794726
21529    malawi     3.225654       1 -13.405444  33.380390
12050    malawi     0.691251       1 -13.226167  33.740380

[3000 rows x 5 columns]


  df_lsms_to_download = df_lsms_to_download.append(df_country_lsms_samples)
  df_lsms_to_download = df_lsms_to_download.append(df_country_lsms_samples)
  df_lsms_to_download = df_lsms_to_download.append(df_country_lsms_samples)


In [53]:
MS_BANDS = ['BLUE', 'GREEN', 'RED', 'NIR', 'SW_IR1', 'SW_IR2', 'TEMP']

PROJECTION = 'EPSG:3857'
SCALE = 30              
EXPORT_TILE_RADIUS = 127  # image dimension = (2*EXPORT_TILE_RADIUS) + 1 = 255px
CHUNK_SIZE = 25         # set to a small number (<= 50) if Google Earth Engine reports memory errors

In [54]:
def export_images(df, country, export_folder, chunk_size):
    num_chunks = int(math.ceil(len(df) / chunk_size))
    tasks = {}

    for i in range(num_chunks):
        chunk_slice = slice(i * chunk_size, (i+1) * chunk_size - 1)
        fc = gee_utils.create_fc(df.loc[chunk_slice, :])

        roi = fc.geometry()
        composite_img = gee_utils.create_ms_img(roi).select(MS_BANDS)

        # Add relevant bands
        composite_img = gee_utils.add_latlon_band(composite_img)
        composite_img = gee_utils.add_nl_band(composite_img)
        composite_img = gee_utils.add_deltatemp_band(composite_img)
        composite_img = gee_utils.add_pollution_band(composite_img)
                
        fname = f'{country}_{i:02d}'
        tasks[(export_folder, country, i)] = gee_utils.make_tiles(
            img=composite_img, scale=SCALE, ksize=EXPORT_TILE_RADIUS,
            points=fc, export='drive',
            prefix=export_folder, fname=fname,
            bucket=None)
    return tasks

In [None]:
#Change country param when downloading imagery
new_task = export_images(df_lsms, country='pollution', export_folder=LSMS_EXPORT_FOLDER, chunk_size=CHUNK_SIZE)