# Download Satellite images
Note: this notebook is best run from Google Colab. To run smoothly, ensure that the package `satimg_utils` is in the same directory as this notebook and that the notebook is stored within the current working directory.

In [1]:
import time
from typing import Dict, Tuple, Any

import ee
import numpy as np
import pandas as pd

In [2]:
from satimg_utils.gee_classes import *
from satimg_utils.gee_helpers import *
from satimg_utils.gee_exporters import *

In [None]:
# Mount Google Drive into the Colab filesystem, then make the thesis folder
# the working directory so the relative paths used further down resolve from it.
from google.colab import drive
drive.mount('/content/gdrive')
# IPython line magic: changes the kernel's current working directory.
%cd /content/gdrive/MyDrive/master_thesis/

In [10]:
# Start the Earth Engine authentication flow. The recorded output shows it
# prompting for a verification code and then saving an authorization token.
ee.Authenticate()

Enter verification code: <redacted>

Successfully saved authorization token.


In [3]:
# Initialize the Earth Engine client library for this session.
# NOTE(review): presumably relies on the token saved by ee.Authenticate() — confirm.
ee.Initialize()

# Load LSMS Data

In [4]:
# Folder that contains the `Data/` tree, relative to the working directory.
source_dir = '../'
LSMS_CSV_PATH = f'{source_dir}/Data/lsms/processed/labels_cluster_v1.csv'

# Read the cluster-level LSMS labels, keep only the columns this notebook
# uses, and derive the .tif file name expected for each image product.
lsms_df = (
    pd.read_csv(LSMS_CSV_PATH, float_precision='high')
    .loc[:, ['country', 'start_ts', 'lat', 'lon', 'series', 'cluster_id', 'unique_id']]
    .assign(
        RS_file_name=lambda labels: 'RS_v2_' + labels['unique_id'] + ".tif",
        WP_file_name=lambda labels: 'WP_' + labels['unique_id'] + ".tif",
    )
)

# Download Remote Sensing images

In [None]:
# Work out which remote-sensing (RS) images still have to be exported.
# `check_downloaded_files` comes from satimg_utils; presumably it returns the
# collection of RS file names already present under `source_dir` — TODO confirm.
rs_downloaded_files = check_downloaded_files(prefix = "RS", source_dir = source_dir)

# True for every row whose RS image is NOT among the downloaded files.
# dtype=bool keeps the mask boolean even when lsms_df has no rows; without it
# an empty list turns into a float64 array and `~` raises a TypeError.
rs_mask = ~np.array(
    [file_name in rs_downloaded_files for file_name in lsms_df['RS_file_name']],
    dtype=bool,
)

# Rows that still need an RS export, re-indexed from 0.
to_download_df = lsms_df.loc[rs_mask,:].copy().reset_index(drop = True)
print(len(to_download_df))

7141


In [None]:
# One key per (country, series, start_ts, cluster_id, unique_id) combination
# that still needs its RS image exported.
lsms_surveys = list(to_download_df.groupby(['country', 'series', 'start_ts', 'cluster_id', 'unique_id']).groups.keys())

# Throttling: after every RS_EXPORT_BATCH_SIZE submissions the loop pauses for
# RS_EXPORT_PAUSE_SECONDS before continuing.
# NOTE(review): presumably chosen to stay below Earth Engine's limit on queued
# batch tasks — confirm against the current quota documentation.
RS_EXPORT_BATCH_SIZE = 2999
RS_EXPORT_PAUSE_SECONDS = 60*30

tasks = {}
tab_feats = []  # never filled in this cell; kept in case later cells read it
count = 0       # submissions since the last pause

for country, series, start_ts, cluster_id, unique_id in tqdm(lsms_surveys):
    # cluster_id is part of the group key but is not passed to the exporter;
    # the second return value (feats) is not used here.
    new_tasks, feats = export_rs_images(
        df=lsms_df, country=country,
        series = series, start_ts = start_ts,
        unique_id = unique_id)
    tasks.update(new_tasks)
    count += 1
    if count == RS_EXPORT_BATCH_SIZE:
        print(f'Going to sleep for {RS_EXPORT_PAUSE_SECONDS} seconds')
        # `time` is imported in the notebook's import cell.
        time.sleep(RS_EXPORT_PAUSE_SECONDS)
        count = 0

  0%|          | 0/897 [00:00<?, ?it/s]

In [None]:
# Hand the RS export tasks collected above to wait_on_tasks (from satimg_utils).
# NOTE(review): presumably blocks until every task has finished — confirm in the helper.
wait_on_tasks(tasks)

# Download WorldPop Data

In [None]:
# Work out which WorldPop (WP) images still have to be exported.
# `check_downloaded_files` comes from satimg_utils; presumably it returns the
# collection of WP file names already present under `source_dir` — TODO confirm.
wp_downloaded_files = check_downloaded_files(prefix = "WP", source_dir = source_dir)

# True for every row whose WP image is NOT among the downloaded files.
# dtype=bool keeps the mask boolean even when lsms_df has no rows; without it
# an empty list turns into a float64 array and `~` raises a TypeError.
wp_mask = ~np.array(
    [file_name in wp_downloaded_files for file_name in lsms_df['WP_file_name']],
    dtype=bool,
)

# Rows that still need a WP export, re-indexed from 0.
to_download_df = lsms_df.loc[wp_mask,:].copy().reset_index(drop = True)
print(len(to_download_df))

940


In [None]:
# One key per (country, series, start_ts, cluster_id, unique_id) combination
# that still needs its WorldPop image exported.
lsms_surveys = list(to_download_df.groupby(['country', 'series', 'start_ts', 'cluster_id', 'unique_id']).groups.keys())

# Throttling, mirroring the RS export loop: pause after every
# WP_EXPORT_BATCH_SIZE submissions.
# NOTE(review): presumably chosen to stay below Earth Engine's limit on queued
# batch tasks — confirm against the current quota documentation.
WP_EXPORT_BATCH_SIZE = 2999
WP_EXPORT_PAUSE_SECONDS = 60*30

tasks = {}
count = 0  # submissions since the last pause

for country, series, start_ts, cluster_id, unique_id in tqdm(lsms_surveys):
    # cluster_id is part of the group key but is not passed to the exporter.
    new_tasks = export_wp_imgs(
        df=lsms_df, country=country,
        series = series, start_ts = start_ts,
        unique_id = unique_id)
    tasks.update(new_tasks)
    count += 1
    if count == WP_EXPORT_BATCH_SIZE:
        print(f'Going to sleep for {WP_EXPORT_PAUSE_SECONDS} seconds')
        # The pause was commented out, so the message above was printed
        # without any waiting; restored so the loop does what it announces.
        # `time` is imported in the notebook's import cell.
        time.sleep(WP_EXPORT_PAUSE_SECONDS)
        count = 0

  0%|          | 0/940 [00:00<?, ?it/s]

# Download Precipitation data

In [None]:
# One key per (country, start_ts, unique_id, lat, lon) combination in the labels.
lsms_surveys = [*lsms_df.groupby(['country', 'start_ts', 'unique_id', 'lat', 'lon']).groups]

# Start with None for every unique_id so ids without a result stay visible.
weather_feats = dict.fromkeys(lsms_df.unique_id)

for country, start_ts, unique_id, lat, lon in tqdm(lsms_surveys):
    # All label rows carrying this unique_id, re-indexed from 0.
    matches_id = lsms_df['unique_id'] == unique_id
    subset_df = lsms_df.loc[matches_id].reset_index(drop = True)
    fcPoint = df_to_fc(subset_df)

    # Build the RS feature helper for this location and survey start.
    rs = RS_Feats(fcPoint.geometry(), start_ts = start_ts)

    # Store whatever `.data` holds after init_precip() under this unique_id.
    precip_data = rs.init_precip().data
    weather_feats[unique_id] = precip_data

  0%|          | 0/7141 [00:00<?, ?it/s]

In [None]:
# Turn the {unique_id: precipitation} mapping into a two-column table and
# write it to disk without the index column.
# NOTE(review): this path is relative to the working directory and does not
# use `source_dir`, unlike the LSMS read path — confirm that is intended.
weather_df = pd.DataFrame({
    'unique_id': list(weather_feats),
    'precipitation': list(weather_feats.values()),
})
weather_df.to_csv("Data/precipitation.csv", index = False)

# Download WSF data

In [5]:
# Distinct (country, cluster_id, lat, lon) rows from the labels, re-indexed from 0.
cluster_df = (
    lsms_df.loc[:, ['country', 'cluster_id', 'lat', 'lon']]
    .drop_duplicates()
    .reset_index(drop = True)
    .copy()
)
# One key per (country, cluster_id) pair present in cluster_df.
lsms_surveys = [*cluster_df.groupby(['country', 'cluster_id']).groups]

In [6]:
# Call the WSF exporter once per (country, cluster_id) pair and merge whatever
# entries it returns into a single `tasks` mapping.
tasks = {}

for country, cluster_id in tqdm(lsms_surveys):
    tasks.update(
        export_wsf_images(df = cluster_df, country = country, cluster_id = cluster_id)
    )

  0%|          | 0/2255 [00:00<?, ?it/s]

In [7]:
# Hand the WSF export tasks collected above to wait_on_tasks (from satimg_utils).
# The recorded output shows one "finished in N min with state: ..." line per task.
wait_on_tasks(tasks)

  0%|          | 0/2255 [00:00<?, ?it/s]

Task ('WSF_raw', 'eth', 'WSF_eth_010101088801601') finished in 0 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_010102088801403') finished in 0 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_010103010100106') finished in 0 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_010103088801804') finished in 0 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_010105088800204') finished in 0 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_010106088800402') finished in 1 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_010107010100116') finished in 1 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_010201088800207') finished in 1 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_010202020100107') finished in 1 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_010202088800802') finished in 1 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_010203088801001') finished in 1 min with state: COMPLETED

Task ('WSF_raw', 'eth', 'WSF_eth_030309010100308') finished in 8 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_030310088801701') finished in 9 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_030403088801401') finished in 9 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_030406088800801') finished in 9 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_030409088801203') finished in 9 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_030411088800903') finished in 9 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_030412010100105') finished in 9 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_030414088802101') finished in 9 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_030416088800204') finished in 9 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_030418010100309') finished in 9 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_030418010100807') finished in 9 min with state: COMPLETED

Task ('WSF_raw', 'eth', 'WSF_eth_040707088802702') finished in 16 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_040712088802302') finished in 17 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_040713010100209') finished in 17 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_040806088800802') finished in 17 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_040808010100107') finished in 17 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_040812088802401') finished in 17 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_040817088802801') finished in 17 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_040825010100102') finished in 17 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_040901088800304') finished in 17 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_040907088800105') finished in 17 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_040909010100105') finished in 17 min with state

Task ('WSF_raw', 'eth', 'WSF_eth_060608088801401') finished in 24 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_070103088800404') finished in 25 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_070105088802502') finished in 25 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_070108088800701') finished in 25 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_070110010100102') finished in 25 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_070111088800401') finished in 25 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_070112088802605') finished in 25 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_070115010100108') finished in 25 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_070202010100105') finished in 25 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_070202088801001') finished in 25 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_070205088801302') finished in 26 min with state

Task ('WSF_raw', 'eth', 'WSF_eth_072219010100115') finished in 33 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_072219010100219') finished in 34 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_072219010100401') finished in 34 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_072219010100511') finished in 34 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_072219010100606') finished in 34 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_072219010100636') finished in 34 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_072219010100723') finished in 34 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_072219010100753') finished in 34 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_120101010100103') finished in 34 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_120101088800401') finished in 34 min with state: COMPLETED
Task ('WSF_raw', 'eth', 'WSF_eth_120102088800401') finished in 35 min with state

Task ('WSF_raw', 'mwi', 'WSF_mwi_10401140') finished in 44 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_10401502') finished in 44 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_10401522') finished in 44 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_10404186') finished in 44 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_10405213') finished in 44 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_10501505') finished in 45 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_10501709') finished in 45 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_10502268') finished in 45 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_10502593') finished in 45 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_10503367') finished in 45 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_10505136') finished in 45 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_10505310') finished in 45 min wi

Task ('WSF_raw', 'mwi', 'WSF_mwi_21087531') finished in 57 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_21088597') finished in 57 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_30101280') finished in 57 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_30101545') finished in 58 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_30102028') finished in 58 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_30102540') finished in 58 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_30103074') finished in 58 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_30104708') finished in 58 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_30105325') finished in 58 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_30105448') finished in 58 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_30107624') finished in 58 min with state: COMPLETED
Task ('WSF_raw', 'mwi', 'WSF_mwi_30108401') finished in 59 min wi

Task ('WSF_raw', 'nga', 'WSF_nga_1020_38') finished in 68 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1021_120') finished in 67 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1025_1774') finished in 68 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_102_1690') finished in 68 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_102_536') finished in 68 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_104_1364') finished in 68 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_105_638') finished in 68 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_106_880') finished in 68 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_107_1044') finished in 68 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_108_904') finished in 68 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1101_154') finished in 69 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1102_936') finished in 69 min with s

Task ('WSF_raw', 'nga', 'WSF_nga_1722_96') finished in 76 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1724_786') finished in 76 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1727_634') finished in 76 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1803_840') finished in 76 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1805_534') finished in 76 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1808_1726') finished in 76 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1809_1272') finished in 76 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1811_364') finished in 76 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1814_678') finished in 76 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1816_378') finished in 76 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1817_370') finished in 77 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_1820_450') finished in 77 min w

Task ('WSF_raw', 'nga', 'WSF_nga_2316_140') finished in 84 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_2401_766') finished in 84 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_2403_1588') finished in 84 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_2403_3592') finished in 84 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_2403_674') finished in 84 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_2403_7586') finished in 84 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_2405_134') finished in 84 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_2407_396') finished in 85 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_2408_2074') finished in 85 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_2411_788') finished in 85 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_2412_2798') finished in 85 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_2413_306') finished in 85 m

Task ('WSF_raw', 'nga', 'WSF_nga_3015_1688') finished in 92 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_3016_390') finished in 92 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_3018_914') finished in 93 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_301_1198') finished in 92 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_3020_322') finished in 93 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_3023_622') finished in 93 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_3024_1406') finished in 93 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_3024_1924') finished in 93 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_3025_2052') finished in 93 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_3030_946') finished in 93 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_3032_1088') finished in 93 min with state: COMPLETED
Task ('WSF_raw', 'nga', 'WSF_nga_3032_520') finished in 94 m

NameError: name 'time' is not defined