In [1]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import requests
from requests.auth import HTTPBasicAuth
from multiprocessing.dummy import Pool as ThreadPool

from planet_utils import get_datetime, get_save_search, activate_download_item, load_img_ids, save_img_ids
from pathum_filter import adm1_pathum_bbox_df as pathum_geometry
from pathum_filter import get_pathum_filter, get_stats, get_ids

In [2]:
# Set to True to also run the optional visual checks guarded by `if DEBUG:`
# (the AOI map and the image-count bar chart).
DEBUG = False #True

## User Params
# Item and asset type; both are also used as folder names in the download path.
item_type = "PSOrthoTile"
asset_type = "visual"
# Root folder under which the images are stored.
img_folder = './data'
# If this file exists, the search id and image ids are loaded from it instead of
# running a new search.
img_id_file = 'img_ids.txt'

# Date range handed to the search filter.
# NOTE(review): the arguments look like (year, month, day), and the results are later
# sliced with [:10] for display, so get_datetime presumably returns an ISO-style
# string -- confirm in planet_utils.
start_date = get_datetime(2019, 1, 1)
end_date = get_datetime(2019, 6, 1)

In [3]:
#%% show AOI
if DEBUG:
    # Imported under an alias so it does not shadow IPython's own `display`
    # in the notebook namespace.
    from geojsonio import display as show_geojson
    show_geojson(pathum_geometry.to_json())

#%% get number of available images
# aoi is the bounding box of Pathum Thani
aoi_geo = pathum_geometry.__geo_interface__['features'][0]['geometry']
pathum_filter = get_pathum_filter(start_date, end_date, aoi_geo)
result = get_stats(pathum_filter)
print(result)
# Fail here with the HTTP error itself instead of a KeyError / JSON error below
# when the stats request was rejected (bad key, rate limit, ...).
result.raise_for_status()

# convert to DataFrame for tidiness: one row per bucket, indexed by its start time
df = (
    pd.DataFrame.from_dict(result.json()['buckets'])
    .set_index('start_time')
)
df.index = pd.to_datetime(df.index)
df.head()

<Response [200]>


Unnamed: 0_level_0,count
start_time,Unnamed: 1_level_1
2019-01-01 00:00:00+00:00,11
2019-01-02 00:00:00+00:00,4
2019-01-03 00:00:00+00:00,0
2019-01-04 00:00:00+00:00,0
2019-01-05 00:00:00+00:00,0


In [4]:
#%% show number of images to download
if DEBUG:
    # Plotting libraries are optional and only needed for this debug view.
    import pandas_bokeh
    from bokeh.models import HoverTool

    pandas_bokeh.output_notebook()

    # Horizontal bar chart of the per-bucket counts; the default hover tool is
    # disabled because a custom one is attached below.
    bar_fig = df.plot_bokeh(kind="barh", hovertool=False, show_figure=False)
    bar_fig.xaxis.axis_label = 'Number of Images'
    bar_fig.yaxis.axis_label = 'Acquired Date'

    total_images = df["count"].sum()
    first_day, last_day = start_date[:10], end_date[:10]
    bar_fig.title.text = f'Pathum Thani: Total {total_images} Image from {first_day} to {last_day}'

    # Hover tool showing the date and the count of the bar under the cursor.
    # NOTE(review): `__x__values_original` is presumably the column pandas_bokeh
    # stores the original index values in -- confirm against the installed version.
    hover = HoverTool(
        mode="hline",
        tooltips=[
            ('Acquired date', '@__x__values_original{%Y-%m-%d}'),
            ('No. of imgs', '@count{%d}'),
        ],
        formatters={'__x__values_original': 'datetime', 'count': 'printf'},
    )
    bar_fig.add_tools(hover)
    pandas_bokeh.show(bar_fig)

In [5]:
#%% get ids of images, from a fresh saved search or from the ids file
# NOTE: the ids file is reused whenever it exists, whatever the current filter is;
# delete it after changing the date range or the AOI to force a new search.
if not os.path.isfile(img_id_file):
    # no previous result on disk: run the search and remember its outcome
    search_id, img_ids = get_save_search(pathum_filter)
    save_img_ids(img_id_file, search_id, img_ids)
else:
    # reuse the result of a previous search
    search_id, img_ids = load_img_ids(img_id_file)

In [6]:
#%% get destination of all images
# The ids seen in this notebook look like `2232607_4745521_2019-03-26_106d`: the date is
# the third `_`-separated field. Reading that field (instead of fixed character
# offsets) keeps the folders right for ids whose numeric prefixes have a different
# number of digits; an id with fewer than three fields raises IndexError rather than
# being filed under a wrong path.
id_dates = [im_id.split('_')[2] for im_id in img_ids]

df = pd.DataFrame({'id':    img_ids,
                   'year':  [d[:4] for d in id_dates],    # 'YYYY'
                   'month': [d[5:7] for d in id_dates],   # 'MM'
                   'date':  id_dates})                    # 'YYYY-MM-DD'

def get_dest(r):
    """Return the file path under which the image described by row `r` is stored.

    `r` must expose `year`, `month`, `date` and `id` attributes. The result is
    `<img_folder>/<item_type>/<asset_type>/<year>/<month>/<date>/<id>.tiff`,
    joined with the platform's path separator.
    """
    folders = [img_folder, item_type, asset_type, r.year, r.month, r.date]
    filename = r.id + '.tiff'
    return os.path.join(*folders, filename)

# Build the destination column from plain row tuples instead of `df.apply(axis=1)`:
# on an empty frame (search found no images) `apply` can return a DataFrame, which
# cannot be assigned to a single column, and row-wise `apply` builds a Series per row.
df['dest'] = [get_dest(row) for row in df.itertuples(index=False)]
df.head()

Unnamed: 0,id,year,month,date,dest
0,2232607_4745521_2019-03-26_106d,2019,3,2019-03-26,./data/PSOrthoTile/visual/2019/03/2019-03-26/2...
1,2232607_4745622_2019-03-26_106d,2019,3,2019-03-26,./data/PSOrthoTile/visual/2019/03/2019-03-26/2...
2,2232607_4745621_2019-03-26_106d,2019,3,2019-03-26,./data/PSOrthoTile/visual/2019/03/2019-03-26/2...
3,2253646_4745522_2019-04-03_1060,2019,4,2019-04-03,./data/PSOrthoTile/visual/2019/04/2019-04-03/2...
4,2253646_4745521_2019-04-03_1060,2019,4,2019-04-03,./data/PSOrthoTile/visual/2019/04/2019-04-03/2...


In [7]:
# create dir for all files
# Many images share a folder, so each distinct folder is created once.
# `exist_ok=True` makes the cell safe to re-run and removes the gap between
# checking for the folder and creating it.
for directory in sorted({os.path.dirname(item_dest) for item_dest in df['dest']}):
    os.makedirs(directory, exist_ok=True)

In [None]:
#%% activate and download all images
# setup auth: the API key from the environment is used as the basic-auth user name,
# with an empty password (raises KeyError if PL_API_KEY is not set)
session = requests.Session()
session.auth = (os.environ['PL_API_KEY'], '')

def act_dl_itm(item_id, item_dest):
    """Hand one image id and its destination path to `activate_download_item`.

    The shared `session` carries the credentials. The `item_type` and `asset_type`
    from the user-params cell are passed on, so the request always matches the
    folder layout the destination paths were built with.
    """
    activate_download_item(session, item_id, item_dest, asset_type=asset_type, item_type=item_type)

# An easy way to parallelise I/O bound operations in Python
# is to use a ThreadPool.
parallelism = 8
# `starmap` blocks until every (id, dest) pair has been processed; leaving the
# `with` block then releases the worker threads.
with ThreadPool(parallelism) as thread_pool:
    thread_pool.starmap(act_dl_itm, df[['id','dest']].itertuples(index=False))

already have: 	 2232607_4745521_2019-03-26_106d
already have: 	 2309594_4745523_2019-04-21_106e
already have: 	 2232607_4745622_2019-03-26_106d
already have: 	 2333864_4745523_2019-05-01_1066already have: 	 2407085_4745620_2019-05-31_1067
activate: 	 2407085_4745520_2019-05-31_1067
already have: 	 2387541_4745521_2019-05-24_0f22

already have: 	 2232607_4745621_2019-03-26_106d
activate: 	 2253646_4745522_2019-04-03_1060
activate: 	 2309594_4745623_2019-04-21_106e
activate: 	 2376882_4745620_2019-05-20_0f3d
already have: 	 2387541_4745620_2019-05-24_0f22
activate: 	 2333864_4745522_2019-05-01_1066activate: 	 2387541_4745621_2019-05-24_0f22

already have: 	 2369211_4745522_2019-05-17_103ealready have: 	 2341954_4745523_2019-05-07_0f4d
activate: 	 2341954_4745623_2019-05-07_0f4d

activate: 	 2369211_4745622_2019-05-17_103e
activate: 	 2341954_4745623_2019-05-07_0f4d   retrying
activate: 	 2387541_4745621_2019-05-24_0f22   retrying
activate: 	 2253646_4745522_2019-04-03_1060   retrying
act

In [None]:
# Completion marker: printed once execution reaches this final cell.
print('Done')