## Preliminaries: run the two cells below first

In [None]:
import time
import warnings
import math
from pathlib import Path
import requests
import pandas as pd
import geopandas as gpd
import json
import datetime
import plotly.express as px
import plotly.graph_objects as go
import pickle
import numpy as np

In [None]:
# --- Query settings ---------------------------------------------------------
aoi_name = "BeitBridge"  # Name for AOI; appears in plot titles and output file names
o_start_date = "2023-01-01"  # Overall start date for query (YYYY-MM-DD)
o_end_date = "2023-03-27"  # Overall end date for query (YYYY-MM-DD)

# --- API access -------------------------------------------------------------
api_key = ""  # Put skywatch api key here
# Headers sent with every request. Do not change.
headers = {
    'Content-Type': 'application/json',
    'x-api-key': api_key,
}

# --- Files ------------------------------------------------------------------
# Directory that receives all outputs.
out_path = Path("../data/raw/planetscope/")
# AOI geometry file used to query the API.
aoi_file = Path("../data/processed/Beitbridge_PS_AOI_Final.gpkg")

# Load the AOI and keep a GeoJSON-style dict of it for building search queries.
aoi_data = gpd.read_file(aoi_file)
aoi_json = json.loads(aoi_data.to_json())

## Search API

In [None]:
# Maximum span, in days, between the start and end date of one search request.
date_interval = 90

o_start_datetime = datetime.datetime.strptime(o_start_date, "%Y-%m-%d")
o_end_datetime = datetime.datetime.strptime(o_end_date, "%Y-%m-%d")
if o_start_datetime > o_end_datetime:
    raise ValueError(f"o_start_date ({o_start_date}) is after o_end_date ({o_end_date})")

# Split the overall range into consecutive, non-overlapping windows. Every
# window end is clamped to the overall end date, so no window runs past the
# requested range and no window starts after it ends.
date_list = []
window_start = o_start_datetime
while window_start <= o_end_datetime:
    window_end = min(window_start + datetime.timedelta(days=date_interval), o_end_datetime)
    date_list.append({
        "start_date": window_start.strftime("%Y-%m-%d"),
        "end_date": window_end.strftime("%Y-%m-%d"),
    })
    # The next window begins the day after this one ends.
    window_start = window_end + datetime.timedelta(days=1)

In [None]:
def get_sat_type(sat_id, source):
    """Classify an image as 'dove-r', 'superdove', 'dove-c' or 'unknown'.

    Args:
        sat_id: Underscore-separated product name; only the last segment is
            inspected.
        source: Source label reported for the image.

    Returns:
        'dove-r' when the last segment of ``sat_id`` contains one of the
        listed identifiers (this takes priority over ``source``);
        otherwise 'superdove' for source "PlanetScope-SuperDove", 'dove-c'
        for source "PlanetScope", and 'unknown' for any other source.
    """
    # Identifiers that mark an image as Dove-R when found in the last segment.
    dove_r_ids = (
        "1047", "1057", "1058", "1059", "105a", "105b", "105d", "105e", "105f",
        "1060", "1061", "1062", "1063", "1064", "1065", "1066", "1067", "1068",
        "1069", "106a", "106b", "106c", "106d", "106e", "106f",
    )

    sat_parse = sat_id.split('_')[-1]
    if any(sat in sat_parse for sat in dove_r_ids):
        return "dove-r"

    if source == "PlanetScope-SuperDove":
        return 'superdove'
    if source == "PlanetScope":
        return 'dove-c'

    # Previously an unrecognised source left the result unassigned and raised
    # UnboundLocalError; return an explicit marker instead.
    return "unknown"

def create_search_query(start_date, end_date, aoi_json):
    """Assemble the request body for one archive search.

    Args:
        start_date: Start of the search window, passed through unchanged.
        end_date: End of the search window, passed through unchanged.
        aoi_json: GeoJSON-style dict; the first element of the first
            feature's geometry coordinates is used as the search polygon.

    Returns:
        dict ready to be JSON-encoded and posted to the search endpoint.
    """
    first_geometry = aoi_json["features"][0]["geometry"]
    location = {
        "type": "Polygon",
        "coordinates": first_geometry["coordinates"][0],
    }

    return {
        "location": location,
        "start_date": start_date,
        "end_date": end_date,
        "resolution": ["medium"],
        "coverage": 50.0,
        "interval_length": 7,
        "order_by": "date",
    }

def search_catalog(query, headers):
    """Submit an archive search and return its search ID.

    Args:
        query: Search request body (see ``create_search_query``).
        headers: HTTP headers, including the API key.

    Returns:
        The ``data.id`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status (for
            example a missing or invalid API key), instead of failing later
            with an opaque KeyError on the response body.
    """
    search_url = "https://api.skywatch.co/earthcache/archive/search"
    search_resp = requests.post(url=search_url, data=json.dumps(query), headers=headers)
    search_resp.raise_for_status()
    return search_resp.json()['data']['id']

def get_search_results(search_id, headers):
    """Poll a submitted search until results are ready and return them.

    The endpoint is re-queried every 5 seconds for as long as it answers with
    HTTP 202, which this code treats as "still processing".

    Args:
        search_id: ID returned when the search was submitted.
        headers: HTTP headers, including the API key.

    Returns:
        Parsed JSON body of the first non-202 response.

    Raises:
        requests.HTTPError: If the final response has a 4xx/5xx status.
    """
    search_url = f"https://api.skywatch.co/earthcache/archive/search/{search_id}/search_results"
    search_resp = requests.get(url=search_url, headers=headers)

    while search_resp.status_code == 202:
        print("Search still processing. Waiting 5 seconds.")
        time.sleep(5)
        search_resp = requests.get(url=search_url, headers=headers)

    # Fail loudly on an error status rather than handing an error payload on.
    search_resp.raise_for_status()
    return search_resp.json()

def _plot_week(date_obj):
    """Return the week number used for plotting ``date_obj`` within its calendar year."""
    iso_year, iso_week = date_obj.isocalendar()[0], date_obj.isocalendar()[1]
    # Early-January dates can belong to the last ISO week (52 or 53) of the
    # previous ISO year; count them as week 1 of their calendar year. The
    # earlier check only handled week 52, leaving week-53 dates such as
    # 2021-01-01 as "week 53" of the new year.
    if iso_year == date_obj.year - 1:
        return 1
    # NOTE(review): late-December dates that fall in ISO week 1 of the next
    # year still report week 1 here — confirm whether that is intended.
    return iso_week


def _item_to_frame(item, search_id):
    """Convert one search result item into a one-row DataFrame."""
    datestr = item['start_time'].split("T")[0]
    date_obj = datetime.datetime.strptime(datestr, "%Y-%m-%d")
    return pd.DataFrame([{
        "search_id": search_id,
        "id": item['id'],
        "product_name": item['product_name'],
        "datestr": datestr,
        "date": date_obj,
        "year": date_obj.year,
        "month": date_obj.month,
        "week": _plot_week(date_obj),
        "source": item['source'],
        "sat_type": get_sat_type(item['product_name'], item['source']),
        "area_sq_km": item['area_sq_km'],
        "cloud_cover": item['result_cloud_cover_percentage'],
        "aoi_coverage": item["location_coverage_percentage"],
        "cost": item['cost'],
        "preview": item["preview_uri"],
    }])


def _next_cursor(page):
    """Return the cursor for the following page, or None when there is none."""
    try:
        return page['pagination']['cursor']['next']
    except (KeyError, TypeError):
        return None


def parse_results(search_resp, headers, search_id, df_list):
    """Append one single-row DataFrame per search result to ``df_list``.

    The first page is taken from ``search_resp``; further pages are fetched
    for as long as a page carries a ``pagination.cursor.next`` value.

    Args:
        search_resp: Parsed JSON of the first results page.
        headers: HTTP headers used when fetching follow-up pages.
        search_id: ID of the search; stored on every row and used to build
            the follow-up page URLs.
        df_list: List that receives the rows; mutated in place.

    Returns:
        The same ``df_list`` object.
    """
    page = search_resp
    while True:
        for item in page["data"]:
            df_list.append(_item_to_frame(item, search_id))

        cursor = _next_cursor(page)
        if not cursor:
            break

        page_url = f"https://api.skywatch.co/earthcache/archive/search/{search_id}/search_results?cursor={cursor}"
        page = requests.get(url=page_url, headers=headers).json()

    return df_list

In [None]:
# Run one archive search per date window; parse_results appends a one-row
# DataFrame per returned image to df_list.
df_list = []
for entry in date_list:
    start_date, end_date = entry['start_date'], entry['end_date']
    print(f"running search query for date range: {start_date} to {end_date}")

    query = create_search_query(start_date, end_date, aoi_json)
    search_id = search_catalog(query, headers)
    search_resp = get_search_results(search_id, headers)
    df_list = parse_results(search_resp, headers, search_id, df_list)

In [None]:
# Stack the per-image rows into one table with a fresh 0..n-1 index.
search_df = pd.concat(df_list, ignore_index=True)

# Store the calendar fields as categoricals.
search_df = search_df.astype({'year': 'category', 'month': 'category', 'week': 'category'})

# Clickable preview link, shown in plot hover text.
search_df["preview_html"] = search_df.apply(
    lambda row: f'<a href="{row["preview"]}">Preview</a>',
    axis=1,
)

In [None]:
# Save the raw search results so later sections can resume without re-querying.
search_pickle = out_path / f'search_df_{o_start_date}_to_{o_end_date}.pkl'
search_df.to_pickle(search_pickle)

## Filter query DataFrame. Continue from here if using a previous query

In [None]:
# Reload the saved search results. Only unpickle files this notebook wrote.
search_pickle = out_path / f'search_df_{o_start_date}_to_{o_end_date}.pkl'

with search_pickle.open("rb") as f:
    search_df = pickle.load(f)

In [None]:
def filter_dates(df):
    """Pick the single best image ID among several candidates.

    Selection steps:
      1. Keep rows whose ``aoi_coverage`` is at least 90% of the best
         coverage in ``df``.
      2. If any remaining row is 'superdove' or 'dove-r', keep only those.
      3. Return the ``id`` of the remaining row with the highest
         ``aoi_coverage``.

    Args:
        df: Non-empty DataFrame with ``id``, ``aoi_coverage`` and
            ``sat_type`` columns. It is not modified.

    Returns:
        The ``id`` value of the selected row.
    """
    top_coverage = df["aoi_coverage"].max()
    candidates = df.loc[(df["aoi_coverage"] / top_coverage) >= 0.9]

    # Test the column *values*: `'dove-r' in df['sat_type']` checked the index
    # labels, so Dove-R images were never preferred over Dove-Classic ones.
    preferred = candidates["sat_type"].isin(["superdove", "dove-r"])
    if preferred.any():
        candidates = candidates.loc[preferred]

    best = candidates.nlargest(1, 'aoi_coverage')
    return best['id'].values[0]

def filter_dataframe(df, cloud_cover_thresh, coverage_thresh):
    """Apply quality thresholds and keep one image per acquisition date.

    Args:
        df: Search results with ``id``, ``datestr``, ``cloud_cover`` and
            ``aoi_coverage`` columns (plus ``sat_type`` when a date has more
            than one candidate). It is not modified.
        cloud_cover_thresh: Maximum allowed ``cloud_cover``.
        coverage_thresh: Minimum required ``aoi_coverage``.

    Returns:
        New DataFrame, re-indexed from 0, holding the rows that pass both
        thresholds, reduced to the ID chosen for each distinct ``datestr``
        (``filter_dates`` decides when a date has several candidates).
    """
    # Build the mask from the argument itself; the cloud-cover test used to
    # read the notebook-level `search_df`, so any other input was mis-filtered.
    passes = (df["cloud_cover"] <= cloud_cover_thresh) & (df["aoi_coverage"] >= coverage_thresh)
    filtered_df = df.loc[passes].copy()

    id_list = []
    for _, same_day in filtered_df.groupby("datestr", sort=False):
        if len(same_day) > 1:
            id_list.append(filter_dates(same_day))
        else:
            id_list.append(same_day['id'].values[0])

    filtered_df = filtered_df[filtered_df['id'].isin(id_list)]
    return filtered_df.reset_index(drop=True)

cc_thresh = 2.0  # Maximum cloud cover percentage to keep
coverage_thresh = 80.0  # Minimum AOI coverage percentage to keep
filtered_search = filter_dataframe(
    search_df,
    cloud_cover_thresh=cc_thresh,
    coverage_thresh=coverage_thresh,
)

In [None]:
# Save the filtered results next to the raw search pickle.
filter_search_pickle = out_path / f'search_df_{o_start_date}_to_{o_end_date}_filtered.pkl'
filtered_search.to_pickle(filter_search_pickle)

## Creating graphs to explore query results

In [None]:
# Reload the filtered results. Only unpickle files this notebook wrote.
filter_search_pickle = out_path / f'search_df_{o_start_date}_to_{o_end_date}_filtered.pkl'
with filter_search_pickle.open("rb") as f:
    filtered_search = pickle.load(f)

In [None]:
def create_month_cc_graph(df, target_year, output_graph):
    """Plot image counts per month for one year, stacked by cloud-cover bucket.

    Args:
        df: Search results with ``year``, ``month``, ``datestr`` and
            ``cloud_cover`` columns.
        target_year: Year whose images are plotted.
        output_graph: When true, also write the figure as PNG and HTML under
            ``out_path``.

    Returns:
        None. When ``df`` holds no rows for ``target_year`` a message is
        printed and nothing is plotted (this used to raise IndexError).
    """
    target_df = df.loc[df["year"] == target_year]
    if target_df.empty:
        print(f"No images found for {target_year}; skipping monthly cloud cover graph.")
        return

    # ISO date strings sort chronologically, so min/max give the date span
    # without relying on the row order.
    min_date = target_df['datestr'].min()
    max_date = target_df['datestr'].max()

    cloud = target_df['cloud_cover']
    # Bucket label -> (row mask, bar colour). Buckets do not overlap.
    buckets = {
        "0% clouds": (cloud == 0, "green"),
        "<=2% clouds": ((cloud > 0) & (cloud <= 2), "yellow"),
        "<=5% clouds": ((cloud > 2) & (cloud <= 5), "orange"),
        "<=10% clouds": ((cloud > 5) & (cloud <= 10), "red"),
        ">10% clouds": (cloud > 10, "maroon"),
    }

    month_df = pd.DataFrame()
    for label, (mask, _) in buckets.items():
        month_df[label] = target_df.loc[mask, 'month'].value_counts()

    month_df.reset_index(inplace=True)
    # Older pandas names the reset column 'index'; newer versions already call it 'month'.
    month_df.rename(columns={'index': 'month'}, inplace=True)
    month_df.sort_values('month', inplace=True)
    month_df.reset_index(inplace=True, drop=True)

    month_fig = px.bar(
        month_df,
        x='month',
        y=list(buckets),
        color_discrete_map={label: colour for label, (_, colour) in buckets.items()},
    )
    month_fig.update_layout(title_text=f"PlanetScope Cloud Cover By Month - {target_year} - {aoi_name}", title_x=0.5, yaxis_title="Image Count", xaxis_title="Month of Year")

    filenamestr = f"PlanetScope_Monthly_Image_Count_{min_date}_to_{max_date}_{aoi_name}"
    if output_graph:
        out_png = out_path.joinpath(f"{filenamestr}.png")
        out_html = out_path.joinpath(f"{filenamestr}.html")
        month_fig.write_image(out_png)
        month_fig.write_html(out_html)

    month_fig.show()

# Plot the year the query starts in; a hard-coded year outside the queried
# range matches no rows. For multi-year queries, call once per year of interest.
create_month_cc_graph(filtered_search, int(o_start_date[:4]), False)

In [None]:
def create_week_cc_graph(df, target_year, output_graph):
    """Plot image counts per week for one year, stacked by cloud-cover bucket.

    Args:
        df: Search results with ``year``, ``week``, ``datestr`` and
            ``cloud_cover`` columns.
        target_year: Year whose images are plotted.
        output_graph: When true, also write the figure as PNG and HTML under
            ``out_path``.

    Returns:
        None. When ``df`` holds no rows for ``target_year`` a message is
        printed and nothing is plotted (this used to raise IndexError).
    """
    target_df = df.loc[df["year"] == target_year]
    if target_df.empty:
        print(f"No images found for {target_year}; skipping weekly cloud cover graph.")
        return

    # ISO date strings sort chronologically, so min/max give the date span
    # without relying on the row order.
    min_date = target_df['datestr'].min()
    max_date = target_df['datestr'].max()

    cloud = target_df['cloud_cover']
    # Bucket label -> (row mask, bar colour). Buckets do not overlap.
    buckets = {
        "0% clouds": (cloud == 0, "green"),
        "<=2% clouds": ((cloud > 0) & (cloud <= 2), "yellow"),
        "<=5% clouds": ((cloud > 2) & (cloud <= 5), "orange"),
        "<=10% clouds": ((cloud > 5) & (cloud <= 10), "red"),
        ">10% clouds": (cloud > 10, "maroon"),
    }

    week_df = pd.DataFrame()
    for label, (mask, _) in buckets.items():
        week_df[label] = target_df.loc[mask, 'week'].value_counts()

    week_df.reset_index(inplace=True)
    # Older pandas names the reset column 'index'; newer versions already call it 'week'.
    week_df.rename(columns={'index': 'week'}, inplace=True)
    week_df.sort_values('week', inplace=True)
    week_df.reset_index(inplace=True, drop=True)

    week_fig = px.bar(
        week_df,
        x='week',
        y=list(buckets),
        color_discrete_map={label: colour for label, (_, colour) in buckets.items()},
    )
    week_fig.update_layout(title_text=f"PlanetScope Cloud Cover By Week - {min_date} to {max_date} - {aoi_name}", title_x=0.5, yaxis_title="Image Count", xaxis_title="Week of Year")

    filenamestr = f"PlanetScope_Weekly_Image_Count_{min_date}_to_{max_date}_{aoi_name}"
    if output_graph:
        out_png = out_path.joinpath(f"{filenamestr}.png")
        out_html = out_path.joinpath(f"{filenamestr}.html")
        week_fig.write_image(out_png)
        week_fig.write_html(out_html)

    week_fig.show()

# Plot the year the query starts in; a hard-coded year outside the queried
# range matches no rows. For multi-year queries, call once per year of interest.
create_week_cc_graph(filtered_search, int(o_start_date[:4]), False)

In [None]:
def create_image_preview_graph(df, output_graph):
    """Scatter-plot cloud cover against image date, with a preview link on hover.

    Args:
        df: Search results with ``date``, ``datestr``, ``source``,
            ``cloud_cover`` and ``preview_html`` columns.
        output_graph: When true, also write the figure as HTML under
            ``out_path``.
    """
    # Column order fixes the customdata[i] positions used in the hover template.
    hover_columns = ('datestr', 'source', 'cloud_cover', 'preview_html')
    hover_values = np.stack(tuple(df[name] for name in hover_columns), axis=-1)
    h_template = 'Image Date: %{customdata[0]}<br>%{customdata[3]}<br>Cloud Cover: %{customdata[2]}%<br>Image Source: %{customdata[1]}'

    layout = go.Layout(
        title=f'PlanetScope Images - {o_start_date} to {o_end_date} - {aoi_name}',
        xaxis=dict(title="Image Date"),
        yaxis=dict(title="Cloud Cover Percentage"),
    )
    fig = go.Figure(layout=layout)

    points = go.Scatter(
        x=df['date'],
        y=df['cloud_cover'],
        mode='markers',
        orientation='h',
        marker=dict(color='red'),
        customdata=hover_values,
        hovertemplate=h_template,
    )
    fig.add_trace(points)

    # Centre the title.
    fig.update_layout(title={'x': 0.5, 'xanchor': 'center'})

    if output_graph:
        out_html = out_path.joinpath(f"PlanetScope_Image_Previews_{o_start_date}_to_{o_end_date}_{aoi_name}.html")
        fig.write_html(out_html)

    fig.show()

# Show the preview scatter plot without writing it to disk.
create_image_preview_graph(filtered_search, output_graph=False)

In [None]:
def create_year_cc_graph(df, output_graph):
    """Plot yearly image counts by cloud-cover bucket with estimated costs.

    The cost estimate for a bucket is its image count multiplied by the mean
    ``cost`` over all rows of ``df``, rounded to 2 decimals.

    Args:
        df: Search results with ``year``, ``cost`` and ``cloud_cover``
            columns. The first and last rows supply the year range used in
            the title and file name.
        output_graph: When true, also write the figure as PNG and HTML under
            ``out_path``.

    Returns:
        DataFrame with one row per year holding the counts and cost
        estimates; an empty DataFrame (and no plot) when ``df`` is empty,
        which used to raise IndexError.
    """
    if df.empty:
        print("No images to plot; skipping annual image count and cost graph.")
        return pd.DataFrame()

    min_year = df.iloc[0]['year']
    max_year = df.iloc[-1]['year']
    avg_price = df["cost"].mean()

    cloud = df['cloud_cover']
    # Bucket label -> (row mask, bar colour). Buckets do not overlap.
    buckets = {
        "0% clouds": (cloud == 0, "green"),
        "<=2% clouds": ((cloud > 0) & (cloud <= 2), "yellow"),
        "<=5% clouds": ((cloud > 2) & (cloud <= 5), "orange"),
    }
    count_cols = list(buckets)
    cost_cols = [f"{label} cost" for label in count_cols]

    year_df = pd.DataFrame()
    for label, (mask, _) in buckets.items():
        year_df[label] = df.loc[mask, 'year'].value_counts()
    for label, cost_label in zip(count_cols, cost_cols):
        year_df[cost_label] = (year_df[label] * avg_price).round(2)

    year_df.reset_index(inplace=True)
    # Older pandas names the reset column 'index'; newer versions already call it 'year'.
    year_df.rename(columns={'index': 'year'}, inplace=True)
    year_df.sort_values('year', inplace=True)
    year_df.reset_index(inplace=True, drop=True)

    h_template='Image Count 0% Clouds: %{customdata[0]} Estimated Cost: %{customdata[3]}<br>Image Count <=2% Clouds: %{customdata[1]} Estimated Cost: %{customdata[4]}<br>Image Count <=5% Clouds: %{customdata[2]} Estimated Cost: %{customdata[5]}'
    # customdata[0..2] are the counts and [3..5] the costs, matching h_template.
    hover_values = np.stack([year_df[col] for col in count_cols + cost_cols])
    year_fig = px.bar(
        year_df,
        x='year',
        y=count_cols,
        color_discrete_map={label: colour for label, (_, colour) in buckets.items()},
        custom_data=hover_values,
    )
    year_fig.update_layout(title_text=f"PlanetScope Annual Image Counts and Cost - {min_year} to {max_year} - {aoi_name}", title_x=0.5, yaxis_title="Image Count", xaxis_title="Year")
    year_fig.update_traces(hovertemplate=h_template)

    filenamestr = f"PlanetScope_Annual_Image_Count_and_Cost_{min_year}_to_{max_year}_{aoi_name}"
    if output_graph:
        out_png = out_path.joinpath(f"{filenamestr}.png")
        out_html = out_path.joinpath(f"{filenamestr}.html")
        year_fig.write_image(out_png)
        year_fig.write_html(out_html)

    year_fig.show()

    return year_df

# Show the annual count/cost graph without writing it to disk; keep the summary table.
year_df = create_year_cc_graph(filtered_search, output_graph=False)

In [None]:
def create_month_multiyear_graph(df, output_graph):
    """Plot 0%-cloud image counts per month, with one bar group per year.

    Args:
        df: Search results with ``year``, ``month`` and ``cloud_cover``
            columns. The first and last rows supply the year range used in
            the title and file name.
        output_graph: When true, also write the figure as PNG and HTML under
            ``out_path``.

    Returns:
        None. When ``df`` is empty a message is printed and nothing is
        plotted (this used to raise IndexError).
    """
    if df.empty:
        print("No images to plot; skipping multi-year monthly graph.")
        return

    min_year = df.iloc[0]['year']
    max_year = df.iloc[-1]['year']

    month_df = pd.DataFrame()
    col_list = []
    for year in df['year'].unique().tolist():
        cloud_free = df.loc[(df["year"] == year) & (df['cloud_cover'] == 0)]
        col_name = f"{year}_counts"
        month_df[col_name] = cloud_free['month'].value_counts()
        col_list.append(col_name)

    month_df.reset_index(inplace=True)
    # Older pandas names the reset column 'index'; newer versions already call it 'month'.
    month_df.rename(columns={'index': 'month'}, inplace=True)
    month_df.sort_values('month', inplace=True)
    month_df.reset_index(inplace=True, drop=True)

    month_fig = px.bar(month_df, x='month', y=col_list, barmode='group')
    month_fig.update_layout(title_text=f"PlanetScope Image Count By Year and Month - 0% Cloud Cover - {min_year} to {max_year} - {aoi_name}", title_x=0.5, yaxis_title="Image Count", xaxis_title="Month of Year")

    filenamestr = f"PlanetScope_Image_Count_Annual_Monthly_0%CC_{min_year}_to_{max_year}_{aoi_name}"
    if output_graph:
        out_png = out_path.joinpath(f"{filenamestr}.png")
        out_html = out_path.joinpath(f"{filenamestr}.html")
        month_fig.write_image(out_png)
        month_fig.write_html(out_html)

    month_fig.show()

# Show the multi-year monthly graph without writing it to disk.
create_month_multiyear_graph(filtered_search, output_graph=False)

In [None]:
def create_dove_monthly_graph(df, target_year, cc_thresh, output_graph):
    """Plot monthly image counts for one year, stacked by satellite type.

    Args:
        df: Search results with ``year``, ``month``, ``datestr``,
            ``cloud_cover`` and ``sat_type`` columns.
        target_year: Year whose images are plotted.
        cc_thresh: Maximum ``cloud_cover`` of the images counted. Rows are
            now kept when ``cloud_cover <= cc_thresh``; the earlier exact
            equality test only matched the name and title for a threshold of 0.
        output_graph: When true, also write the figure as PNG and HTML under
            ``out_path``.

    Returns:
        None. When ``df`` holds no rows for ``target_year`` a message is
        printed and nothing is plotted (this used to raise IndexError).
    """
    target_df = df.loc[df["year"] == target_year]
    if target_df.empty:
        print(f"No images found for {target_year}; skipping monthly satellite type graph.")
        return

    # Display names for the known satellite types. Any other value is shown
    # as-is instead of reusing the previous column name or raising NameError.
    display_names = {"dove-c": "Dove-Classic", "dove-r": "Dove-R", "superdove": "SuperDove"}
    within_thresh = target_df['cloud_cover'] <= cc_thresh

    month_df = pd.DataFrame()
    col_list = []
    for datasource in df['sat_type'].unique().tolist():
        col_name = display_names.get(datasource, str(datasource))
        selected = target_df.loc[within_thresh & (target_df['sat_type'] == datasource)]
        month_df[col_name] = selected['month'].value_counts()
        col_list.append(col_name)

    month_df.reset_index(inplace=True)
    # Older pandas names the reset column 'index'; newer versions already call it 'month'.
    month_df.rename(columns={'index': 'month'}, inplace=True)
    month_df.sort_values('month', inplace=True)
    month_df.reset_index(inplace=True, drop=True)

    month_fig = px.bar(month_df, x='month', y=col_list)
    month_fig.update_layout(title_text=f"PlanetScope Image Count By Month and By Satellite Type - {cc_thresh}% Cloud Cover - {target_year} - {aoi_name}", title_x=0.5, yaxis_title="Image Count", xaxis_title="Month of Year")

    filenamestr = f"PlanetScope_Image_Count_Monthly_{cc_thresh}%CC_SatelliteType_{target_year}_{aoi_name}"
    if output_graph:
        out_png = out_path.joinpath(f"{filenamestr}.png")
        out_html = out_path.joinpath(f"{filenamestr}.html")
        month_fig.write_image(out_png)
        month_fig.write_html(out_html)

    month_fig.show()

# Plot the year the query starts in; a hard-coded year outside the queried
# range matches no rows. For multi-year queries, call once per year of interest.
create_dove_monthly_graph(filtered_search, int(o_start_date[:4]), 0, False)

In [None]:
def create_dove_year_graph(df, cc_thresh, output_graph):
    """Plot yearly image counts stacked by satellite type.

    Args:
        df: Search results with ``year``, ``cloud_cover`` and ``sat_type``
            columns. The first and last rows supply the year range used in
            the title and file name.
        cc_thresh: Maximum ``cloud_cover`` of the images counted. The filter
            used to be hard-coded to 5 while the title and file name reported
            ``cc_thresh``; it now honours the argument.
        output_graph: When true, also write the figure as PNG and HTML under
            ``out_path``.

    Returns:
        DataFrame with one row per year and one count column per satellite
        type; an empty DataFrame (and no plot) when ``df`` is empty, which
        used to raise IndexError.
    """
    if df.empty:
        print("No images to plot; skipping annual satellite type graph.")
        return pd.DataFrame()

    min_year = df.iloc[0]['year']
    max_year = df.iloc[-1]['year']

    # Column label -> sat_type value counted in that column.
    sat_columns = {"Dove-Classic": "dove-c", "Dove-R": "dove-r", "SuperDove": "superdove"}
    within_thresh = df['cloud_cover'] <= cc_thresh

    year_df = pd.DataFrame()
    for label, sat_type in sat_columns.items():
        year_df[label] = df.loc[within_thresh & (df["sat_type"] == sat_type), 'year'].value_counts()

    year_df.reset_index(inplace=True)
    # Older pandas names the reset column 'index'; newer versions already call it 'year'.
    year_df.rename(columns={'index': 'year'}, inplace=True)
    year_df.sort_values('year', inplace=True)
    year_df.reset_index(inplace=True, drop=True)

    year_fig = px.bar(year_df, x='year', y=list(sat_columns))
    year_fig.update_layout(title_text=f"PlanetScope Image Count By Year and By Satellite Type - {cc_thresh}% Cloud Cover - {min_year} to {max_year} - {aoi_name}", title_x=0.5, yaxis_title="Image Count", xaxis_title="Year")

    filenamestr = f"PlanetScope_Annual_Image_Count_{cc_thresh}_%CC_SatelliteType_{min_year}_to_{max_year}_{aoi_name}_filtered"
    if output_graph:
        out_png = out_path.joinpath(f"{filenamestr}.png")
        out_html = out_path.joinpath(f"{filenamestr}.html")
        year_fig.write_image(out_png)
        year_fig.write_html(out_html)

    year_fig.show()

    return year_df

# Show the annual satellite-type graph for a 0% threshold without writing it to disk.
year_df = create_dove_year_graph(filtered_search, cc_thresh=0, output_graph=False)
# year_df = create_dove_year_graph(search_df, 5)

## Download Imagery

In [None]:
# Reload the filtered results before selecting images to download.
# Only unpickle files this notebook wrote.
filter_search_pickle = out_path / f'search_df_{o_start_date}_to_{o_end_date}_filtered.pkl'
with filter_search_pickle.open("rb") as f:
    filtered_search = pickle.load(f)

In [None]:
# Final image download selection criteria
max_cc = 0.0  # Maximum cloud cover percentage of images to download
# Download window; set these to o_start_date / o_end_date to download the whole queried range.
start_date_download = "2023-01-01"
end_date_download = "2023-03-27"
start_date_final = datetime.datetime.strptime(start_date_download, "%Y-%m-%d")
end_date_final = datetime.datetime.strptime(end_date_download, "%Y-%m-%d")
sat_list = ['superdove']  # Satellite types to download

# From here on out_path points at a per-download sub-directory.
# - `out_path.joinpath(out_path, name)` repeated a relative out_path inside
#   itself ("<out>/<out>/name"); join the sub-directory name only.
# - The name check keeps re-running this cell from nesting the directory again.
# - parents=True creates missing intermediate directories.
download_dir_name = f"outputs_{aoi_name}_{start_date_download}_to_{end_date_download}"
if out_path.name != download_dir_name:
    out_path = out_path / download_dir_name
out_path.mkdir(parents=True, exist_ok=True)

filtered_download_pickle = out_path.joinpath(f'search_df_{start_date_download}_to_{end_date_download}_download.pkl')
filtered_download_resp_pickle = out_path.joinpath(f'search_df_{start_date_download}_to_{end_date_download}_resp.pkl')

# Pipeline ID -> latest status response; filled in by the polling cell.
pl_resp_dict = {}

In [None]:

# Filter results down to the final download list based on the criteria above.
selection_mask = (
    (filtered_search['date'] >= start_date_final)
    & (filtered_search['date'] <= end_date_final)
    & (filtered_search['cloud_cover'] <= max_cc)
    & (filtered_search['sat_type'].isin(sat_list))
)
# Copy *after* filtering: assigning columns to the result of a .loc selection
# triggers pandas' SettingWithCopyWarning, both here and in later cells.
filtered_search_final = filtered_search.loc[selection_mask].copy()

# Pipeline bookkeeping columns, filled in by the following cells.
filtered_search_final['pl_id'] = ""
filtered_search_final['pl_status'] = ""

filtered_search_final.to_pickle(filtered_download_pickle)

total_cost = filtered_search_final['cost'].sum()
num_images = len(filtered_search_final)
print(f"Total number of images to download: {num_images}. Total cost: ${total_cost} USD")

In [None]:
# Ask the API which output types exist and map each output name to its ID.
output_id_url = "https://api.skywatch.co/earthcache/outputs"
output_id_resp = requests.get(url=output_id_url, headers=headers).json()
output_type_dict = {entry['name']: entry['id'] for entry in output_id_resp['data']}

# Display the selectable output names.
output_type_dict.keys()

In [None]:
# Choose the output type to order by name; raises KeyError if output_type_dict has no such entry.
output_id = output_type_dict['All Optical Bands']

In [None]:
def create_pipeline_query(pipeline_name, search_id, image_id, output_id):
    """Assemble the request body that creates one download pipeline.

    Args:
        pipeline_name: Name given to the pipeline.
        search_id: ID of the search the image came from.
        image_id: Search result to order; sent as ``search_results``.
        output_id: ID of the output type to produce.

    Returns:
        dict ready to be JSON-encoded and posted to the pipelines endpoint.
    """
    output_spec = {
        "id": output_id,
        "format": "geotiff",
        "mosaic": "off",
    }
    return {
        "name": pipeline_name,
        "search_id": search_id,
        "search_results": image_id,
        "output": output_spec,
    }

def post_pipeline(query, headers):
    """Create a download pipeline and return its ID.

    Args:
        query: Pipeline request body (see ``create_pipeline_query``).
        headers: HTTP headers, including the API key.

    Returns:
        The ``data.id`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status,
            instead of failing later with an opaque KeyError on the
            response body.
    """
    pipeline_url = "https://api.skywatch.co/earthcache/pipelines"
    pipeline_resp = requests.post(url=pipeline_url, data=json.dumps(query), headers=headers)
    pipeline_resp.raise_for_status()
    return pipeline_resp.json()['data']['id']

def get_pipeline(pipeline_id):
    """Fetch the interval results of one pipeline.

    Uses the notebook-level ``headers`` for authentication.

    Args:
        pipeline_id: ID of the pipeline to look up.

    Returns:
        The raw ``requests`` response; the caller decodes the JSON.
    """
    lookup_url = f"https://api.skywatch.co/earthcache/interval_results?pipeline_id={pipeline_id}"
    return requests.get(url=lookup_url, headers=headers)

In [None]:
# Create one download pipeline per selected image and record its ID.
for index, row in filtered_search_final.iterrows():
    product_name = row["product_name"]
    if row["pl_id"]:
        # Re-running this cell must not order (and bill) the same image twice.
        print(f"pipeline already exists for image: {product_name}; skipping")
        continue

    time.sleep(1)  # Pause between requests.
    search_id = row["search_id"]
    image_id = row["id"]
    print(f"creating download pipeline for image: {product_name}")
    pipeline_name = f"{aoi_name}_{product_name}"
    pl_query = create_pipeline_query(pipeline_name, search_id, image_id, output_id)
    pl_id = post_pipeline(pl_query, headers)
    filtered_search_final.loc[index, 'pl_id'] = pl_id


In [None]:
# Persist the download list, which now carries the pipeline ID of each image.
filtered_search_final.to_pickle(filtered_download_pickle)

# NOTE(review): this CSV path is built but not used anywhere in the visible code.
filter_search_csv = out_path.joinpath(f'search_df_{o_start_date}_to_{o_end_date}_filtered.csv')

In [None]:
# Resume from a previous session: restore the download list and the saved
# pipeline responses when their pickles exist. Only unpickle files this
# notebook wrote.
if filtered_download_pickle.exists():
    with filtered_download_pickle.open('rb') as f:
        filtered_search_final = pickle.load(f)

if filtered_download_resp_pickle.exists():
    with filtered_download_resp_pickle.open('rb') as f:
        pl_resp_dict = pickle.load(f)

In [None]:
def query_pipeline(pl_id):
    """Look up a pipeline and report the status of its first interval result.

    Args:
        pl_id: ID of the pipeline to query.

    Returns:
        Tuple ``(status, response_json)``: the ``status`` field of the first
        entry in the response's ``data`` list, and the full decoded response.
    """
    payload = get_pipeline(pl_id).json()
    first_result = payload['data'][0]
    return first_result['status'], payload

# Poll every pipeline until all report 'complete', saving progress after each
# pass so an interrupted session can resume.
num_iterations = 1
results_processing = True
while results_processing:
    status_list = []
    print('Checking status of image download pipelines.')
    for index, row in filtered_search_final.iterrows():
        pl_status = row['pl_status']
        pl_id = row['pl_id']
        # Query when the pipeline is unfinished, or when it is marked complete
        # but its response is missing (the download cell needs the response).
        if pl_status != 'complete' or pl_id not in pl_resp_dict:
            time.sleep(1)  # Pause only before an actual request.
            pl_status, pl_resp_json = query_pipeline(pl_id)
            filtered_search_final.loc[index, 'pl_status'] = pl_status
            pl_resp_dict[pl_id] = pl_resp_json

        status_list.append(pl_status)

    filtered_search_final.to_pickle(filtered_download_pickle)
    with open(filtered_download_resp_pickle, 'wb') as f:
        pickle.dump(pl_resp_dict, f)

    # all() is also true for an empty selection, which previously never
    # satisfied the exit condition and looped forever.
    if all(status == 'complete' for status in status_list):
        results_processing = False
        print('All image pipelines are finished and ready for download!')
    else:
        # Back off from 30 minutes towards a 1-minute floor on later passes.
        wait_time = max(1800 / num_iterations, 60)
        wait_time_mins = round((wait_time / 60), 0)
        print(f"Results still pending for some items. Waiting for {wait_time_mins} mins and trying again.")
        num_iterations += 2
        time.sleep(wait_time)

In [None]:
def download_file(url, out_name):
    """Stream the content at ``url`` into the file ``out_name``.

    Args:
        url: Address to download from.
        out_name: Destination path; opened in binary write mode, so an
            existing file is overwritten.

    Raises:
        requests.HTTPError: If the response has a 4xx/5xx status.
    """
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(out_name, 'wb') as sink:
            # chunk_size=None yields the data in whatever sizes it arrives.
            sink.writelines(response.iter_content(chunk_size=None))

# Download the files of every finished pipeline into a per-image folder under out_path.
for index, row in filtered_search_final.iterrows():
    product_name = row['product_name']
    pl_id = row['pl_id']
    pl_resp_json = pl_resp_dict[pl_id]
    results = pl_resp_json['data'][0]['results'][0]

    # (download URL, file name suffix) for the image, its metadata and the
    # first raster file, which is saved as the cloud mask.
    download_targets = [
        (results['analytics_url'], 'analytic.tif'),
        (results['metadata_url'], 'metadata.json'),
        (results['raster_files'][0]['uri'], 'mask.tif'),
    ]

    out_basepath = out_path / product_name
    out_basepath.mkdir(exist_ok=True)
    print(f'downloading all files for image: {product_name}')
    for dl_url, extension in download_targets:
        out_fname = out_basepath / f'{product_name}_{extension}'
        download_file(dl_url, out_fname)