### Download available [River Segment Surface Area Dataset](https://mint.isi.edu/ethiopia/datasets/browse/da6b6d47-7672-4e6e-a455-7bbc7e7ceb99) data using the MINT Data Catalog API

This notebook lets the user set a spatial filter (a lat/lon bounding box) and download the surface area CSV files of the river segments that intersect it.

In [64]:
# Prerequisites: python 3.6 or later
import requests
import json
import uuid
import pprint
import datetime
import os
pp = pprint.PrettyPrinter(indent=2)

# for converting metadata into a GIS ready geojson
import shapely
import pandas
import geopandas

#### set spatial filter

In [67]:
# User settings for a spatially filtered download of the dataset's resources.

# Folder that receives the downloaded files (edit the path as needed).
out_dir = f"{os.getcwd()}/data_download/"  # <-- set this parameter


# ----- WARNING -----------------------------------------------------------------------------
# The API caps the number of records returned by a single request. The default
# cap is 20 and the maximum that can be requested is 2000. Because the dataset
# holds ~9000 resources, a spatial filter is needed to stay under that cap.
# -------------------------------------------------------------------------------------------
limit = 2000  # <-- set this parameter


# Spatial coverage, given as the corners of a lat/lon bounding box.
lon_min = 41.5278  # <-- set this parameter (western edge)
lat_min = 3.9277   # <-- set this parameter (southern edge)
lon_max = 41.6694  # <-- set this parameter (eastern edge)
lat_max = 4.0162   # <-- set this parameter (northern edge)

# GeoJSON polygons need a closed ring, so the first corner is repeated last.
_bbox_ring = [
    [lon_min, lat_min],
    [lon_max, lat_min],
    [lon_max, lat_max],
    [lon_min, lat_max],
    [lon_min, lat_min],
]
bounding_box = {"type": "Polygon", "coordinates": [_bbox_ring]}

#### Download data
- A CSV file for each river segment within the spatial filter is downloaded into the `out_dir` folder.
- A text file (`metadata.txt`) is created that lists the name of each river segment file together with its bounding box and time range. It has 7 columns: filename, lon_min, lat_min, lon_max, lat_max, start_time, end_time.
- A GIS-ready `metadata.geojson` file is created that contains the bounding boxes of the downloaded segments, with the filename as their attribute.

In [68]:
# This is a convenience method to handle api responses.
def handle_api_response(response, print_response=True):
    """Decode an API response and raise unless the request succeeded.

    Args:
        response: Response object exposing ``status_code`` and ``json()``
            (for example a ``requests.Response``).
        print_response: When true, pretty-print the decoded body with the
            module-level ``pp`` printer before the status is checked.

    Returns:
        The JSON-decoded body of an HTTP 200 response.

    Raises:
        ValueError: If an HTTP 200 response body is not valid JSON.
        Exception: For every non-200 status code (400, 403, or anything else).
    """
    try:
        parsed_response = response.json()
        body_is_json = True
    except ValueError:
        # Error pages are not always JSON. Only a successful response is
        # required to decode; otherwise fall through to the status errors.
        if response.status_code == 200:
            raise
        parsed_response = None
        body_is_json = False

    if print_response and body_is_json:
        pp.pprint(parsed_response)

    if response.status_code == 200:
        return parsed_response
    if response.status_code == 400:
        raise Exception("Bad request ^")
    if response.status_code == 403:
        msg = "Please make sure your request headers include X-Api-Key and that you are using correct url"
        raise Exception(msg)

    # Any other status is unexpected. Raise instead of handing an error
    # payload back to the caller as if the request had succeeded.
    msg = """It seems our server encountered an error which it doesn't know how to handle yet. 
        This sometimes happens with unexpected input(s). In order to help us diagnose and resolve the issue, 
        please notify Dan Feldman (danf@usc.edu) of this error."""
    raise Exception(msg)


# Base url of the Data Catalog api (fixed. Do not change)
url = "https://api.mint-data-catalog.org"

# Identifier of the dataset to query (fixed. Do not change.)
dataset_id = 'da6b6d47-7672-4e6e-a455-7bbc7e7ceb99'

# Every API call needs a session token, so request one first.
resp = requests.get(url + "/get_session_token").json()
api_key = resp['X-Api-Key']

# Headers sent with each authenticated request.
request_headers = {'Content-Type': "application/json", 'X-Api-Key': api_key}

# Ask for at most `limit` resources of the dataset whose spatial coverage
# intersects the bounding box.
_spatial_filter = {"spatial_coverage__intersects": bounding_box}
search_query = {"dataset_id": dataset_id, "filter": _spatial_filter, "limit": limit}

_search_reply = requests.post(
    f"{url}/datasets/dataset_resources",
    headers=request_headers,
    json=search_query,
)
response = handle_api_response(_search_reply, print_response=False)

num_resources = len(response['resources'])
print(f'Number of resources: {num_resources}')


# Create the download folder, including any missing parent folders.
os.makedirs(out_dir, exist_ok=True)

resources = response['resources']
filenames = []
polygons = []

# The context manager closes metadata.txt even if a resource record is
# malformed or an error is raised part-way through the loop.
with open(os.path.join(out_dir, 'metadata.txt'), 'w') as fid:
    fid.write('filename,lon_min,lat_min,lon_max,lat_max,start_time,end_time\n')
    for resource in resources:
        cur_url = resource['resource_data_url']
        print(cur_url)
        cur_name = cur_url.split('/')[-1]

        # writing metadata to metadata.txt
        resource_metadata = resource['resource_metadata']
        bbox = resource_metadata['spatial_coverage']['value']
        xmin, ymin, xmax, ymax = bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']
        start_time = resource_metadata['temporal_coverage']['start_time']
        end_time = resource_metadata['temporal_coverage']['end_time']
        row = (cur_name, xmin, ymin, xmax, ymax, start_time, end_time)
        fid.write(','.join(str(value) for value in row) + '\n')

        # downloading river segment csv
        # The file is streamed with requests rather than a shell `wget` call,
        # so the URL is never interpreted by a shell and failures are reported
        # instead of being silently ignored.
        try:
            with requests.get(cur_url, stream=True, timeout=60) as download:
                download.raise_for_status()
                with open(os.path.join(out_dir, cur_name), 'wb') as out_file:
                    for chunk in download.iter_content(chunk_size=65536):
                        out_file.write(chunk)
        except requests.RequestException as err:
            # Keep going so one bad file does not abort the whole batch.
            print(f'WARNING: failed to download {cur_url}: {err}')

        # collecting the bounding box polygon for metadata.geojson
        lat_point_list = [ymax, ymax, ymin, ymin, ymax]
        lon_point_list = [xmin, xmax, xmax, xmin, xmin]
        polygons.append(shapely.geometry.Polygon(zip(lon_point_list, lat_point_list)))
        filenames.append(cur_name)

if resources:
    # Build the GeoDataFrame once instead of concatenating a one-row frame
    # per resource inside the loop.
    pf = geopandas.GeoDataFrame({'filename': filenames}, geometry=polygons, crs="EPSG:4326")
    pf.to_file(filename=os.path.join(out_dir, 'metadata.geojson'), driver="GeoJSON")
else:
    print('No resources matched the spatial filter; metadata.geojson was not written.')

Number of resources: 28
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-31736.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-31644.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-31690.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-32084.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-31394.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-31562.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-31780.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-32158.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-32202.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-27816.csv
https://data.mint.isi.edu/files/Rive