### Download available [River Segment Surface Area Dataset](https://mint.isi.edu/ethiopia/datasets/browse/da6b6d47-7672-4e6e-a455-7bbc7e7ceb99) data using MINT Data Catalog API

This notebook lets the user set spatial and temporal filters and download the matching river segment surface area CSV files.

In [17]:
# Prerequisites: python 3.6 or later
import requests
import json
import uuid
import pprint
import datetime
import os
pp = pprint.PrettyPrinter(indent=2)

# for converting metadata into a GIS ready geojson
import shapely
import pandas
import geopandas

#### Set spatial and temporal filters

In [18]:
# Settings that control which of the dataset's resources are downloaded
# (selected by spatial and temporal coverage) and where they are stored.

# Target directory for the downloaded data (change the directory path as needed.)
out_dir = f"{os.getcwd()}/data_download/"


# ----- WARNING -----------------------------------------------------------------------------
# The API caps the number of records it returns per request. The default cap
# is 20; it can be raised to at most 2000 by passing a larger limit. Because
# the dataset holds ~9000 resources, a single request can never return all of
# them, so narrow the search with the spatial and temporal filters below.
# -------------------------------------------------------------------------------------------
limit = 20


# Spatial coverage: a lat/lon bounding box given as a 4-element numeric array
# in the WGS84 coordinate system, ordered [xmin, ymin, xmax, ymax]
# (x is longitude, y is latitude).
#
# Example: the bounding box for Ethiopia+SNNPR+KAT (adm level 2) is
# {"xmax": 38.062137603759766, "xmin": 37.3511962890625, "ymax": 7.4791812896728525, "ymin": 7.147633552551269}
# The coordinates need not match exactly, because the data catalog supports
# "within" and "intersects" geospatial queries.
bounding_box = [
    41.468759,  # xmin
    3.775651,   # ymin
    42.024722,  # xmax
    4.126743,   # ymax
]

# Temporal coverage: start/end times in ISO8601 format. Supported operators:
#   gt  (greater than)
#   gte (greater than or equal)
#   lt  (less than)
#   lte (less than or equal)
#
# Example: the values below cover the whole of 2018.
start_time = "2018-01-01T00:00:00"
end_time = "2018-12-31T23:59:59"

#### Download data
- A CSV file for each river segment that matches the spatial and temporal filters is downloaded to the out_dir folder.
- A text file (metadata.txt) is created that lists the downloaded river segment files with their bounding boxes and temporal coverage. It has 7 columns: filename, lon_min, lat_min, lon_max, lat_max, start_time, end_time.
- A GIS-ready metadata.geojson file is created that contains the bounding box of each downloaded segment, with the filename as its attribute.

In [19]:
# This is a convenience method to handle api responses.
def handle_api_response(response, print_response=True):
    """Parse an API response and raise on any non-200 status code.

    Args:
        response: Response object exposing ``status_code`` and ``json()``
            (in this notebook, the result of a ``requests`` call).
        print_response: When true, pretty-print the parsed JSON body before
            the status code is checked, so the body is visible even when an
            exception follows.

    Returns:
        The parsed JSON body when the status code is 200.

    Raises:
        Exception: For every status code other than 200. The message
            distinguishes 400 (bad request), 403 (missing X-Api-Key header
            or wrong url) and any other code (unexpected server error).
    """
    parsed_response = response.json()

    if print_response:
        pp.pprint(parsed_response)

    if response.status_code == 200:
        return parsed_response
    elif response.status_code == 400:
        # The caret refers to the response body printed just above.
        raise Exception("Bad request ^")
    elif response.status_code == 403:
        msg = "Please make sure your request headers include X-Api-Key and that you are using correct url"
        raise Exception(msg)
    else:
        msg = """It seems our server encountered an error which it doesn't know how to handle yet. 
        This sometimes happens with unexpected input(s). In order to help us diagnose and resolve the issue, 
        please notify Dan Feldman (danf@usc.edu) of this error."""
        # This message used to be built and then dropped, letting the error
        # body be returned as if it were a valid result; raise it instead.
        raise Exception(msg)


# Base url of the Data Catalog API (fixed. Do not change)
url = "https://api.mint-data-catalog.org"

# Ask the API for a session token; it is sent back in the X-Api-Key header
# of the request that follows.
resp = requests.get(url + "/get_session_token").json()
api_key = resp["X-Api-Key"]

request_headers = {"Content-Type": "application/json", "X-Api-Key": api_key}

# Id of the River Segment Surface Area Dataset (fixed. Do not change.)
dataset_id = "da6b6d47-7672-4e6e-a455-7bbc7e7ceb99"

# Search criteria: the dataset plus the spatial/temporal filters and the
# record limit chosen in the settings cell.
search_query = {
    "dataset_id": dataset_id,
    "spatial_coverage__within": bounding_box,
    "start_time__gte": start_time,
    "end_time__lte": end_time,
    "limit": limit,
}

# Fetch the matching resources; print_response=False keeps the (potentially
# long) JSON body out of the cell output.
response = handle_api_response(
    requests.post(
        f"{url}/datasets/dataset_resources",
        headers=request_headers,
        json=search_query,
    ),
    print_response=False,
)

num_resources = len(response["resources"])
print(f"Number of resources: {num_resources}")


import subprocess  # used instead of os.system so that no shell parses the URL

# Create the output directory (and any missing parents); exist_ok makes
# re-running the cell harmless.
os.makedirs(out_dir, exist_ok=True)

# Collected per resource so the GeoDataFrame can be built once after the loop
# instead of being re-concatenated on every iteration.
downloaded_names = []
bbox_polygons = []

# The with-block guarantees metadata.txt is flushed and closed even if a
# resource entry is malformed or a download step raises.
with open(os.path.join(out_dir, 'metadata.txt'), 'w') as fid:
    fid.write('filename,lon_min,lat_min,lon_max,lat_max,start_time,end_time\n')
    for resource in response['resources']:
        cur_url = resource['resource_data_url']
        print(cur_url)
        cur_name = cur_url.split('/')[-1]

        # writing metadata to metadata.txt
        resource_metadata = resource['resource_metadata']
        bbox = resource_metadata['spatial_coverage']['value']
        xmin, ymin, xmax, ymax = bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']
        # Kept in their own names so the user's start_time/end_time search
        # filters are not overwritten by the last resource's coverage.
        res_start_time = resource_metadata['temporal_coverage']['start_time']
        res_end_time = resource_metadata['temporal_coverage']['end_time']
        fid.write(f'{cur_name},{xmin},{ymin},{xmax},{ymax},{res_start_time},{res_end_time}\n')

        # downloading river segment csv
        # Arguments are passed as a list (no shell), so characters in the
        # API-supplied URL or in out_dir cannot be interpreted as shell syntax.
        # If wget is not installed this raises FileNotFoundError.
        download = subprocess.run(['wget', '-O', os.path.join(out_dir, cur_name), cur_url])
        if download.returncode != 0:
            print(f'WARNING: wget exited with status {download.returncode} for {cur_url}')

        # bounding box of the segment as a closed ring, for metadata.geojson
        lat_point_list = [ymax, ymax, ymin, ymin, ymax]
        lon_point_list = [xmin, xmax, xmax, xmin, xmin]
        bbox_polygons.append(shapely.geometry.Polygon(zip(lon_point_list, lat_point_list)))
        downloaded_names.append(cur_name)

if bbox_polygons:
    # One row per downloaded segment: its bounding box with the filename as attribute.
    pf = geopandas.GeoDataFrame(geometry=bbox_polygons, crs="EPSG:4326")
    pf['filename'] = downloaded_names
    pf.to_file(filename=os.path.join(out_dir, 'metadata.geojson'), driver="GeoJSON")
else:
    # Previously an empty result crashed with a NameError on pf.
    print('No resources matched the query; metadata.geojson was not written.')

Number of resources: 20
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051115400-1923.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051115400-1879.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051115400-1843.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051115400-1779.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051115400-1731.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-32202.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-32158.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-32122.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-32084.csv
https://data.mint.isi.edu/files/River_Segment_Surface_Area_Dataset//data-1051038740-32044.csv
https://data.mint.isi.edu/files/River_Seg

In [20]:
# Show the search bounding box ([xmin, ymin, xmax, ymax]) that was sent in the query.
print(bounding_box)

[41.468759, 3.775651, 42.024722, 4.126743]


In [25]:
# Print the bounding box reported by each returned resource, one per line.
for resource in response['resources']:
    extent = resource['resource_metadata']['spatial_coverage']['value']
    print(extent)

{'xmax': 45.0234617921, 'xmin': 45.0094617921, 'ymax': 4.9566164243, 'ymin': 4.9426164243}
{'xmax': 45.0218382965, 'xmin': 45.0078382965, 'ymax': 4.9625872426, 'ymin': 4.9485872426}
{'xmax': 45.0177792673, 'xmin': 45.0037792673, 'ymax': 4.9674725041, 'ymin': 4.9534725041}
{'xmax': 45.0172381451, 'xmin': 45.0032381451, 'ymax': 4.9737146898, 'ymin': 4.9597146898}
{'xmax': 45.0126377797, 'xmin': 44.9986377797, 'ymax': 4.9777857296, 'ymin': 4.9637857296}
{'xmax': 41.5537686006, 'xmin': 41.5397686006, 'ymax': 3.9886435484, 'ymin': 3.9746435484}
{'xmax': 41.5597115266, 'xmin': 41.5457115266, 'ymax': 3.9897096446, 'ymin': 3.9757096446}
{'xmax': 41.56455945, 'xmin': 41.55055945, 'ymax': 3.98589864, 'ymin': 3.97189864}
{'xmax': 41.5696781355, 'xmin': 41.5556781355, 'ymax': 3.9823579272, 'ymin': 3.9683579272}
{'xmax': 41.5723611346, 'xmin': 41.5583611346, 'ymax': 3.9769268595, 'ymin': 3.9629268595}
{'xmax': 41.5785696797, 'xmin': 41.5645696797, 'ymax': 3.9766363611, 'ymin': 3.9626363611}
{'xmax'