# MN Geospatial Commons API

This notebook searches the MN Geospatial Commons using tags and returns search results. It then downloads either a single dataset from the search results or all matching shapefile datasets, and unzips the downloaded archives.

In [None]:
import requests
from zipfile import ZipFile

In [None]:
# Fetch the dataset, tag, and group listings from the MN Geospatial Commons API.
# NOTE(review): verify=False turns off TLS certificate verification; it is kept
# from the original notebook -- confirm whether it is still needed for this host.
# The timeout (seconds) makes a stalled connection raise instead of hanging the cell.
packages = requests.get("https://gisdata.mn.gov/api/3/action/package_list", verify=False, timeout=30)
tag_list = requests.get("https://gisdata.mn.gov/api/3/action/tag_list", verify=False, timeout=30)
group_list = requests.get("https://gisdata.mn.gov/api/3/action/group_list", verify=False, timeout=30)

In [None]:
def _load_result(response):
    """Decode *response* as JSON and assert that the API reported success."""
    payload = response.json()
    assert payload['success'] is True
    return payload


# All datasets available
packages_dict = _load_result(packages)

# All tags available
tag_dict = _load_result(tag_list)

# All categories available
group_dict = _load_result(group_list)

In [None]:
# Pull the 'result' payload out of each decoded API response.
datasets, tags, groups = (
    body['result'] for body in (packages_dict, tag_dict, group_dict)
)

In [46]:
# Preview the first 10 tags and all groups; adjust the slice bound to see more or fewer tags.
print(f"tags: {tags[:10]}")
print(f"\ngroups: {groups}")

tags: ['100k index', '103e', '15 minute', '1994', '1-meter orthophoto', '1 water supply planning working groups', '2000 census', '2002', '2010', '2010 census']

groups: ['biota', 'boundaries', 'climatology', 'economy', 'elevation', 'environment', 'farming', 'geoscientific', 'health', 'imagery-basemaps', 'inland-waters', 'intelligence-military', 'location', 'planning-cadastre', 'society', 'structure', 'transportation', 'utilities-communication']


In [None]:
# Run this cell to display the full list of available datasets.
datasets

### Querying and extracting data

In [47]:
# Use '+' to separate multiple tags in the query, e.g. 2020+farming
query = "2030"

# NOTE(review): verify=False turns off TLS certificate verification; it is kept
# from the original notebook -- confirm whether it is still needed for this host.
# The timeout (seconds) makes a stalled connection raise instead of hanging the cell.
search_response = requests.get(
    f"https://gisdata.mn.gov/api/3/action/package_search?q={query}",
    verify=False,
    timeout=30,
)



In [48]:
# Decode the JSON body of the search response and stop if the API did not report success.
search = search_response.json()
assert search['success'] is True

In [49]:
# Print the result count reported by the search response.
print(f"Search result count: {search['result']['count']}")

Search result count: 9


In [None]:
# Get the download URL and id of one resource from the first search result.
# NOTE(review): index [1] presumably picks the zipped download among the
# result's resources -- confirm, since the resource order is not checked here.
resource = search['result']['results'][0]['resources'][1]
data = resource['url']
data_id = resource['id']
out_name = f"{data_id}.zip"

# Download the archive; the timeout (seconds) keeps a stalled connection from
# hanging the cell.
r = requests.get(data, timeout=60)
# Compare with ==, not `is`: whether two equal ints are the same object is an
# implementation detail, and `is 200` triggers a SyntaxWarning on Python 3.8+.
assert r.status_code == 200, f"download failed with HTTP status {r.status_code}"

# Save the archive under a relative path, i.e. in the current working
# directory (the notebook's folder when run from Jupyter).
with open(out_name, "wb") as file:
    file.write(r.content)

# List the archive members, then extract them into the current working directory.
with ZipFile(out_name, "r") as zipped:
    print(zipped.namelist())
    zipped.extractall()

In [None]:
# Collect the URL and id of every resource in ALL search results whose format
# matches the one specified below (here: 'SHP').
# These two parallel lists can then be used to download each matching dataset.
shp_resources = [
    resource
    for result in search['result']['results']
    for resource in result['resources']
    if resource['format'] == 'SHP'  # keep only the specified format
]
search_urls = [resource['url'] for resource in shp_resources]
ids = [resource['id'] for resource in shp_resources]

In [None]:
# Download every collected URL, save it as "<resource id>.zip" in the current
# working directory, and unzip it there.
for url, resource_id in zip(search_urls, ids):
    # The timeout (seconds) keeps a stalled connection from hanging the cell.
    r = requests.get(url, timeout=60)
    # Compare with ==, not `is`: whether two equal ints are the same object is
    # an implementation detail, and `is 200` triggers a SyntaxWarning on 3.8+.
    assert r.status_code == 200, f"download of {url} failed with HTTP status {r.status_code}"

    archive_name = f"{resource_id}.zip"
    with open(archive_name, "wb") as file:
        file.write(r.content)

    # List the archive members, then extract them into the current working directory.
    with ZipFile(archive_name, "r") as zipped:
        print(zipped.namelist())
        zipped.extractall()