Pull one page of up to 100 results from SeeClickFix, using the function's default search parameters when none are supplied. If the search matches more than 100 results, make another pull, passing parameters that include the number of the next page to be pulled, and repeat until every result has been retrieved.

In [1]:
import requests
import json

In [2]:
def get_seeclickfix(page=1, pulled=0, search_params=None):
    """Download SeeClickFix issue search results, writing one JSON file per page.

    Each response is saved as ``seeclickfix{page}.json`` in the current
    working directory. Pages are requested one after another until the running
    count of pulled results reaches the total number of entries reported in
    the response's pagination metadata.

    Args:
        page: Number of the first page to request; also used in the file name.
        pulled: Number of results already retrieved before this call. Pass a
            matching non-zero value when resuming from a later ``page`` so the
            stop condition stays accurate.
        search_params: Query parameters sent to the issues endpoint. When
            omitted, ``place_url='district-of-columbia'``,
            ``after='2020-10-01T00:00:00Z'`` and ``per_page=100`` are used.
            The mapping is copied, so the caller's dict is never modified.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.RequestException: If the request fails or times out.
    """
    # Work on a private copy of the parameters. A dict literal used as the
    # default argument is created once and shared by every call, so the 'page'
    # entry written below would leak into later calls that rely on the default.
    if search_params is None:
        params = {
            'place_url': 'district-of-columbia',
            'after': '2020-10-01T00:00:00Z',
            'per_page': 100,
        }
    else:
        params = dict(search_params)

    # Request the page named by the `page` argument unless the caller already
    # chose one in the parameters, so the file name matches the file content.
    params.setdefault('page', page)

    # base_url for seeclickfix api to build the request url
    base_url = 'https://seeclickfix.com/api/v2/issues'

    # Loop instead of recursing so a long result set cannot exhaust the stack.
    while True:
        # A timeout keeps a stalled connection from blocking forever.
        r = requests.get(url=base_url, params=params, timeout=30)

        # Raises requests.HTTPError for 4xx/5xx responses; no-op otherwise.
        r.raise_for_status()

        # load the response data into a dict variable
        data = r.json()
        print(data['metadata'])

        # total number of results matching the search, across all pages
        pagination = data['metadata']['pagination']
        count_all = pagination['entries']
        print(count_all)

        # Track how many results have been covered so far. Use the page size
        # the API reports rather than assuming 100, so a custom 'per_page'
        # is counted correctly; fall back to 100 if it is missing or zero so
        # the loop always makes progress.
        pulled += pagination.get('per_page') or 100

        # write this page to a file whose name reflects the page number
        file_name = f"seeclickfix{page}.json"
        with open(file_name, 'w', encoding='utf-8') as outfile:
            json.dump(data, outfile)

        # stop once every reported entry has been covered
        if pulled >= count_all:
            break

        # otherwise move on to the next page
        page += 1
        params['page'] = page
        # print the params to monitor progress
        print(params)

In [3]:
# Entry point: pull every page of results using the default search parameters.
if __name__ == "__main__":
    get_seeclickfix()

{'pagination': {'entries': 491, 'page': 1, 'per_page': 100, 'pages': 5, 'next_page': 2, 'next_page_url': 'https://seeclickfix.com/api/v2/issues?after=2020-10-01T00%3A00%3A00Z&page=2&per_page=100&place_url=district-of-columbia', 'previous_page': None, 'previous_page_url': None}}
491
{'place_url': 'district-of-columbia', 'after': '2020-10-01T00:00:00Z', 'per_page': 100, 'page': 2}
{'pagination': {'entries': 491, 'page': 2, 'per_page': 100, 'pages': 5, 'next_page': 3, 'next_page_url': 'https://seeclickfix.com/api/v2/issues?after=2020-10-01T00%3A00%3A00Z&page=3&per_page=100&place_url=district-of-columbia', 'previous_page': 1, 'previous_page_url': 'https://seeclickfix.com/api/v2/issues?after=2020-10-01T00%3A00%3A00Z&page=1&per_page=100&place_url=district-of-columbia'}}
491
{'place_url': 'district-of-columbia', 'after': '2020-10-01T00:00:00Z', 'per_page': 100, 'page': 3}
{'pagination': {'entries': 491, 'page': 3, 'per_page': 100, 'pages': 5, 'next_page': 4, 'next_page_url': 'https://seeclick