In [16]:
import os.path
import json
import requests
from requests.auth import HTTPBasicAuth
from urllib.parse import quote_plus
import time

# API key for the Boston 311 Open311 endpoint.
# Prefer supplying it through the BOSTON_311_API_KEY environment variable so the
# credential does not have to live in the notebook. The literal is kept only as a
# fallback so the notebook keeps running without any extra configuration.
# NOTE(review): the fallback key is stored here in plain text - consider rotating it
# and removing the default.
KEY_311 = os.environ.get('BOSTON_311_API_KEY') or 'E6100B3368DD7876FC970F9C1E1627BC'
# HTTP Basic credentials sent with every query: user name 'key', password = the API key.
AUTH_311 = HTTPBasicAuth('key', KEY_311)
# URL of the API's services listing.
API_URL_311 = 'https://boston2-production.spotmobile.net/open311/v2/services.json'
# Service code for parking enforcement, URL-encoded so it can be placed directly in a query string.
ILLEGAL_PARKING_SERVICE_CODE = quote_plus("Transportation - Traffic Division:Enforcement & Abandoned Vehicles:Parking Enforcement")

This file contains methods which will retrieve all of the Illegal Parking Service Requests submitted to Boston 311 in the last 90 days.
Executing the notebook will append new Service Requests to the end of the existing file, then clean up any duplicates.

Change the value of the `FILENAME` variable if you want a fresh file containing the last 90 days of Illegal Parking Service Requests.
To add to an existing file containing Illegal Parking Service Requests, set the `FILENAME` variable to the file-path and ensure the file is a JSON file containing a single array. The following method will append new Service Requests to the end of the existing ones.

In [17]:
# Path of the JSON file that the service requests are written to (or appended to, if it already exists).
FILENAME = 'data/service_requests.json'

In [18]:
def execute_single_query(page_num:int, timeout:float=30):
    '''
    Sends an HTTP GET request to the Boston 311 API for Illegal Parking.
    Requests 100 (maximum allowed) service requests for a given page.

    page_num: number of the result page to fetch.
    timeout: seconds to wait for the server before `requests` gives up and raises
             a timeout exception. Without it a stalled connection would block forever.

    Returns: HTTP Response Object
    '''
    # ILLEGAL_PARKING_SERVICE_CODE is already URL-encoded, so it is placed in the
    # URL directly rather than passed via `params` (which would encode it again).
    query_url = f'https://boston2-production.spotmobile.net/open311/v2/requests.json?service_code={ILLEGAL_PARKING_SERVICE_CODE}&per_page=100&page={page_num}'

    return requests.get(url=query_url, auth=AUTH_311, timeout=timeout)

def retreive_all_service_requests(filename:str):
    '''
    Continually sends HTTP Requests to the Boston 311 API for Illegal Parking Service Requests.
    Concatenates responses into a single JSON Array containing Service Request JSON Objects.
    Updates are sent to the console throughout the process.

    Paging stops at the first response whose status is neither 200 nor 429, or at the
    first 200 response that contains no service requests. A 429 response is retried
    after a 60 second pause.

    If the given filename already exists, the gathered Service Requests are appended to the
    JSON Array stored in it. If the existing file does not contain a JSON Array, a message
    is printed and the file is left untouched. If the file does not exist it is created,
    together with any missing parent directory.

    Returns: None. The JSON Array is written to the given filename once all service
    requests are received. Nothing is written if the very first query fails.
    '''

    # Executes first query
    page = 1
    current_response = execute_single_query(page)

    # Ensure HTTP Error is captured
    if current_response.status_code != 200:
        print(f'First Query failed. Error code: {current_response.status_code}')
        return
    page += 1

    # Creates JSON Array Object on response
    all_service_requests_json = current_response.json()

    # Continually execute queries for the next page until there are no more
    while True:
        current_response = execute_single_query(page)
        status_code = current_response.status_code

        # Successful response has service request JSON Objects concatenated to end of overall JSON Array
        if status_code == 200:
            page_service_requests = current_response.json()

            # An empty page means there is nothing left to fetch. Stopping here keeps the
            # loop from running forever if the API reports the end of the data with an
            # empty 200 response rather than an error status.
            if not page_service_requests:
                print(f'No results on page {page}. Stopping.')
                break

            print(f'Query Received: {page}')
            all_service_requests_json.extend(page_service_requests)
            page += 1

        # Error code 429 indicates rate limiting (10 GET Requests per min)
        elif status_code == 429:
            print(f'Rate Limited: Waiting 1 minute. \n Current Page {page}')
            # Wait 60 seconds, then retry the same page (page is not incremented)
            time.sleep(60)
            print('Resuming Query...')

        # Any other error code, break loop.
        else:
            print(f'Error recieved: {status_code}')
            break

    print(f'Finished querying. Total pages queried: {page}\n Printing...')

    # Write received service requests to specified filename.
    # If the file exists and it is a JSON array, append to the end of the array
    if os.path.exists(filename):
        # `with` guarantees the handle is closed even if parsing or writing fails
        with open(filename, 'r+') as all_service_requests_file:
            existing_service_requests = json.load(all_service_requests_file)

            # Make sure it is a JSON array; bail out instead of corrupting the file
            if not isinstance(existing_service_requests, list):
                print('Given filename exists and is not a JSON Array. Please supply either a new filename or one which contains an array of service requests.')
                return

            # Append the new service requests to the end of the array
            existing_service_requests.extend(all_service_requests_json)

            # Move back to the top of the file and re-write it
            all_service_requests_file.seek(0)
            all_service_requests_file.write(json.dumps(existing_service_requests))

            # Make sure to truncate if for some reason the new version is now smaller
            all_service_requests_file.truncate()

    else:
        # Create the parent directory first so the download is not lost to a
        # FileNotFoundError when e.g. 'data/' has not been created yet.
        parent_dir = os.path.dirname(filename)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(filename, 'w') as all_service_requests_file:
            all_service_requests_file.write(json.dumps(all_service_requests_json))

def clean_up_duplicate_requests(filename:str):
    '''
    Removes duplicate and invalid Service Requests from a JSON file, in place.

    The file must contain a single JSON Array. For every `service_request_id` only the
    first occurrence is kept; entries that are not JSON Objects or that have no
    (truthy) `service_request_id` are dropped. The order of the kept entries is preserved.

    If the file does not exist, or does not contain a JSON Array, a message is printed
    and nothing is modified.

    Returns: None. The filtered JSON Array is written back to the given filename.
    '''
    if not os.path.exists(filename):
        print(f'Please supply a valid filename. {filename} does not exist.')
        return

    # `with` guarantees the handle is closed even if parsing or writing fails
    with open(filename, 'r+') as requests_file:
        service_requests = json.load(requests_file)

        if not isinstance(service_requests, list):
            print(f'Please supply a JSON file containing a single array. {filename} is of type {type(service_requests)}.')
            return

        # Check each Service Request ID and make sure we haven't seen it yet.
        seen_sr_ids = set()
        no_dupes_requests = []

        for request in service_requests:
            # Anything that is not a JSON Object cannot carry an ID, so treat it as invalid
            req_id = request.get('service_request_id', False) if isinstance(request, dict) else False

            # Skip invalid service requests (must have an ID) and IDs already seen
            if not req_id or req_id in seen_sr_ids:
                continue

            seen_sr_ids.add(req_id)
            no_dupes_requests.append(request)

        # After all dupes / invalid requests have been filtered, rewrite the file
        # from the top
        requests_file.seek(0)
        requests_file.write(json.dumps(no_dupes_requests))

        # The filtered version is usually shorter, so cut off the leftover tail
        requests_file.truncate()

The following cell will collect all Illegal Parking Service Requests from the last 90 days and write them to `FILENAME`.

In [19]:
# Fetch every available page and write/append the results to FILENAME.
# The fetch pauses for 60 seconds whenever the API responds with 429 (rate limit), so this cell can take a while.
retreive_all_service_requests(FILENAME)

Query Received: 2
Query Received: 3
Query Received: 4
Query Received: 5
Query Received: 6
Query Received: 7
Query Received: 8
Query Received: 9
Query Received: 10
Query Received: 11
Rate Limited: Waiting 1 minute. 
 Current Page 12
Resuming Query...
Query Received: 12
Query Received: 13
Query Received: 14
Query Received: 15
Query Received: 16
Query Received: 17
Query Received: 18
Query Received: 19
Query Received: 20
Query Received: 21
Rate Limited: Waiting 1 minute. 
 Current Page 22
Resuming Query...
Query Received: 22
Query Received: 23
Query Received: 24
Query Received: 25
Query Received: 26
Query Received: 27
Query Received: 28
Query Received: 29
Query Received: 30
Query Received: 31
Query Received: 32
Query Received: 33
Query Received: 34
Query Received: 35
Query Received: 36
Query Received: 37
Query Received: 38
Query Received: 39
Query Received: 40
Rate Limited: Waiting 1 minute. 
 Current Page 41
Resuming Query...
Query Received: 41
Query Received: 42
Query Received: 43
Query 

The following cell will remove all duplicate service requests from `FILENAME`.

In [20]:
# Rewrite FILENAME in place, keeping only the first entry for each service_request_id
# and dropping entries that have no ID.
clean_up_duplicate_requests(FILENAME)