## Pulling data from the SafetyPAD API

SafetyPAD is one of the systems that FEMS uses to track the outcomes of patients that
they serve. This is the script we used to pull data from the SafetyPAD API.

Please note that this script is preserved _as it was_ at the time of running, because
running it again may change the analysis. Thus, if you wish to rerun it, you will need to
perform several steps:
  * You will need to procure a SafetyPAD API key and fill it in below
  * You will need to alter several of the directory names as the overall structure
    of this repository has been refactored in the intervening time.

In [None]:
import copy
import glob
import json
import pickle
import time
from itertools import chain
from typing import Union
from urllib import parse

import pandas as pd
import requests
from lxml import etree

In [None]:
API_KEY = << YOUR API KEY >>

### Searching PCRs

In [None]:
# Root endpoint of the SafetyPAD API. Requests are GETs against this URL with
# the action, API key and other arguments appended as a query string.
BASE_URL = r"https://dcfems.safetypad.com/api/"


def is_success(tree):
    """Report whether a parsed API response carries a ``SUCCESS`` status.

    Args:
        tree: Parsed response exposing an ``xpath`` method.

    Returns:
        ``True`` only when the first ``//safetypadapiresponse/status`` node
        exists and its text is exactly ``"SUCCESS"``; ``False`` otherwise,
        including when no status node is present.
    """
    status_nodes = tree.xpath("//safetypadapiresponse/status")
    return bool(status_nodes) and status_nodes[0].text == "SUCCESS"


def to_dict(pcr):
    """Flatten an XML element into a ``{child tag: child text}`` mapping.

    Args:
        pcr: An element whose direct children hold one value each.

    Returns:
        A dict keyed by each direct child's tag, with that child's text as
        the value. If a tag occurs more than once, the last occurrence wins.
    """
    # Iterate the element directly rather than calling ``getchildren()``:
    # that method is deprecated in lxml and no longer exists on
    # xml.etree.ElementTree elements (removed in Python 3.9).
    return {child.tag: child.text for child in pcr}


# Field codes requested from the API for every PCR. The individually named
# fields come first, followed by the numbered ranges appended below.
COLUMNS = [
    "eCase.01m",  # PCR ID
    "eResponse.01",  # Agency Number
    "eResponse.02",  # Agency Name
    "eResponse.03",  # Incident Number
    "eResponse.05",  # Type of service requested
    "eResponse.07",  # Primary role of unit
    "eResponse.14",  # EMS Unit Call Sign
    "eResponse.14s",  # Shift
    "eResponse.15",  # Level of care of unit
    "eDispatch.01",  # Complaint reported by dispatch
    "eTimes.01",  # PSAP Call Date/Time
    "eTimes.02",  # Dispatch Notified Date/Time
    "eTimes.03",  # Unit Notified by Dispatch Date/Time
    "eDisposition.01",  # Destination/Transferred To, Name
    "eDisposition.12",  # Incident/Patient Disposition
    "eScene.15",  # Incident Street Address
    "eScene.17",  # Incident City
    "eScene.18",  # Incident State
    "eScene.19",  # Incident ZIP Code
    "ePayment.01",  # Primary Method of Payment
    "ePayment.10",  # Insurance Company Name
    "ePayment.17",  # Insurance Group ID
    "ePayment.18",  # Insurance Policy ID Number
    "eNarrative.01",  # Narrative data about the incident
    "eCrew.01",  # Crew id
    "ePatient.18s",  # Patient phone number type
]
# Patient detail, ePatient.02 through ePatient.21; 01 causes issues
COLUMNS += ["ePatient.%02d" % num for num in range(2, 22)]
# Signature information, ePatient.23s and ePatient.24s
COLUMNS += ["ePatient.%02ds" % num for num in range(23, 25)]
# Hospital information, ePatient.25c through ePatient.27c
COLUMNS += ["ePatient.%02dc" % num for num in range(25, 28)]
# Outcome information, eOutcome.01 through eOutcome.17
COLUMNS += ["eOutcome.%02d" % num for num in range(1, 18)]


class ApiShim:
    """Thin client for the SafetyPAD HTTP API.

    Every call issues one GET request against ``BASE_URL`` and parses the
    response body as XML.

    Args:
        api_key: SafetyPAD API key, sent as the ``api_key`` query argument
            on every request.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``. Defaults to 60. Pass ``None`` to
            wait indefinitely.
    """

    def __init__(self, api_key, timeout=60):
        self.api_key = api_key
        # Without a timeout a single stalled connection would block a long
        # pull forever, so every request is bounded.
        self.timeout = timeout
        # Sent as the ``columns`` argument by ``view_pcr``.
        self.columns = ",".join(COLUMNS)

    def _action(self, action, attrs, parser=etree):
        """Call one API action and return the parsed response.

        Args:
            action: Name of the API action, sent as the ``action`` argument.
            attrs: Mapping of extra query arguments. It is copied, so the
                caller's object is not modified.
            parser: Object exposing ``fromstring``; defaults to ``lxml.etree``.

        Returns:
            Whatever ``parser.fromstring`` produces from the response body.

        Raises:
            requests.exceptions.Timeout: If the server does not answer
                within ``self.timeout`` seconds.
        """
        attrs = copy.copy(attrs)
        attrs["action"] = action
        attrs["api_key"] = self.api_key

        # Pull data
        resp = requests.get(
            BASE_URL + "?" + parse.urlencode(attrs), timeout=self.timeout
        )

        # Parse data
        return parser.fromstring(resp.content)

    def search_pcrs(self, max_num=10):
        """List PCRs as a DataFrame.

        Args:
            max_num: Value sent as the ``max`` query argument.

        Returns:
            A ``pandas.DataFrame`` with one row per ``<pcr>`` element in the
            response, with columns taken from each element's child tags.

        Raises:
            ValueError: If the response status is not ``SUCCESS``.
        """
        tree = self._action("search_pcrs", {"max": max_num})
        if not is_success(tree):
            raise ValueError("Something went wrong retrieving tree")
        data = [
            to_dict(pcr)
            for pcr in tree.xpath("//safetypadapiresponse/message/pcrs/pcr")
        ]
        return pd.DataFrame(data)

    def view_pcr(self, pcr_id):
        """Fetch the configured columns for a single PCR.

        Unlike ``search_pcrs``, the response status is not checked here;
        the caller receives whatever the API returned.

        Args:
            pcr_id: PCR identifier; converted to ``str`` for the query.

        Returns:
            A JSON-serialisable dict with two keys: ``"raw"``, the whole
            response re-serialised as an XML string, and ``"parsed"``, a
            list with one ``{"tag", "attributes", "text"}`` dict for each
            node matched by ``//pcr/*``.
        """
        # Filter "e1 <o1> v1": field 676 equals pcr_id. 676 is presumably
        # the API's numeric id for the PCR ID field -- TODO confirm.
        tree = self._action(
            "search_pcrs",
            {"e1": 676, "o1": "equals", "v1": str(pcr_id), "columns": self.columns},
        )

        return {
            "raw": etree.tostring(tree).decode("utf8"),
            "parsed": [
                {
                    "tag": str(node.tag),
                    "attributes": dict(node.attrib),
                    # str() means a node whose text is None is stored as
                    # the string "None".
                    "text": str(node.text),
                }
                for node in tree.xpath("//pcr/*")
            ],
        }

In [None]:
# Whip up a shim
shim = ApiShim(api_key=API_KEY)

# Pull some PCRs
# shim.search_pcrs()

# View a specific PCR
# shim.view_pcr(941389)

# Smoke test: fetch one PCR by id. This performs a live API request; as the
# last expression of the cell the returned dict is what gets displayed.
shim.view_pcr(949257)

In [None]:
# Load in pcrs to pull
# NOTE: unpickling can execute arbitrary code, so only load a file you trust.
# The ``with`` block makes sure the file handle is closed after loading.
with open("PCRS_forAPI/PCR_list.pkl", "rb") as pkl_file:
    pcrs = pickle.load(pkl_file)

In [None]:
def _is_num(x: Union[str, int]) -> Union[int, bool]:
    """Convert ``x`` to an int, or return ``False`` if it cannot be converted.

    Args:
        x: Value to convert, typically a string or an int.

    Returns:
        ``int(x)`` when the conversion succeeds, otherwise ``False``. A
        value that converts to ``0`` is returned as ``0``, which is falsy,
        so callers that filter on truthiness will drop it as well.
    """
    try:
        return int(x)
    # Only the errors ``int()`` raises for bad input are treated as "not a
    # number" (OverflowError covers infinite floats). A bare ``except``
    # would also swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        return False

In [None]:
# Sort so this file can be run consistently. Values that do not convert to a
# non-zero int are dropped, and duplicates collapse in the set.
pcrs = sorted({num for num in map(_is_num, pcrs.values) if num})

In [None]:
# Pull data. Note that we checkpoint at various times to make sure that we don't
# lose any data
current_data = []
# Offset into ``pcrs`` to start from; ``i`` below counts from ``start_at + 1``.
start_at = 0
for i, pcr_id in enumerate(pcrs[start_at:], start_at + 1):
    if i % 40 == 0:
        # Checkpoint: write everything gathered since the previous checkpoint
        # to its own file, then start a new batch. This happens before record
        # ``i`` is fetched, so record ``i`` lands in the next batch.
        with open("private_data/pcrs/group_{}.json".format(i), "wt") as f:
            json.dump(current_data, f)
        current_data = []
        print("On {}/{}".format(i, len(pcrs)))
    data = shim.view_pcr(pcr_id)
    current_data.append(data)
    # Pause one second between requests.
    time.sleep(1)

# Records gathered after the last checkpoint are written here. Note that this
# file name does not start with "group".
with open("private_data/pcrs/final.json", "wt") as f:
    json.dump(current_data, f)

In [None]:
# Check to see if we missed anything
all_data = []

# glob returns paths in arbitrary order; sorting the paths means the records
# are loaded in the same order on every run and on every machine.
for filename in sorted(glob.glob("private_data/pcrs/group*.json")):
    with open(filename) as f:
        # Each checkpoint file holds one list of records.
        all_data.append(json.load(f))

In [None]:
# Number of checkpoint files loaded (one entry per file)
len(all_data)

In [None]:
# Flatten the per-file lists into a single list of records
data = list(chain.from_iterable(all_data))

In [None]:
# Total number of records across all loaded checkpoint files
len(data)

In [None]:
def get_pcr(datum):
    """Return the PCR ID stored in one record's parsed field list.

    Args:
        datum: Iterable of dicts that each have ``"tag"`` and ``"text"`` keys.

    Returns:
        The text of the first ``eCase.01m`` entry, converted to ``int``.

    Raises:
        IndexError: If no entry is tagged ``eCase.01m``.
        ValueError: If the text of any ``eCase.01m`` entry is not an integer.
    """
    pcr_ids = []
    for field in datum:
        if field["tag"] == "eCase.01m":
            pcr_ids.append(int(field["text"]))
    return pcr_ids[0]


# PCR IDs that appear in the records loaded so far
pulled_pcrs = set(get_pcr(record["parsed"]) for record in data)

In [None]:
# Any extras we missed? Count the requested PCR IDs with no loaded record.
len(set(pcrs).difference(pulled_pcrs))

In [None]:
# Pull the missing ones
current_data = []
# Fetch every requested PCR ID that is not among the loaded records, one
# request at a time.
for i, pcr_id in enumerate(set(pcrs) - set(pulled_pcrs)):
    print("On", i)
    this_datum = shim.view_pcr(pcr_id)
    current_data.append(this_datum)
    # Pause one second between requests.
    time.sleep(1)

In [None]:
# Append the newly pulled records to the ones loaded from disk
data.extend(current_data)

In [None]:
# Are we missing any more? There must be as many records as requested IDs...
assert len(pcrs) == len(data)

# ...and every requested ID must appear among the pulled records.
pulled_pcrs = set(get_pcr(record["parsed"]) for record in data)
assert set(pcrs).issubset(pulled_pcrs)

In [None]:
# Dump the data
# "wt" truncates the file, so this replaces any existing final.json with the
# complete list of records.
with open("private_data/pcrs/final.json", "wt") as f:
    json.dump(data, f)