In [140]:
import requests
import pandas as pd

In [141]:
BASE_URL ='https://myhospitalsapi.aihw.gov.au//api/v1'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'}
measure_category_code = 'MYH-ED-TIME'

In [194]:
def create_table(measure_code: str):
    # CREATE VALUES TABLE
    reported_measure_response = requests.get(f'{BASE_URL}/measures/{measure_code}/data-items', headers=headers).json()
    reported_measure_list = []
    for result in reported_measure_response['result']:
        reported_measure_list.append([result['reported_measure_code'], result['reporting_unit_summary']['reporting_unit_code'], result['reporting_unit_summary']['reporting_unit_name'], result['value']])
    reported_measure_df = pd.DataFrame(data=reported_measure_list, columns=['reported_measure_code', 'reporting_unit_code', 'reporting_unit_name', 'value'])
    
    # CREATE REPORTED MEASURE NAMES TABLE
    reported_measure_code_list = list(reported_measure_df['reported_measure_code'].unique())
    reported_measure_list = []
    for reported_measure_code in reported_measure_code_list:
        reported_measure_code_name_response = requests.get(f'{BASE_URL}/reported-measures/{reported_measure_code}', headers=headers).json()
        reported_measure_list.append([reported_measure_code, reported_measure_code_name_response['result']['reported_measure_name']])
    reported_measure_name_df = pd.DataFrame(data=reported_measure_list, columns=['reported_measure_code', 'reported_measure_name'])
    reported_measure_name_df.head()
    
    # JOIN VALUES TABLE AND REPORTED MEASURE NAMES TABLE
    df_join = pd.merge(reported_measure_df, reported_measure_name_df, on='reported_measure_code',how='inner')
    df_join.head()
    
    df_select = df_join[['reporting_unit_name', 'reported_measure_name', 'value']]
    return df_select
create_table('MYH0036')    

Unnamed: 0,reporting_unit_name,reported_measure_name,value
0,The Children's Hospital at Westmead,Subsequently admitted patients,355.0
1,The Children's Hospital at Westmead,Subsequently admitted patients,338.0
2,The Children's Hospital at Westmead,Subsequently admitted patients,314.0
3,The Children's Hospital at Westmead,Subsequently admitted patients,256.0
4,The Children's Hospital at Westmead,Subsequently admitted patients,263.0
...,...,...,...
10195,Australian Capital Territory,All patients,194.0
10196,Australian Capital Territory,All patients,207.0
10197,Australian Capital Territory,All patients,212.0
10198,Australian Capital Territory,All patients,217.0


In [142]:
'''
CREATE MEASURE TABLE
EXAMPLE URL: https://myhospitalsapi.aihw.gov.au//api/v1/measure-categories/MYH-ED-TIME/measures
'''
measure_response = requests.get(f'{BASE_URL}/measure-categories/{measure_category_code}/measures', headers=headers).json()
measure_list = []
for i in measure_response['result']:
    measure_list.append([i['measure_code'], i['measure_name']])
measure_df = pd.DataFrame(data=measure_list, columns=['measure_code', 'measure_name'])
measure_df=measure_df.set_index('measure_code')
measure_df.head()

Unnamed: 0_level_0,measure_name
measure_code,Unnamed: 1_level_1
MYH0005,Percentage of patients who depart the emergenc...
MYH0012,Number of patients presenting to the emergency...
MYH0013,Time until most patients (90%) departed the em...
MYH0036,Median time (50%) patients departed emergency ...


In [189]:
'''
CREATE VALUES TABLE
EXAMPLE URL: https://myhospitalsapi.aihw.gov.au//api/v1/measures/MYH0036/data-items
'''
measure_code = 'MYH0036'
reported_measure_response = requests.get(f'{BASE_URL}/measures/{measure_code}/data-items', headers=headers).json()
reported_measure_list = []
for result in reported_measure_response['result']:
    reported_measure_list.append([result['reported_measure_code'], result['reporting_unit_summary']['reporting_unit_code'], result['reporting_unit_summary']['reporting_unit_name'], result['value']])
reported_measure_df = pd.DataFrame(data=reported_measure_list, columns=['reported_measure_code', 'reporting_unit_code', 'reporting_unit_name', 'value'])
reported_measure_df.head()

Unnamed: 0,reported_measure_code,reporting_unit_code,reporting_unit_name,value
0,MYH-RM0298,H0014,The Children's Hospital at Westmead,355.0
1,MYH-RM0299,H0014,The Children's Hospital at Westmead,170.0
2,MYH-RM0300,H0014,The Children's Hospital at Westmead,204.0
3,MYH-RM0298,H0014,The Children's Hospital at Westmead,338.0
4,MYH-RM0299,H0014,The Children's Hospital at Westmead,173.0


In [191]:
'''
CREATE REPORTED MEASURE NAMES TABLE
EXAMPLE URL: https://myhospitalsapi.aihw.gov.au//api/v1/reported-measures/MYH-RM0025
'''
reported_measure_code_list = list(reported_measure_df['reported_measure_code'].unique())
reported_measure_list = []
for reported_measure_code in reported_measure_code_list:
    reported_measure_code_name_response = requests.get(f'{BASE_URL}/reported-measures/{reported_measure_code}', headers=headers).json()
    reported_measure_list.append([reported_measure_code, reported_measure_code_name_response['result']['reported_measure_name']])
reported_measure_name_df = pd.DataFrame(data=reported_measure_list, columns=['reported_measure_code', 'reported_measure_name'])
reported_measure_name_df.head()

Unnamed: 0,reported_measure_code,reported_measure_name
0,MYH-RM0298,Subsequently admitted patients
1,MYH-RM0299,Not subsequently admitted patients
2,MYH-RM0300,All patients


In [269]:
'''
CREATE REPORTING UNITS TABLE (SINGLE)
EXAMPLE URL: https://myhospitalsapi.aihw.gov.au/api/v1/reporting-units/H0014/
'''
reporting_unit_code = 'H0220'
reporting_unit_response = requests.get(f'{BASE_URL}/reporting-units/{reporting_unit_code}', headers=headers).json()
result = reporting_unit_response['result']
reporting_unit_list = []

mapped_reporting_units = result['mapped_reporting_units']
state = ""
for mapped_reporting_unit in mapped_reporting_units:
    if mapped_reporting_unit['map_type']['mapped_reporting_unit_code'] == "STATE_MAPPING":
        state = mapped_reporting_unit['mapped_reporting_unit']['reporting_unit_code']
        break 

reporting_unit_list.append([reporting_unit_code, result['closed'], result['private'], result['latitude'], result['longitude'],state])
reporting_unit_df = pd.DataFrame(data=reporting_unit_list, columns=['reporting_unit_code','closed', 'private', 'latitude', 'longitude', 'state'])
reporting_unit_df.head()

Unnamed: 0,reporting_unit_code,closed,private,latitude,longitude,state
0,H0220,False,False,-32.635114,147.568997,NSW


In [271]:
'''
CREATE REPORTING UNITS TABLE
EXAMPLE URL: https://myhospitalsapi.aihw.gov.au/api/v1/reporting-units
'''
reporting_unit_response = requests.get(f'{BASE_URL}/reporting-units', headers=headers).json()
reporting_unit_list.append([result['reporting_unit_code'], result['reporting_unit_name']])   

reporting_unit_list = []
for result in reporting_unit_response['result']:
    # Get state
    mapped_reporting_unit = result['mapped_reporting_units']
    state = None
    for mapped_reporting_unit in mapped_reporting_units:
        if mapped_reporting_unit['map_type']['mapped_reporting_unit_code'] == "STATE_MAPPING":
            state = mapped_reporting_unit['mapped_reporting_unit']['reporting_unit_code']
            break # Set state as the first reporting_unit_code in mapped_reporting_units
    # Get reporting unit info
    reporting_unit_list.append([result['reporting_unit_code'], result['closed'], result['private'], result['latitude'], result['longitude'],state])
    
reporting_unit_df = pd.DataFrame(data=reporting_unit_list, columns=['reporting_unit_code','closed', 'private', 'latitude', 'longitude', 'state'])
reporting_unit_df.head()

Unnamed: 0,reporting_unit_code,closed,private,latitude,longitude,state
0,H0012,False,False,-31.960937,115.788431,NSW
1,H0013,False,False,-33.969070,151.243206,NSW
2,H0014,False,False,-33.801554,150.991759,NSW
3,H0015,False,False,-33.917179,151.238334,NSW
4,H0016,False,False,-33.880525,151.219237,NSW
...,...,...,...,...,...,...
1338,PHN502,False,False,,,NSW
1339,PHN503,False,False,,,NSW
1340,PHN601,False,False,,,NSW
1341,PHN701,False,False,,,NSW


In [272]:
'''
JOIN VALUES TABLE AND REPORTED MEASURE NAMES TABLE
'''
df_join = pd.merge(reported_measure_df, reported_measure_name_df, on='reported_measure_code',how='inner')
df_join = pd.merge(df_join, reporting_unit_df, on='reporting_unit_code',how='inner')
df_join.head()

Unnamed: 0,reported_measure_code,reporting_unit_code,reporting_unit_name,value,reported_measure_name,closed,private,latitude,longitude,state
0,MYH-RM0298,H0014,The Children's Hospital at Westmead,355.0,Subsequently admitted patients,False,False,-33.801554,150.991759,NSW
1,MYH-RM0298,H0014,The Children's Hospital at Westmead,338.0,Subsequently admitted patients,False,False,-33.801554,150.991759,NSW
2,MYH-RM0298,H0014,The Children's Hospital at Westmead,314.0,Subsequently admitted patients,False,False,-33.801554,150.991759,NSW
3,MYH-RM0298,H0014,The Children's Hospital at Westmead,256.0,Subsequently admitted patients,False,False,-33.801554,150.991759,NSW
4,MYH-RM0298,H0014,The Children's Hospital at Westmead,263.0,Subsequently admitted patients,False,False,-33.801554,150.991759,NSW


In [274]:
'''
SELECT COLUMNS
'''
df_select = df_join[['reporting_unit_code','reporting_unit_name', 'state', 'reported_measure_name', 'value']]
df_select.iloc[3400:3410]

Unnamed: 0,reporting_unit_code,reporting_unit_name,state,reported_measure_name,value
3400,H0154,Corowa Health Service,NSW,All patients,70.0
3401,H0154,Corowa Health Service,NSW,All patients,72.0
3402,H0154,Corowa Health Service,NSW,All patients,78.0
3403,H0154,Corowa Health Service,NSW,All patients,72.0
3404,H0154,Corowa Health Service,NSW,All patients,69.0
3405,H0155,Deniliquin Hospital,NSW,Subsequently admitted patients,201.0
3406,H0155,Deniliquin Hospital,NSW,Subsequently admitted patients,212.0
3407,H0155,Deniliquin Hospital,NSW,Subsequently admitted patients,185.0
3408,H0155,Deniliquin Hospital,NSW,Subsequently admitted patients,187.0
3409,H0155,Deniliquin Hospital,NSW,Subsequently admitted patients,213.0


In [185]:
'''TO DO
FILTER BY
reporting_unit_code
reported_measure_code

GET YEAR

Check other measures e.g. number of presentations

Load pandas df to spark df and partition + cluster


'''