# Extract ABS Data
The Australian Bureau of Statstics contains datasets around the economy of Australia

In [1]:
import pandas as pd
import requests
import io
import xml.etree.ElementTree as ET

In [6]:
class ABSClient:
    BASE_URL = 'https://api.data.abs.gov.au'
    FORMAT = 'format=jsondata'

    def __init__(self):
        return self

    def get_available_dataflows_to_df(structure_type:str='dataflow', agency_id:str='ABS', detail:str='full'):
        url = f'{BASE_URL}/{structure_type}/{agency_id}'
        query = f'{FORMAT}'
        headers = {'Accepts':'application/json'}
        response = requests.get(f'{url}?{query}', headers = headers)
        return pd.read_xml(io.StringIO(response.text), xpath='//structure:Dataflow', namespaces={"structure": "http://www.sdmx.org/resources/sdmxml/schemas/v2_1/structure"})

In [15]:
df = ABSClient.get_available_dataflows_to_df()

In [16]:
df[df['id'] == 'CPI']

Unnamed: 0,id,agencyID,version,isFinal,Annotations,Name,Description,Structure
1090,CPI,ABS,1.1.0,True,\n,Consumer Price Index (CPI) 17th Series,The Consumer Price Index (CPI) measures quarte...,\n


In [19]:
def get_data(dataflow_id:str, data_key:str, start_period:str, end_period:str, detail:str='full', dimension_at_observation:str='AllDimensions'):
    url = f'{BASE_URL}/data/{dataflow_id}/{data_key}'
    query = f'{FORMAT}'
    headers = {'Accepts':'application/vnd.sdmx.data+json'}
    response = requests.get(f'{url}?{query}', headers = headers)
    return response.text


In [22]:
dataflow_id = 'CPI' #'ABS,CPI,1.0.0'
data_key = 'M1.AUS.M'
data = get_data(dataflow_id, data_key)

In [23]:
data

'Not enough key values in query, expecting 5 got 3'

In [25]:
import pandasdmx as sdmx

In [30]:
abs = sdmx.Request('ABS_XML')

In [31]:
flow_message = abs.dataflow()

In [32]:
flow_message

<pandasdmx.StructureMessage>
  <Header>
    id: 'IDREF81600'
    prepared: '2023-09-01T18:00:15.102605+10:00'
    receiver: <Agency Unknown>
    sender: <Agency Unknown>
    source: 
    test: False
  response: <Response [200]>
  DataflowDefinition (1186): ABORIGINAL_POP_PROJ ABORIGINAL_POP_PROJ_RE...
  DataStructureDefinition (1185): ABORIGINAL_POP_PROJ ABORIGINAL_POP_PR...

In [33]:
flow_message.dataflow.CPI_M

<DataflowDefinition ABS:CPI_M(1.2.0): Monthly Consumer Price Index (CPI) indicator>

In [37]:
dataflows = sdmx.to_pandas(flow_message.dataflow)

In [38]:
dataflows.head()

ABORIGINAL_POP_PROJ                 Projected population, Aboriginal and Torres St...
ABORIGINAL_POP_PROJ_REMOTE          Projected population, Aboriginal and Torres St...
ABS_ABORIGINAL_POPPROJ_INDREGION    Projected population, Aboriginal and Torres St...
ABS_ACLD_LFSTATUS                   Australian Census Longitudinal Dataset (ACLD):...
ABS_ACLD_TENURE                     Australian Census Longitudinal Dataset (ACLD):...
dtype: object

In [39]:
dataflows[dataflows.str.contains('CPI')]

CPI                  Consumer Price Index (CPI) 17th Series
CPI_M          Monthly Consumer Price Index (CPI) indicator
CPI_WEIGHTS              Consumer Price Index (CPI) Weights
dtype: object

In [40]:
abs_msg = abs.dataflow('CPI_M')

2023-09-01 18:05:50,306 pandasdmx.reader.sdmxml - DEBUG: Truncate sub-microsecond time in <Prepared>





--- SS without DSD ---
{1: False}

--- <class 'pandasdmx.message.StructureMessage'> ---
{2: <pandasdmx.StructureMessage>
  <Header>
    id: 'IDREF81614'
    prepared: '2023-09-01T18:05:52.508134+10:00'
    receiver: <Agency Unknown>
    sender: <Agency Unknown>
    source: 
    test: False}

--- <class 'pandasdmx.model.DataStructureDefinition'> ---
{'CPI_M': <DataStructureDefinition ABS:CPI_M(1.2.0): Consumer Price Index Datastructure>}

--- <class 'pandasdmx.model.Agency'> ---
{'ABS': <Agency ABS>}

--- <class 'pandasdmx.model.DataflowDefinition'> ---
{'CPI_M': <DataflowDefinition ABS:CPI_M(1.2.0): Monthly Consumer Price Index (CPI) indicator>}

--- <class 'pandasdmx.model.CategoryScheme'> ---
{62: <CategoryScheme ABS:ECONOMY(1.0.0) (6 items): Economy>, 63: <CategoryScheme ABS:ECONOMY(1.0.0) (1 items)>}

--- <class 'pandasdmx.model.Categorisation'> ---
{'CAT_CPI_M': <Categorisation ABS:CAT_CPI_M(1.2.0): Monthly Consumer Price Index (CPI) indicator>}

--- <class 'pandasdmx.model.Cod

XMLParseError: RuntimeError