# Reading

The observation data is formated as XML. Let's dump it out to csv so it is easier to read

In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
from datetime import datetime

In [2]:
OBS_FILE = "../data/IDV60920.xml"

In [3]:
!cat {OBS_FILE}

<?xml version="1.0"?>
<product xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="v1.7.1" xsi:noNamespaceSchemaLocation="http://www.bom.gov.au/schema/v1.7/product.xsd">
  <amoc>
    <source>
      <sender>Australian Government Bureau of Meteorology</sender>
      <region>Victoria</region>
      <office>VICRO</office>
      <copyright>http://www.bom.gov.au/other/copyright.shtml</copyright>
      <disclaimer>http://www.bom.gov.au/other/disclaimer.shtml</disclaimer>
    </source>
    <identifier>IDV60920</identifier>
    <issue-time-utc>2020-12-29T04:21:02+00:00</issue-time-utc>
    <issue-time-local tz="EDT">2020-12-29T15:21:02+11:00</issue-time-local>
    <sent-time>2020-12-29T04:22:47+00:00</sent-time>
    <status>O</status>
    <service>WSP</service>
    <product-type>O</product-type>
    <phase>NEW</phase>
  </amoc>
  <observations>
    <station wmo-id="95936" bom-id="086338" tz="Australia/Melbourne" stn-name="MELBOURNE (OLYMPIC PARK)" stn-height="7.53

In [4]:
root = ET.parse(OBS_FILE).getroot()
root

<Element 'product' at 0x7f54d3656220>

In [5]:
stations = root.findall("./observations/station")
stations

[<Element 'station' at 0x7f54d36569a0>,
 <Element 'station' at 0x7f54d365d180>,
 <Element 'station' at 0x7f54d36608b0>,
 <Element 'station' at 0x7f54d3669040>,
 <Element 'station' at 0x7f54d366d630>,
 <Element 'station' at 0x7f54d3672b30>,
 <Element 'station' at 0x7f54d36772c0>,
 <Element 'station' at 0x7f54d3677e00>,
 <Element 'station' at 0x7f54d367f450>,
 <Element 'station' at 0x7f55705119a0>,
 <Element 'station' at 0x7f55705155e0>,
 <Element 'station' at 0x7f5570519b30>,
 <Element 'station' at 0x7f5570520360>,
 <Element 'station' at 0x7f5570527a90>,
 <Element 'station' at 0x7f557052e130>,
 <Element 'station' at 0x7f5570531680>,
 <Element 'station' at 0x7f5570536d10>,
 <Element 'station' at 0x7f5570539950>,
 <Element 'station' at 0x7f557053d4f0>,
 <Element 'station' at 0x7f5570542b80>,
 <Element 'station' at 0x7f5570548270>,
 <Element 'station' at 0x7f557054d8b0>,
 <Element 'station' at 0x7f55704d0e00>,
 <Element 'station' at 0x7f55704d84f0>,
 <Element 'station' at 0x7f55704ddb80>,


In [6]:
# Lets look at the first Station

In [7]:
station = stations[0]
station.items()

[('wmo-id', '95936'),
 ('bom-id', '086338'),
 ('tz', 'Australia/Melbourne'),
 ('stn-name', 'MELBOURNE (OLYMPIC PARK)'),
 ('stn-height', '7.53'),
 ('type', 'AWS'),
 ('lat', '-37.8255'),
 ('lon', '144.9816'),
 ('forecast-district-id', 'VIC_PW007'),
 ('description', 'Melbourne (Olympic Park)')]

In [8]:
print(ET.tostring(station).decode("ascii"))

<station wmo-id="95936" bom-id="086338" tz="Australia/Melbourne" stn-name="MELBOURNE (OLYMPIC PARK)" stn-height="7.53" type="AWS" lat="-37.8255" lon="144.9816" forecast-district-id="VIC_PW007" description="Melbourne (Olympic Park)">
      <period index="0" time-utc="2020-12-29T04:20:00+00:00" time-local="2020-12-29T15:20:00+11:00" wind-src="OMD">
        <level index="0" type="surface">
          <element units="Celsius" type="apparent_temp">15.6</element>
          <element units="Celsius" type="delta_t">6.6</element>
          <element units="km/h" type="gust_kmh">24</element>
          <element units="knots" type="wind_gust_spd">13</element>
          <element units="Celsius" type="air_temperature">19.7</element>
          <element units="Celsius" type="dew_point">6.4</element>
          <element units="hPa" type="pres">1016.8</element>
          <element units="hPa" type="msl_pres">1016.8</element>
          <element units="hPa" type="qnh_pres">1017.7</element>
          <element u

In [9]:
station.find("./period").items()

[('index', '0'),
 ('time-utc', '2020-12-29T04:20:00+00:00'),
 ('time-local', '2020-12-29T15:20:00+11:00'),
 ('wind-src', 'OMD')]

In [10]:
station.find("./period/level").items()

[('index', '0'), ('type', 'surface')]

In [11]:
for e in station.findall("./period/level/"):
    print(e.items(), e.text)

[('units', 'Celsius'), ('type', 'apparent_temp')] 15.6
[('units', 'Celsius'), ('type', 'delta_t')] 6.6
[('units', 'km/h'), ('type', 'gust_kmh')] 24
[('units', 'knots'), ('type', 'wind_gust_spd')] 13
[('units', 'Celsius'), ('type', 'air_temperature')] 19.7
[('units', 'Celsius'), ('type', 'dew_point')] 6.4
[('units', 'hPa'), ('type', 'pres')] 1016.8
[('units', 'hPa'), ('type', 'msl_pres')] 1016.8
[('units', 'hPa'), ('type', 'qnh_pres')] 1017.7
[('units', '%'), ('type', 'rel-humidity')] 42
[('units', 'km'), ('type', 'vis_km')] 36
[('type', 'wind_dir')] SSW
[('units', 'deg'), ('type', 'wind_dir_deg')] 197
[('units', 'km/h'), ('type', 'wind_spd_kmh')] 17
[('units', 'knots'), ('type', 'wind_spd')] 9
[('start-time-local', '2020-12-29T09:00:00+11:00'), ('end-time-local', '2020-12-29T15:22:00+11:00'), ('duration', '382'), ('start-time-utc', '2020-12-28T22:00:00+00:00'), ('end-time-utc', '2020-12-29T04:22:00+00:00'), ('units', 'mm'), ('type', 'rainfall')] 0.0
[('start-time-local', '2020-12-28T09

In [12]:
def try_convert_float(f):
    try:
        return float(f)
    except ValueError:
        return f

def parse_station(s):
    station_items = []
    
    for key, value in station.items():
        if key not in ["wmo-id", "bom-id"]:
            value = try_convert_float(value)
            
        station_items.append([
            key,
            None,
            value
        ])
    
    for e in s.findall("./period/level/"):
        station_items.append([
            e.get("type"),
            e.get("units"),
            try_convert_float(e.text)
        ])
    # Convert to dataframe
    df = pd.DataFrame(station_items, columns=["name", "units", "value"])
    df["station"] = s.get("stn-name")
    return df
df = parse_station(station)
df

Unnamed: 0,name,units,value,station
0,wmo-id,,95936,MELBOURNE (OLYMPIC PARK)
1,bom-id,,086338,MELBOURNE (OLYMPIC PARK)
2,tz,,Australia/Melbourne,MELBOURNE (OLYMPIC PARK)
3,stn-name,,MELBOURNE (OLYMPIC PARK),MELBOURNE (OLYMPIC PARK)
4,stn-height,,7.5300,MELBOURNE (OLYMPIC PARK)
5,type,,AWS,MELBOURNE (OLYMPIC PARK)
6,lat,,-37.8255,MELBOURNE (OLYMPIC PARK)
7,lon,,144.9816,MELBOURNE (OLYMPIC PARK)
8,forecast-district-id,,VIC_PW007,MELBOURNE (OLYMPIC PARK)
9,description,,Melbourne (Olympic Park),MELBOURNE (OLYMPIC PARK)


In [13]:
results = [parse_station(s) for s in stations]
results = pd.concat(results)
results

Unnamed: 0,name,units,value,station
0,wmo-id,,95936,MELBOURNE (OLYMPIC PARK)
1,bom-id,,086338,MELBOURNE (OLYMPIC PARK)
2,tz,,Australia/Melbourne,MELBOURNE (OLYMPIC PARK)
3,stn-name,,MELBOURNE (OLYMPIC PARK),MELBOURNE (OLYMPIC PARK)
4,stn-height,,7.53,MELBOURNE (OLYMPIC PARK)
...,...,...,...,...
13,rain_hour,mm,0.00,PORTABLE VIC HARVEST F (DELWP)
14,rain_ten,mm,0.00,PORTABLE VIC HARVEST F (DELWP)
15,rel-humidity,%,44.00,PORTABLE VIC HARVEST F (DELWP)
16,rainfall,mm,0.00,PORTABLE VIC HARVEST F (DELWP)


In [27]:
results_table = results.set_index(["station", "name", "units"]).unstack(["name", "units"])
results_table.columns = results_table.columns.droplevel(0)
results_table

name,wmo-id,bom-id,tz,stn-name,stn-height,type,lat,lon,forecast-district-id,description,...,maximum_gust_spd,maximum_gust_kmh,maximum_gust_dir,cloud,cloud_oktas,cloud_base_m,cloud_type_id,weather,rain_hour,rain_ten
units,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,knots,km/h,NaN,NaN,NaN,m,NaN,NaN,mm,mm
station,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AIREYS INLET,95936,086338,Australia/Melbourne,MELBOURNE (OLYMPIC PARK),7.53,AWS,-37.8255,144.9816,VIC_PW007,Melbourne (Olympic Park),...,11.0,20.0,SE,,,,,,,
ARARAT PRISON,95936,086338,Australia/Melbourne,MELBOURNE (OLYMPIC PARK),7.53,AWS,-37.8255,144.9816,VIC_PW007,Melbourne (Olympic Park),...,,,,,,,,Fine,,
AVALON AIRPORT,95936,086338,Australia/Melbourne,MELBOURNE (OLYMPIC PARK),7.53,AWS,-37.8255,144.9816,VIC_PW007,Melbourne (Olympic Park),...,20.0,37.0,ESE,Clear,0.0,,,,,
BAIRNSDALE AIRPORT,95936,086338,Australia/Melbourne,MELBOURNE (OLYMPIC PARK),7.53,AWS,-37.8255,144.9816,VIC_PW007,Melbourne (Olympic Park),...,13.0,24.0,E,Cloudy,8.0,,,,,
BALLARAT AERODROME,95936,086338,Australia/Melbourne,MELBOURNE (OLYMPIC PARK),7.53,AWS,-37.8255,144.9816,VIC_PW007,Melbourne (Olympic Park),...,21.0,39.0,SSW,Mostly cloudy,7.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WILSONS PROMONTORY LIGHTHOUSE,95936,086338,Australia/Melbourne,MELBOURNE (OLYMPIC PARK),7.53,AWS,-37.8255,144.9816,VIC_PW007,Melbourne (Olympic Park),...,22.0,41.0,WSW,,,,,,,
WONTHAGGI,95936,086338,Australia/Melbourne,MELBOURNE (OLYMPIC PARK),7.53,AWS,-37.8255,144.9816,VIC_PW007,Melbourne (Olympic Park),...,,,,Partly cloudy,5.0,,,Fine,,
YANAKIE,95936,086338,Australia/Melbourne,MELBOURNE (OLYMPIC PARK),7.53,AWS,-37.8255,144.9816,VIC_PW007,Melbourne (Olympic Park),...,15.0,28.0,SSW,,,,,,,
YARRAM AIRPORT,95936,086338,Australia/Melbourne,MELBOURNE (OLYMPIC PARK),7.53,AWS,-37.8255,144.9816,VIC_PW007,Melbourne (Olympic Park),...,16.0,30.0,E,,,,,,,


In [15]:
# Dump out to CSV
out_fname = "../data/results_{}.csv".format(datetime.now().strftime("%Y%m%dT%H%M%S"))
out_fname

'../data/results_20201229T160058.csv'

In [28]:
results_table.to_csv(out_fname, )