In [1]:
#  import packages

import pandas as pd
import requests
import csv
import json
import re


In [2]:
# https://www.longpaddock.qld.gov.au/cgi-bin/silo/PatchedPointDataset.php?format=id&station=8012

## BOM station list to use in downloading SILO weather data

In [37]:
# wa_bom_stations contains all BOM stations in the WA wheatbelt that have observations up to 2019.
# These need to be run through SILO to filter out stations that are not in the SILO database.
wa_bom_stations = ['9971', '9909', '9776', '9519', '9803', '9509', '9508', '9988', '9842', '9978', '9771', '9278', '9990', '9992', '9877', '9744', '9987', '9628', '9994', '9738', '9923', '9960', '9556', '9504', '9617', '9690', '9552', '9666', '9587', '9517', '9616', '9573', '9692', '9530', '9906', '9980', '9585', '9998', '9590', '9908', '9592', '9968', '9928', '9611', '9805', '9904', '9619', '9871', '9924', '9215', '9225', '9007', '10310', '10138', '9192', '9193', '9789', '9584', '9579', '9827', '9804', '12297', '12107', '9897', '9870', '10878', '9822', '9772', '9542', '9631', '12028', '9557', '9961', '12077', '12140', '12312', '12223', '10870', '10895', '9813', '12308', '9868', '12044', '10811', '9921', '9626', '10633', '12114', '12291', '12071', '12198', '12306', '9645', '9739', '9815', '9922', '10896', '8050', '8044', '8057', '8276', '8305', '8300', '8273', '8299', '8309', '8013', '8157', '8025', '8037', '8238', '8077', '10195', '8067', '8072', '9199', '8078', '8079', '8088', '8296', '8271', '8107', '8007', '8060', '8310', '8295', '8121', '8264', '8278', '10068', '8143', '8004', '8240', '8307', '8200', '8280', '8288', '8052', '8168', '8065', '8251', '8075', '8285', '8304', '8095', '8096', '8028', '8274', '8290', '8100', '8104', '8113', '8116', '8308', '8292', '8237', '8298', '8294', '8286', '8147', '9999', '9599', '9520', '9852', '9899', '9851', '9754', '9678', '9633', '9930', '9615', '9848', '9609', '10502', '10519', '9654', '10729', '10792', '10558', '10921', '10905', '10707', '10699', '10897', '10619', '10541', '10622', '9594', '10627', '10595', '10802', '10694', '9865', '9835', '10508', '10725', '10831', '10566', '10916', '10700', '10659', '10715', '10520', '10525', '10530', '10531', '10537', '10866', '10543', '9635', '9843', '9745', '10582', '10797', '10635', '10643', '9605', '10893', '9506', '9850', '9752', '9958', '9561', '9985', '9581', '9875', '9967', '9591', '9515', '9511', '9964', '9661', '9607', '9621', '9625', '9112', '9277', '9024', '9274', '9275', 
'9040', '10052', '10002', '10012', '10044', '10294', '10286', '10040', '10041', '10163', '10073', '10104', '10628', '10257', '10121', '10122', '10230', '10143', '10145', '10031', '10036', '10000', '8002', '10245', '10155', '10032', '10039', '10042', '10045', '10058', '10061', '8066', '8254', '10076', '10077', '10089', '10097', '8137', '8302', '10140', '9037', '9144', '9006', '9014', '9018', '9178', '9131', '9210', '9114', '9280', '9248', '9276', '9211', '9243', '8316', '10503', '10507', '10006', '10118', '10016', '10019', '10034', '10053', '10881', '10151', '10119', '10298', '10092', '10095', '10702', '10124', '10612', '10108', '10904', '10123', '10083', '8275', '8317', '8022', '8005', '8301', '8008', '8202', '10009', '10064', '10156', '9279', '8297', '8014', '10084', '8016', '10026', '8061', '8064', '10070', '8283', '8085', '8087', '8318', '9033', '8151', '8130', '8139', '10007', '10158', '12026', '10011', '12007', '12011', '10149', '10055', '10192', '10264', '12227', '10082', '12056', '10102', '12298', '12064', '10112', '12101', '12320', '10126', '12079', '10030', '12083', '10135', '10137', '10136', '12141', '12201', '10244', '10150', '10129', '10152', '10111', '9269', '9268', '10304', '10125', '10308', '10134', '10515', '10043', '10120', '10132', '10920', '10307', '10115', '10634', '10908', '10311', '10524', '10527', '10876', '10534', '10556', '10536', '10823', '10564', '10571', '10626', '10917', '10919', '10912', '10894', '10696', '10671', '10644', '10816', '10513', '10518', '10900', '10546', '10872', '10560', '10603', '10565', '10568', '10705', '10581', '10665', '10584', '10606', '10670', '10911', '10889', '10611', '10692', '10625', '10638', '10636', '10799', '10654', '10511', '10662', '10614', '10505', '10704', '10510', '10542', '10888', '9668', '10641', '10793', '9914', '10647', '10655', '10923', '10658', '10910']

len(wa_bom_stations)

451

In [49]:
# wa_silo_stations contains only the stations that are in the SILO database: 116 fewer than wa_bom_stations.
wa_silo_stations = ['9909', '9519', '9803', '9509', '9508', '9842', '9771', '9877', '9628', '9994', '9738', '9556', '9504', '9617', '9690', '9552', '9666', '9587', '9517', '9616', '9573', '9530', '9585', '9590', '9908', '9592', '9968', '9611', '9805', '9904', '9619', '9871', '9215', '9225', '9007', '10138', '9192', '9193', '9789', '9584', '9579', '9827', '9804', '10878', '9822', '9772', '9542', '9631', '12028', '9557', '9961', '12077', '12312', '12223', '10895', '9813', '12044', '10811', '9626', '10633', '12114', '12071', '12198', '9739', '9922', '8050', '8044', '8057', '8276', '8273', '8013', '8157', '8025', '8037', '8238', '8077', '8067', '8072', '8078', '8079', '8088', '8296', '8107', '8007', '8060', '8121', '8264', '8143', '8004', '8240', '8200', '8288', '8052', '8168', '8065', '8251', '8075', '8095', '8096', '8028', '8100', '8104', '8113', '8116', '8294', '8147', '9599', '9520', '9754', '9678', '9633', '9930', '9615', '9848', '9609', '10502', '10519', '9654', '10729', '10792', '10558', '10905', '10707', '10699', '10619', '10541', '10622', '9594', '10627', '10595', '10694', '9835', '10508', '10725', '10831', '10566', '10916', '10700', '10659', '10520', '10525', '10530', '10531', '10537', '10866', '10543', '9635', '9843', '10582', '10797', '10635', '10643', '10893', '9506', '9752', '9561', '9581', '9875', '9591', '9515', '9511', '9964', '9661', '9607', '9621', '9625', '9112', '9024', '9040', '10052', '10002', '10012', '10044', '10294', '10286', '10040', '10041', '10163', '10073', '10104', '10628', '10257', '10121', '10122', '10143', '10145', '10036', '10000', '8002', '10155', '10032', '10039', '10042', '10045', '10058', '10061', '8066', '8254', '10076', '10077', '10097', '8137', '10140', '9037', '9144', '9006', '9014', '9018', '9178', '9131', '9210', '9114', '10503', '10507', '10006', '10118', '10016', '10019', '10034', '10053', '10151', '10119', '10298', '10092', '10095', '10702', '10124', '10612', '10108', '10904', '10123', '10083', '8022', '8005', '8008', 
'10009', '10156', '8297', '8014', '10084', '8016', '10026', '8061', '8064', '10070', '8085', '8087', '9033', '8151', '8130', '8139', '10007', '10158', '12026', '10011', '12007', '12011', '10149', '10055', '10192', '10264', '10082', '12056', '10102', '12064', '10112', '12101', '12320', '10126', '12079', '10030', '12083', '10135', '10137', '10136', '12201', '10244', '10150', '10152', '10111', '10125', '10134', '10515', '10120', '10115', '10634', '10311', '10524', '10527', '10534', '10536', '10823', '10564', '10571', '10626', '10917', '10894', '10696', '10671', '10644', '10513', '10518', '10546', '10872', '10560', '10603', '10565', '10568', '10705', '10581', '10665', '10584', '10606', '10670', '10911', '10889', '10611', '10692', '10625', '10638', '10636', '10654', '10662', '10614', '10505', '10704', '10510', '10542', '9668', '10641', '10793', '9914', '10647', '10655', '10658']

len(wa_silo_stations)

335

In [50]:
# excluded contains all stations not found in the SILO database. Will look at the quality of their data to see whether they are worth including.
excluded = ['8283', '9985', '8300', '10245', '9248', '9744', '9924', '10900', '8316', '8286', '9199', '10031', '10870', '8299', '9971', '12141', '10919', '10888', '10912', '8295', '9992', '12140', '8274', '8305', '9958', '12297', '9998', '9279', '10511', '9277', '10881', '8307', '10876', '10816', '10064', '9990', '10230', '10307', '9269', '9868', '10921', '12291', '9645', '8298', '10195', '8309', '8308', '9852', '9960', '9815', '9275', '10043', '8302', '9243', '12308', '10799', '9921', '9978', '9897', '10715', '10923', '8275', '9987', '10308', '10089', '9850', '9980', '12298', '8285', '9274', '8304', '10129', '10132', '9276', '8237', '9268', '10802', '8271', '10310', '10896', '9967', '10304', '9906', '9278', '9870', '8290', '8202', '10897', '9988', '9605', '9745', '9692', '12227', '8301', '9999', '8318', '9928', '10556', '9211', '9923', '8292', '8280', '8317', '8278', '8310', '9865', '9899', '10068', '10910', '9776', '9280', '12107', '10908', '12306', '9851', '10920']

len(excluded)

116

## Read in Bureau of Meteorology product IDCJMC0014: Australian stations measuring total monthly rainfall

#### Columns:
Site |   Name  |                                   Lat   |    Lon   |   Start | End   |    Years |  % | AWS

In [38]:
# Column labels applied to the BOM station list. The listing's "Start" and
# "End" columns are each read as a month field plus a year field, which is
# why there are 11 names here.
colnames = [
    'station', 'name', 'lat', 'lon',
    'start_month', 'start_year',
    'end_month', 'end_year',
    'years', 'percent', 'aws',
]

# Parse the fixed-width station list straight from the BOM FTP server.
# The first 5 and last 6 lines of the file are skipped, and the file's own
# header row is not used; the names above are supplied instead.
df = pd.read_fwf(
    'ftp://ftp.bom.gov.au/anon2/home/ncc/metadata/lists_by_element/alpha/alphaAUS_139.txt',
    skiprows=5,
    skipfooter=6,
    header=None,
    names=colnames,
)

In [39]:
# Preview the first rows to check that the fixed-width file parsed into the expected columns.
df.head()

Unnamed: 0,station,name,lat,lon,start_month,start_year,end_month,end_year,years,percent,aws
0,41497,AAC DALBY CAMPUS,-27.1494,151.2894,Aug,1985,Sep,2012,26.8,98,N
1,33259,ABBOT POINT BULKCOAL,-19.8833,148.075,Jan,1991,Jun,1991,0.5,100,N
2,67114,ABBOTSBURY (FAIRFIELD (CITY FARM)),-33.8667,150.8611,Dec,2000,Jul,2019,18.3,98,N
3,40000,ABBOTSFORD,-27.95,153.1,Oct,1909,Jul,1974,64.4,99,N
4,86307,ABBOTSFORD,-37.8,145.0,May,1972,Sep,1972,0.4,99,N


In [51]:
# Keep only the BOM stations whose record ends in 2019.
# Note: despite the variable name, the filter is on end_year only; end_month
# is not considered.
df_current_month = df.loc[df['end_year'].eq(2019)]

In [52]:
# Compare the size (rows, columns) of the 2019 subset with the full station list.
df_current_month.shape, df.shape

((4824, 11), (17875, 11))

In [53]:
# Narrow the 2019 subset down to the metadata rows for the WA BOM stations.
# The ids in wa_bom_stations are stored as strings, so they are converted to
# int before being matched against the 'station' column.
df_wa = df_current_month.loc[
    df_current_month['station'].isin(list(map(int, wa_bom_stations)))
]

In [54]:
# Sanity check: number of matched metadata rows versus number of requested station ids.
# NOTE(review): the recorded output shows 453 rows for 451 ids, so at least one
# station number presumably appears on more than one row of the BOM list --
# confirm before treating 'station' as a unique key.
df_wa.shape, len(wa_bom_stations)

((453, 11), 451)

In [56]:
# Save the WA station metadata as CSV in the data\external\bom_station_metadata
# folder, leaving the DataFrame index out of the file.
# NOTE(review): the destination is an absolute path on one user's Windows
# machine; the notebook will not run elsewhere without editing it.
df_wa.to_csv(
    r'C:\Users\rj71b\geo-projects\wheatbelt_rainfall_analyser\data\external\bom_station_metadata\wa_station_metadata.csv',
    index=False,
)