## Download monthly rainfall data from SILO

In [1]:
#  import packages

import pandas as pd
import numpy as np
import requests
import csv
import json
from pandas.io.json import json_normalize
import re
import time
import os
from dotenv import load_dotenv, find_dotenv

#### SILO most recent month rainfall totals not corresponding with BOM
#### Will look at appending BOM monthly rainlist totals to SILO dataset.
#### SILO seems to come into line with BOMafter twomonths

In [2]:
# find .env automagically by walking up directories until it's found
dotenv_path = find_dotenv()

# load up the entries as environment variables
load_dotenv(dotenv_path)

silo_username = os.environ.get("SILO_USERNAME")
silo_password = os.environ.get("SILO_PASSWORD")


In [32]:
# List containing weather stations in SILO Patched Point Dataset throughout the WA wheatbelt. Contains 376 stations
# wa_station_list = ['9804', '9556', '10052', '10894', '10502', '10000', '10696', '10503', '8273', '10505', '9909', '10671', '10002', '10508', '9037', '10244', '8022', '10295', '11017', '10704', '8002', '8004', '9506', '9506', '9507', '9507', '9144', '8005', '10510', '9599', '11034', '10644', '10006', '10506', '10007', '10513', '10158', '8008', '10515', '10807', '8009', '9112', '12026', '9509', '9509', '9673', '10009', '10011', '12007', '10012', '10518', '10519', '10520', '8013', '9504', '10118', '9654', '9617', '10524', '10525', '10016', '11035', '12011', '12013', '10527', '10044', '8017', '10019', '9603', '9515', '9515', '10155', '10156', '8157', '9519', '9520', '8025', '10530', '9803', '9006', '10531', '10878', '10729', '10149', '9628', '9994', '10534', '10725', '10294', '8240', '8037', '12295', '10536', '12220', '10032', '10537', '10866', '10538', '9769', '10286', '8297', '8014', '9822', '9014', '10542', '9530', '10544', '9752', '8044', '10039', '10120', '10040', '10041', '10042', '10546', '10872', '9538', '8179', '10550', '10045', '8225', '8200', '9772', '9789', '9542', '9631', '11003', '11019', '8047', '10243', '8233', '11052', '10034', '9635', '9843', '12029', '10053', '10055', '10792', '8050', '8288', '9018', '9178', '10084', '8052', '8016', '10823', '10831', '10558', '10026', '10058', '12028', '10560', '10150', '8057', '9552', '8238', '10061', '10603', '8077', '10564', '10151', '10565', '9961', '10566', '8168', '8061', '10568', '10705', '12077', '8064', '8276', '8065', '10905', '10571', '9842', '10707', '10192', '9131', '10070', '8251', '9559', '10916', '12312', '10073', '10264', '9561', '10581', '10119', '10700', '10582', '8066', '10583', '8254', '10076', '8067', '10298', '10077', '10665', '10584', '10082', '9668', '10691', '10606', '10670', '10911', '9210', '10797', '9114', '8072', '10666', '9867', '12223', '9877', '8075', '8078', '8079', '9573', '9024', '10803', '9575', '10641', '9666', '11033', '10092', '9754', '8085', '10095', '8126', '8087', '10699', '8088', '10097', '10895', '8296', '10889', '12056', '9581', '9875', '10611', '10702', '9738', '10102', '8095', '8096', '11008', '12044', '10152', '9584', '8028', '10600', '10124', '9585', '10104', '10612', '10614', '11031', '9033', '9587', '10692', '10108', '12064', '12009', '10111', '8100', '9590', '10811', '10112', '10619', '10541', '10504', '9579', '10620', '8104', '9908', '10622', '9592', '9594', '8107', '8109', '8007', '10625', '10626', '10627', '10595', '9626', '10628', '10115', '10633', '11032', '11030', '10634', '9678', '10250', '12114', '10635', '8113', '9964', '9661', '12071', '9177', '8116', '12198', '9827', '12101', '10283', '9968', '8060', '12320', '10257', '10638', '10904', '9633', '9930', '10643', '10121', '10793', '10122', '9739', '8128', '10123', '10083', '10893', '9648', '9715', '8121', '10125', '10126', '12079', '10694', '9914', '10647', '9611', '9805', '9904', '8264', '8294', '10917', '9040', '10023', '9615', '8130', '10134', '10030', '9835', '9922', '9616', '12083', '9607', '10135', '10137', '10136', '10654', '9619', '10655', '9848', '9871', '8137', '10658', '10659', '9621', '8139', '10140', '10141', '8143', '10662', '9625', '10143', '9771', '10311', '10145', '12201', '10036', '9609', '8146', '8147']
wa_station_list = ['10016','9556', '10052', '10894'] #use short list for experiments
# silo_wa_stations has been filtered from list of BOM stations with observation data for the current month. Contains 238 stations
silo_wa_stations = ['9804', '9556', '10052', '10894', '10502', '10000', '10696', '10503', '8273', '10505', '10508', '9037', '10244', '8022', '11017', '8002', '8005', '9599', '10006', '10007', '10513', '10158', '8008', '10515', '9112', '12026', '9509', '10009', '10011', '10518', '10520', '8013', '9654', '9617', '10524', '10525', '10016', '12011', '10527', '10044', '10019', '9603', '9515', '10155', '8157', '9519', '8025', '10530', '9803', '10531', '10878', '10729', '10149', '9628', '9994', '10534', '10725', '10294', '8240', '8037', '10536', '10032', '10537', '10866', '9769', '10286', '8297', '8014', '9822', '9014', '10542', '9752', '8044', '10040', '10041', '10042', '10546', '10872', '9538', '10045', '8200', '9772', '9789', '9542', '9631', '11003', '11019', '9507', '11052', '10034', '9635', '10055', '10792', '8050', '9018', '9178', '8052', '8016', '10831', '10558', '10026', '10058', '8057', '9552', '10061', '8077', '10564', '10151', '10565', '9961', '8168', '10568', '10705', '12077', '10905', '10571', '9842', '10707', '10192', '9131', '8251', '10916', '10073', '10264', '9561', '10581', '10700', '10582', '8066', '8254', '10076', '10077', '10665', '10584', '10082', '10606', '10670', '10911', '9210', '9114', '8072', '12223', '9877', '8075', '8079', '9573', '10641', '10092', '9754', '8087', '8088', '10097', '8296', '10889', '9581', '10702', '9738', '10102', '8095', '11008', '12044', '10152', '9584', '8028', '10124', '9585', '10104', '10612', '10614', '9033', '9587', '10692', '12064', '12009', '10111', '8100', '9590', '10112', '10619', '10541', '9579', '10622', '9592', '8107', '10625', '10626', '10627', '10595', '9626', '10633', '10634', '9678', '10635', '8113', '9964', '9661', '12071', '9968', '8060', '12320', '10638', '10904', '9633', '9930', '10643', '10121', '10122', '9739', '8121', '10126', '10647', '9611', '9805', '8294', '10917', '9040', '9615', '8130', '10134', '12083', '9607', '10135', '10136', '10654', '9619', '9848', '8137', '8139', '10140', '8143', '10662', '9625', '10143', '9771', '10311', '10145', '12201', '8147']


In [33]:
def random_wait():
    """fn: randomly choose a wait time based on
    probability"""
    wait_times = [0.2, 0.5, 1, 2]
    probs = [0.3, 0.4, 0.2, 0.1]
    choice = np.random.choice(wait_times, size = 1, p = probs)
    return choice

In [34]:
# create list of URL's with different station numbers and corresponding start date
url_list1 = []
for stat in wa_station_list:
    #url is from old legacy site. Partially broken
    url = 'https://legacy.longpaddock.qld.gov.au/cgi-bin/silo/PatchedPointDataset.php?format=Monthly&station={0}&start=20190701&finish=20190731&username={1}&password={2}'.format(stat, silo_username, silo_password)
    # url_new is from new website. Data should be better
    url_new = 'https://www.longpaddock.qld.gov.au/cgi-bin/silo/PatchedPointDataset.php?start=20190701&finish=20190731&station={}&format=Monthly&username=rj71baker@gmail.com'.format(stat)
    url_list1.append(url_new)

In [35]:
# https://www.longpaddock.qld.gov.au/cgi-bin/silo/PatchedPointDataset.php?start=20190701&finish=20190731&station=10016&format=Monthly&username=rj71baker@gmail.com

In [36]:
# create list of pandas dataframes for each station
colnames = ['date', 'max_temp', 'min_temp', 'rain', 'evap', 'radiation', 'vp']
df_list = []
for url in url_list1:

    df_stat = pd.read_csv(url, skiprows = 26,sep = r'\s+', header = None, names = colnames)
    df_stat['station'] = int(re.findall("station=(\\d+)",url)[0]) #regex finds station number
    df_list.append(df_stat)
    time.sleep(random_wait())

In [37]:
pd.read_csv('https://www.longpaddock.qld.gov.au/cgi-bin/silo/PatchedPointDataset.php?start=20190701&finish=20190731&station=10016&format=Monthly&username=rj71baker@gmail.com', skiprows = 26,sep = r'\s+', header = None)

Unnamed: 0,0,1,2,3,4,5,6
0,201907,17.3,5.2,22.0,63.2,10.0,10.5


In [38]:
df_list

[     date  max_temp  min_temp  rain  evap  radiation    vp  station
 0  201907      17.3       5.2  22.0  63.2       10.0  10.5    10016,
      date  max_temp  min_temp  rain  evap  radiation    vp  station
 0  201907      16.0       5.8  51.6  40.8        8.9  10.7     9556,
      date  max_temp  min_temp  rain  evap  radiation    vp  station
 0  201907      17.7       5.3  45.6  65.4       10.3  11.0    10052,
      date  max_temp  min_temp  rain  evap  radiation   vp  station
 0  201907      16.7       4.5  33.6  57.6        9.4  9.7    10894]

In [39]:
# concatenate list of dfs into one
df_concat = pd.concat(df_list)

In [40]:
# make string version of original column
df_concat['date'] = df_concat['date'].astype(str)

In [41]:
# make the new columns using string indexing
df_concat['year'] = df_concat['date'].str[0:4].astype('int64')
df_concat['month'] = df_concat['date'].str[4:6].astype('int64')

In [42]:
# get rid of the extra variable (if you want)
df_concat.drop('date', axis=1, inplace=True)

In [43]:
df_concat

Unnamed: 0,max_temp,min_temp,rain,evap,radiation,vp,station,year,month
0,17.3,5.2,22.0,63.2,10.0,10.5,10016,2019,7
0,16.0,5.8,51.6,40.8,8.9,10.7,9556,2019,7
0,17.7,5.3,45.6,65.4,10.3,11.0,10052,2019,7
0,16.7,4.5,33.6,57.6,9.4,9.7,10894,2019,7


In [None]:
path = r'C:\Users\rj71b\geo-projects\wheatbelt_rainfall_analyser\data\raw'



In [None]:
df_concat.to_csv(path+'\wa_silo_july_data.csv', index = False)

In [None]:
len(wa_station_list)

### Have developed functions in src folder for downloadind weather data from SILO API and transforming to pandas dataframe

## import custom functions from src to use in notebook

In [11]:
# wa_silo_stations contains stations only in the SILO database. 116 fewer than wa_bom_stations
wa_silo_stations = ['9909', '9519', '9803', '9509', '9508', '9842', '9771', '9877', '9628', '9994', '9738', '9556', '9504', '9617', '9690', '9552', '9666', '9587', '9517', '9616', '9573', '9530', '9585', '9590', '9908', '9592', '9968', '9611', '9805', '9904', '9619', '9871', '9215', '9225', '9007', '10138', '9192', '9193', '9789', '9584', '9579', '9827', '9804', '10878', '9822', '9772', '9542', '9631', '12028', '9557', '9961', '12077', '12312', '12223', '10895', '9813', '12044', '10811', '9626', '10633', '12114', '12071', '12198', '9739', '9922', '8050', '8044', '8057', '8276', '8273', '8013', '8157', '8025', '8037', '8238', '8077', '8067', '8072', '8078', '8079', '8088', '8296', '8107', '8007', '8060', '8121', '8264', '8143', '8004', '8240', '8200', '8288', '8052', '8168', '8065', '8251', '8075', '8095', '8096', '8028', '8100', '8104', '8113', '8116', '8294', '8147', '9599', '9520', '9754', '9678', '9633', '9930', '9615', '9848', '9609', '10502', '10519', '9654', '10729', '10792', '10558', '10905', '10707', '10699', '10619', '10541', '10622', '9594', '10627', '10595', '10694', '9835', '10508', '10725', '10831', '10566', '10916', '10700', '10659', '10520', '10525', '10530', '10531', '10537', '10866', '10543', '9635', '9843', '10582', '10797', '10635', '10643', '10893', '9506', '9752', '9561', '9581', '9875', '9591', '9515', '9511', '9964', '9661', '9607', '9621', '9625', '9112', '9024', '9040', '10052', '10002', '10012', '10044', '10294', '10286', '10040', '10041', '10163', '10073', '10104', '10628', '10257', '10121', '10122', '10143', '10145', '10036', '10000', '8002', '10155', '10032', '10039', '10042', '10045', '10058', '10061', '8066', '8254', '10076', '10077', '10097', '8137', '10140', '9037', '9144', '9006', '9014', '9018', '9178', '9131', '9210', '9114', '10503', '10507', '10006', '10118', '10016', '10019', '10034', '10053', '10151', '10119', '10298', '10092', '10095', '10702', '10124', '10612', '10108', '10904', '10123', '10083', '8022', '8005', '8008', '10009', '10156', '8297', '8014', '10084', '8016', '10026', '8061', '8064', '10070', '8085', '8087', '9033', '8151', '8130', '8139', '10007', '10158', '12026', '10011', '12007', '12011', '10149', '10055', '10192', '10264', '10082', '12056', '10102', '12064', '10112', '12101', '12320', '10126', '12079', '10030', '12083', '10135', '10137', '10136', '12201', '10244', '10150', '10152', '10111', '10125', '10134', '10515', '10120', '10115', '10634', '10311', '10524', '10527', '10534', '10536', '10823', '10564', '10571', '10626', '10917', '10894', '10696', '10671', '10644', '10513', '10518', '10546', '10872', '10560', '10603', '10565', '10568', '10705', '10581', '10665', '10584', '10606', '10670', '10911', '10889', '10611', '10692', '10625', '10638', '10636', '10654', '10662', '10614', '10505', '10704', '10510', '10542', '9668', '10641', '10793', '9914', '10647', '10655', '10658']

In [2]:
# OPTIONAL: Load the "autoreload" extension so that code can change
%load_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2

In [3]:
# .py folder contains functions for dowloading SILO API weather data and transforming to pandas dataframe
from src.data import silo_downloader

In [12]:
# call the create_df function from silo_downloader. Downloads monthly weather data for stations in wa_station_list
# requires 3 fields. A list of BOM station numbers. Start date and finish date in the format "YYYYMMDD"
#SILO data starts on 18890101
df = silo_downloader.create_df(wa_silo_stations, '20190701', '20190731')

In [14]:
df.to_csv(r'C:\Users\rj71b\geo-projects\wheatbelt_rainfall_analyser\data\interim\new_silo_weather_july.csv', index = False)

In [13]:
df

Unnamed: 0,max_temp,min_temp,rain,evap,radiation,vp,station,year,month
0,17.0,8.2,96.3,48.0,9.0,12.1,9909,2019,7
0,17.5,11.0,86.2,49.8,9.2,12.1,9519,2019,7
0,16.9,8.0,85.4,46.8,9.0,12.0,9803,2019,7
0,16.5,5.2,69.5,51.4,9.5,10.7,9509,2019,7
0,16.8,7.9,149.1,42.8,8.7,11.7,9508,2019,7
0,17.2,5.6,28.5,91.2,9.0,11.5,9842,2019,7
0,17.1,7.6,95.6,48.6,9.0,12.0,9771,2019,7
0,17.6,7.7,85.4,49.8,9.2,12.0,9877,2019,7
0,16.7,5.3,63.4,47.6,9.1,10.5,9628,2019,7
0,16.9,5.0,60.6,47.6,9.1,10.5,9994,2019,7
