In [1]:
import os
import re

import lxml.html as parse
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
def get_stations_data(startYr, endYr, nccObsCode = "122"):
    """Download the bom.gov.au ``alphaVIC`` station list and filter it by year range.

    The raw listing is saved to ``Resources/VIC_Station_List.txt``, parsed as a
    fixed-width table, and the complete (unfiltered) table is written to
    ``Resources/stations.csv`` before the filter is applied.

    Parameters
    ----------
    startYr, endYr : int
        First and last year of the period of interest.
    nccObsCode : str, default "122"
        Observation code inserted into the listing URL.

    Returns
    -------
    pandas.DataFrame
        Rows whose ``Start`` is earlier than 1 January of ``startYr`` and whose
        ``End`` is on or after 31 December of ``endYr``. ``Start`` and ``End``
        are converted to datetimes.

    Raises
    ------
    requests.exceptions.RequestException
        If the listing cannot be downloaded (connection error, timeout or an
        HTTP error status). The failure is printed and then re-raised because
        nothing downstream can run without the station list.
    """
    url= f"http://www.bom.gov.au/climate/data/lists_by_element/alphaVIC_{nccObsCode}.txt"
    try:
        # A timeout keeps an unresponsive server from hanging the notebook, and
        # raise_for_status stops an HTTP error page being parsed as station data.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Cannot open page {url} Error: {e}")
        raise
    res = response.text

    # A fresh checkout may not have the output directory yet.
    os.makedirs('Resources', exist_ok=True)
    with open('Resources/VIC_Station_List.txt', 'w', newline='') as the_file:
        the_file.write(res)

    # Convert the station data to a csv
    headings = ["Site","Name","Lat","Lon","Start","End","Years","%","AWS"]
    colspecs = [[0,7],[8,48],[49,57],[59,67],[68,76],[77,85],[88,92],[95,97],[100,101]]
    stationsdf = pd.read_fwf('Resources/VIC_Station_List.txt', names=headings, header=None, colspecs=colspecs,skiprows = 4,skipfooter=6)
    stationsdf.to_csv("Resources/stations.csv", encoding='utf-8', index = False)
    stationsdf["Start"] = pd.to_datetime(stationsdf["Start"])
    stationsdf["End"] = pd.to_datetime(stationsdf["End"])
    # NOTE(review): if Start/End are month-resolution values (the 8-character
    # columns suggest something like "Mon YYYY" - confirm against the file), they
    # parse to the first day of the month, so a station ending in December of
    # endYr would fail the ">= 31 Dec" test. Verify the intended boundaries.
    covers_period = (stationsdf["Start"] < f'{startYr}-1-1') & (stationsdf["End"]>= f'{endYr}-12-31')
    stationsdf = stationsdf.loc[covers_period]
    return stationsdf

In [3]:
def populate_P_C(stationid,nccObsCode = "122"):
    """Look up the ``p_c`` value for a station from the station-directory page.

    Parameters
    ----------
    stationid
        Station number inserted into the directory URL as ``p_stnNum``.
    nccObsCode : str, default "122"
        Observation code inserted into the URL as ``p_nccObsCode``.

    Returns
    -------
    str or None
        Text of the last ``td`` in the first body row of the first table on the
        page. ``None`` if the page cannot be fetched or does not contain that
        cell; a message is printed in either case.
    """
    url= f"http://www.bom.gov.au/jsp/ncc/cdio/weatherStationDirectory/d?p_state=&p_display_type=ajaxStnListing&p_nccObsCode={nccObsCode}&p_stnNum={stationid}&p_radius=0#top"
    try:
        # Timeout so a stalled connection cannot hang the whole scrape.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Cannot open page {url} Error: {e}")
        return None

    bsObj = BeautifulSoup(response.text, "html.parser")
    try:
        # Any missing level (table, tbody, row or cell) means the station has
        # no listing, so all of the lookups share a single guard.
        table = bsObj.findAll("table")[0]
        first_row = table.findAll('tbody')[0].findAll('tr')[0]
        p_c = first_row.findAll('td')[-1].text
    except IndexError:
        print(f"Station {stationid} data not available. url {url}")
        return None

    return p_c

In [4]:
    
def extract_weather_data(year, stationid,p_c, lat, lon,nccObsCode = "122"):
    """Scrape one station-year of daily observations and save it as a CSV.

    The ``dataTable`` table on the daily-data page is read row by row; the
    ``td`` texts of each class-less ``tr`` become one row with a column per
    month. At most the first 31 rows are kept and written to
    ``Resources/Weather/stationID_<stationid>_<year>_<nccObsCode>_Lat_<lat>_Lon_<lon>.csv``.

    Parameters
    ----------
    year
        Value sent as ``p_startYear`` and used in the output file name.
    stationid
        Station number sent as ``p_stn_num`` and used in the output file name.
    p_c
        Value sent as the ``p_c`` query parameter.
    lat, lon
        Only used to build the output file name.
    nccObsCode : str, default "122"
        Observation code sent as ``p_nccObsCode`` and used in the file name.

    Returns
    -------
    None
        If the page cannot be fetched or has no data table, a message is
        printed and no file is written.
    """
    url= f'http://www.bom.gov.au/jsp/ncc/cdio/weatherData/av?p_nccObsCode={nccObsCode}&p_display_type=dailyDataFile&p_startYear={year}&p_c={p_c}&p_stn_num={stationid}'
    try:
        # Fetch the page exactly once; the timeout keeps a stalled connection
        # from hanging the whole scrape.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Cannot open page {url} Error: {e}")
        return

    bsObj = BeautifulSoup(response.text, "html.parser")
    try:
        table = bsObj.findAll("table", id="dataTable")[0]
        tbody = table.findAll('tbody')[0]
    except IndexError:
        print(f"Station {stationid} data not available. url {url}")
        return

    # Only rows without a class attribute are read.
    # NOTE(review): presumably the classed rows are summary/statistics rows - confirm.
    tRows = tbody.findAll('tr',{"class": ""})

    COLUMN_NAMES = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # Build every row first and create the frame once, instead of appending
    # row by row through a shared list. Only the first 31 rows are ever kept.
    rows = [[td.text for td in tr.findAll('td')] for tr in tRows[:31]]
    yearDF = pd.DataFrame(rows, columns=COLUMN_NAMES)

    # A fresh checkout may not have the output directory yet.
    os.makedirs("Resources/Weather", exist_ok=True)
    yearDF.to_csv(f"Resources/Weather/stationID_{stationid}_{year}_{nccObsCode}_Lat_{lat}_Lon_{lon}.csv", encoding='utf-8', index = False)


In [5]:

# Observation codes: Rain Fall = 136, Max_temp = 122, min_temp = 123, solar exposure = 193
product_code = "122"
startYear = 2018
endYear =  2019
years = list(range(startYear, endYear + 1))
stationsdf = get_stations_data(startYear, endYear,product_code)

# The p_c lookup depends only on the station, not on the year, so each
# station's value is fetched once and reused for every year.
p_c_by_site = {}

for yr in years:
    for i, station in stationsdf.iterrows():
        site = station["Site"]
        if site not in p_c_by_site:
            p_c_by_site[site] = populate_P_C(site, product_code)
        p_c = p_c_by_site[site]
        if p_c is None:
            # populate_P_C has already reported the problem; without a p_c
            # value there is nothing valid to request for this station.
            continue
        extract_weather_data(yr, site, p_c, station["Lat"], station["Lon"], product_code)
        # Printed after the call so the message no longer precedes the export attempt.
        print(f"Station {station['Site']} weather data exported")
        
        

Station 90180 weather data exported
Station 89085 weather data exported
Station 87113 weather data exported
Station 85279 weather data exported
Station 89002 weather data exported
Station 82170 weather data exported
Station 81123 weather data exported
Station 87184 weather data exported
Station 90184 weather data exported
Station 90015 weather data exported
Station 90182 weather data exported
Station 88110 weather data exported
Station 86361 weather data exported
Station 80128 weather data exported
Station 90035 weather data exported
Station 86383 weather data exported
Station 84143 weather data exported
Station 85301 weather data exported
Station 82169 weather data exported
Station 86375 weather data exported
Station 90194 weather data exported
Station 82076 weather data exported
Station 85072 weather data exported
Station 85314 weather data exported
Station 80015 weather data exported
Station 79099 weather data exported
Station 83083 weather data exported
Station 88164 weather data e