In [271]:
import requests
import lxml.html as parse
import pandas as pd
from bs4 import BeautifulSoup 
import re
import time
from math import sin, cos, sqrt, atan2, radians

In [272]:
# Names of the five Victorian electricity distributors. The same strings are
# used as lookup keys, as values of the 'Distributor' column and in the names
# of the exported CSV files.
(
    const_citipower,
    const_jemena,
    const_powercor,
    const_ausnet,
    const_united,
) = (
    'CitiPower',
    'Jemena',
    'Powercor',
    'AusNet',
    'United',
)

In [273]:
def get_distance_to_distributor(lat1, lng1, distributor):
    """Return the haversine distance in km from a point to a distributor's reference location.

    Reference locations are based on the AEMO Metrology Procedure Part B
    (https://aemo.com.au/-/media/Files/Electricity/NEM/Retail_and_Metering/Load_Tables/Metrology-Procedure-Part-B-v60.pdf):

        CitiPower  Melbourne 37 deg 49 min S 144 deg 58 min E : Lat -37.816667, Long 144.966667
        Jemena     Essendon  37 deg 44 min S 144 deg 54 min E : Lat -37.733333, Long 144.900000
        Powercor   Ballarat  37 deg 30 min S 143 deg 47 min E : Lat -37.500000, Long 143.783333
        AusNet     Morwell   38 deg 13 min S 146 deg 25 min E : Lat -38.216667, Long 146.416667
        United     Dandenong 38 deg 01 min S 145 deg 12 min E : Lat -38.016667, Long 145.200000

    Parameters
    ----------
    lat1, lng1 : float
        Latitude and longitude of the point, in decimal degrees.
    distributor : str
        One of the module-level ``const_*`` distributor names.

    Returns
    -------
    float
        Great-circle distance in kilometres, computed with an Earth radius
        of 6373.0 km.

    Raises
    ------
    ValueError
        If ``distributor`` is not a known name. Previously an unknown name
        silently measured the distance to latitude 0, longitude 0.
    """
    R = 6373.0  # Earth radius in km used by the haversine formula

    distributor_coords = {
        const_citipower: (-37.816667, 144.966667),
        const_jemena: (-37.733333, 144.900000),
        const_powercor: (-37.500000, 143.783333),
        const_ausnet: (-38.216667, 146.416667),
        const_united: (-38.016667, 145.200000),
    }
    if distributor not in distributor_coords:
        raise ValueError(f"Unknown distributor: {distributor!r}")
    db_lat, db_lng = distributor_coords[distributor]

    lat1 = radians(lat1)
    lng1 = radians(lng1)
    db_lat = radians(db_lat)
    db_lng = radians(db_lng)

    dlon = db_lng - lng1
    dlat = db_lat - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(db_lat) * sin(dlon / 2)**2
    # Rounding can push `a` marginally above 1 for near-antipodal points,
    # which would make sqrt(1 - a) fail; clamping leaves valid values untouched.
    a = min(1.0, a)
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c

In [274]:
def get_stations_data(startYr, endYr, nccObsCode = "122"):
    """Pick, for each distributor, the nearest Victorian BoM station reporting a product.

    Downloads the BoM station list for ``nccObsCode``, saves it as
    ``Resources/VIC_Station_List.txt`` and ``Resources/stations.csv``, keeps
    the stations whose record starts before 1 January ``startYr`` and ends on
    or after 31 December ``endYr``, and computes each remaining station's
    distance to every distributor reference location.

    Parameters
    ----------
    startYr, endYr : int
        First and last year the station record must cover.
    nccObsCode : str
        BoM product code used to build the station-list URL.

    Returns
    -------
    pandas.DataFrame
        One row per distributor (CitiPower, Jemena, Powercor, AusNet, United,
        in that order) holding the nearest station, its ``DistTo*`` distances
        and a ``Distributor`` column. Empty (but with the same columns) when
        no station covers the requested period.

    Raises
    ------
    requests.exceptions.RequestException
        If the station list cannot be downloaded; there is nothing useful to
        return in that case, so the error is reported and re-raised.
    """
    # Obtain all stations which provide this product in Victoria
    url= f"http://www.bom.gov.au/climate/data/lists_by_element/alphaVIC_{nccObsCode}.txt"
    try:
        response = requests.get(url, timeout=30)
        # Without this an HTTP error page would be saved and parsed as station data
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Cannot open page {url} Error: {e}")
        raise

    with open('Resources/VIC_Station_List.txt', 'w', newline='') as the_file:
        the_file.write(response.text)

    # Convert the fixed-width station list to a csv
    headings = ["Site","Name","Lat","Lon","Start","End","Years","%","AWS"]
    colspecs = [[0,7],[8,48],[49,57],[59,67],[68,76],[77,85],[88,92],[95,97],[100,101]]
    stationsdf = pd.read_fwf('Resources/VIC_Station_List.txt', names=headings, header=None, colspecs=colspecs,skiprows = 4,skipfooter=6)
    stationsdf.to_csv("Resources/stations.csv", encoding='utf-8', index = False)
    stationsdf["Start"] = pd.to_datetime(stationsdf["Start"])
    stationsdf["End"] = pd.to_datetime(stationsdf["End"])

    covers_period = (stationsdf["Start"] < f'{startYr}-1-1') & (stationsdf["End"]>= f'{endYr}-12-31')
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the full station list.
    stationsdf = stationsdf.loc[covers_period].copy().reset_index(drop=True)

    # (distance column, distributor) pairs; the order fixes both the column
    # order and the row order of the returned frame.
    distance_columns = [
        ('DistToCitip', const_citipower),
        ('DistToJemena', const_jemena),
        ('DistToPowercor', const_powercor),
        ('DistToAusnet', const_ausnet),
        ('DistToUnited', const_united),
    ]

    coords = list(zip(stationsdf['Lat'].astype(float), stationsdf['Lon'].astype(float)))
    for column, distributor in distance_columns:
        stationsdf[column] = [
            get_distance_to_distributor(lat, lon, distributor) for lat, lon in coords
        ]
    stationsdf['Distributor'] = None

    if stationsdf.empty:
        print(f"No station provides product {nccObsCode} for {startYr}-{endYr}")
        return stationsdf

    frames = []
    for column, distributor in distance_columns:
        nearest = stationsdf.sort_values(column).iloc[[0]].reset_index(drop=True)
        nearest.at[0, 'Distributor'] = distributor
        frames.append(nearest)

    selectedstationsdf = pd.concat(frames, ignore_index = True)
    return selectedstationsdf

In [275]:
def populate_P_C(stationid,nccObsCode = "122"):
    """Look up the ``p_c`` value BoM lists for a station and product.

    Requests the BoM weather-station directory listing for ``stationid`` and
    ``nccObsCode`` and scans the rows of the first table on the page for the
    one whose second cell equals the station id.

    Parameters
    ----------
    stationid : str
        Station number as it appears in the directory table.
    nccObsCode : str
        BoM product code.

    Returns
    -------
    str or None
        Text of the last cell of the matching row; ``''`` when no row
        matches; ``None`` when the page cannot be fetched or contains no
        table body.
    """
    url= f"http://www.bom.gov.au/jsp/ncc/cdio/weatherStationDirectory/d?p_state=&p_display_type=ajaxStnListing&p_nccObsCode={nccObsCode}&p_stnNum={stationid}&p_radius=0#top"
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Cannot open page {url} Error: {e}")
        return None

    bsObj = BeautifulSoup(response.text, "html.parser")
    table = bsObj.find("table")
    tbody = table.find('tbody') if table is not None else None
    if tbody is None:
        print(f"Station {stationid} data not available. url {url}")
        return None

    # Sometimes multiple rows are returned; fetch the one corresponding to
    # the station we are interested in.
    wanted = str(stationid)
    for tr in tbody.find_all('tr'):
        td_list = tr.find_all('td')
        # Rows with fewer than two cells cannot carry a station number
        if len(td_list) > 1 and td_list[1].text == wanted:
            return td_list[-1].text
    return ''

In [276]:
    
def extract_weather_data(year, stationid,p_c, lat, lon,distributor, nccObsCode = "122"):
    """Download one year of daily BoM data for a station and save it as CSV.

    Fetches the daily data page for ``stationid``/``year``/``nccObsCode``,
    reads the rows of the ``dataTable`` table that have no CSS class, keeps
    the first 31 of them (one per day of month) with one column per month,
    and writes the result to
    ``Resources/Weather/_{distributor}_{year}_{nccObsCode}_{stationid}.csv``.

    Parameters
    ----------
    year : int
        Year to download.
    stationid : int or str
        BoM station number.
    p_c : str
        Value for the ``p_c`` query parameter, as returned by ``populate_P_C``.
    lat, lon : float
        Accepted for call compatibility; not used.
    distributor : str
        Distributor name, used only in the output file name.
    nccObsCode : str
        BoM product code.

    Returns
    -------
    None
        Nothing is written when the page cannot be fetched or has no data
        table; a message is printed instead.

    Raises
    ------
    ValueError
        If a data row does not contain exactly one cell per month.
    """
    url= f'http://www.bom.gov.au/jsp/ncc/cdio/weatherData/av?p_nccObsCode={nccObsCode}&p_display_type=dailyDataFile&p_startYear={year}&p_c={p_c}&p_stn_num={stationid}'
    try:
        # Single request: the page used to be fetched twice per call
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Cannot open page {url} Error: {e}")
        return
    finally:
        # sleep a second to avoid loading the web server
        time.sleep(1)

    bsObj = BeautifulSoup(response.text, "html.parser")
    table = bsObj.find("table", id="dataTable")
    tbody = table.find('tbody') if table is not None else None
    if tbody is None:
        print(f"Station {stationid} data not available. url {url}")
        return

    COLUMN_NAMES = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    # Only the first 31 class-less rows are kept; collect them and build the
    # frame once instead of appending row by row.
    rows = []
    for tr in tbody.find_all('tr', {"class": ""})[:31]:
        cells = [td.text for td in tr.find_all('td')]
        if len(cells) != len(COLUMN_NAMES):
            raise ValueError(
                f"Station {stationid} year {year}: expected {len(COLUMN_NAMES)} cells per row, got {len(cells)}"
            )
        rows.append(cells)

    yearDF = pd.DataFrame(rows, columns=COLUMN_NAMES)
    yearDF.to_csv(f"Resources/Weather/_{distributor}_{year}_{nccObsCode}_{stationid}.csv", encoding='utf-8', index = False)


In [277]:

# BoM product codes: Rain Fall = 136, Max_temp = 122, min_temp = 123, solar exposure = 193
productlist = ['122','123','136','193']
startYear = 2018
endYear =  2019
years = list(range(startYear, endYear + 1))

for product_code in productlist:
    stationsdf = get_stations_data(startYear, endYear,product_code)
    # The p_c lookup depends only on station and product, not on the year, so
    # remember successful lookups instead of re-requesting them for every year.
    p_c_by_site = {}
    for yr in years:
        for i, station in stationsdf.iterrows():
            site = str(station["Site"])
            p_c = p_c_by_site.get(site)
            if not p_c:
                p_c = populate_P_C(site, product_code)
                if not p_c:
                    # Without a p_c value the data request cannot succeed; skip
                    # it (a later year will retry the lookup).
                    print(f"Station {station['Site']} skipped: no p_c value for product {product_code}")
                    continue
                p_c_by_site[site] = p_c
            print(f"Station {station['Site']} weather data exporting")
            extract_weather_data(yr,station["Site"],p_c, station["Lat"],station["Lon"],station["Distributor"],product_code)



Station 86338 weather data exporting
Station 86038 weather data exporting
Station 89002 weather data exporting
Station 85280 weather data exporting
Station 86077 weather data exporting
Station 86338 weather data exporting
Station 86038 weather data exporting
Station 89002 weather data exporting
Station 85280 weather data exporting
Station 86077 weather data exporting
Station 86338 weather data exporting
Station 86038 weather data exporting
Station 89002 weather data exporting
Station 85280 weather data exporting
Station 86077 weather data exporting
Station 86338 weather data exporting
Station 86038 weather data exporting
Station 89002 weather data exporting
Station 85280 weather data exporting
Station 86077 weather data exporting
Station 86338 weather data exporting
Station 86038 weather data exporting
Station 89002 weather data exporting
Station 85280 weather data exporting
Station 86224 weather data exporting
Station 86338 weather data exporting
Station 86038 weather data exporting
S