# AEMO Scraping
------
This Python code scrapes the "NSW ELECTRICITY PRICE AND DEMAND" table from AEMO <https://www.aemo.com.au/aemo/apps/visualisations/elec-nem-priceanddemand.html> through an API and saves the table to your computer as a CSV file.

It can also scrape the data of the other states: set the region parameter of the regularScrape(region = 'NSW1') function to the abbreviation of the state followed by '1', e.g. VIC1 / QLD1 / SA1 / TAS1.

To scrape the data automatically and periodically, run regularScrape(afterSeconds).

Many thanks to Facebook user Sirichai Sasataradol, who contributed the majority of the code logic.




In [0]:
import datetime 
import pandas as pd
import requests
import time

def scrape(region = 'NSW1', interval = "30MIN", timeout = 60):
    """Download the AEMO price-and-demand report and archive one region as CSV.

    The report is requested from the AEMO visualisation API, the rows whose
    'REGION' field equals ``region`` are kept, and the result is written to
    ``./<day>:<Mon>:<yy>-<HHMM>-<interval>-<region>.csv`` in the current
    working directory, indexed by 'SETTLEMENTDATE'.

    Args:
        region: Value of the 'REGION' field to keep, e.g. 'NSW1', 'VIC1',
            'QLD1', 'SA1' or 'TAS1'.
        interval: Time scale sent to the API in the request payload; it is
            also used in the output file name.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the caller forever.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the API returns no rows.
    """
    #1. Request & Scrape
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
        "Origin": "https://www.aemo.com.au",
        "Content-Type": "application/json",
        "Host": "www.aemo.com.au",
        "Referer": "https://www.aemo.com.au/aemo/apps/visualisations/elec-nem-priceanddemand.html",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
    }
    payload = {"timeScale": [interval]}
    url = "https://www.aemo.com.au/aemo/apps/api/report/5MIN"
    # The context manager closes the session (and its pooled connection)
    # even when the request raises.
    with requests.Session() as s:
        # Replace, rather than extend, the session's default headers so that
        # exactly the headers listed above are sent.
        s.headers = headers
        response = s.post(url, json=payload, timeout=timeout)
        # Fail with a clear HTTP error instead of a confusing JSON error.
        response.raise_for_status()
        results = response.json()
    #Although it shows '5MIN', we are actually scraping the data at the
    #resolution requested through the "timeScale" field of the payload.
    history = results['5MIN']
    if not history:
        raise ValueError('AEMO returned no rows for interval {0}.'.format(interval))

    #2. Make dataframe
    # Build from the dicts themselves so that values are matched to columns
    # by key, not by position; the column order follows the first row.
    df = pd.DataFrame(history, columns=list(history[0]))
    # set_index moves 'SETTLEMENTDATE' out of the columns and into the index.
    df = df.set_index('SETTLEMENTDATE')
    df = df[df['REGION'] == region]
    df = df.drop(columns=['REGIONID', 'REGION'])

    # Take the timestamp once so the file name and the log line agree.
    # The un-padded day is taken from now.day because the "%-d" strftime
    # directive is platform specific and fails on Windows.
    now = datetime.datetime.now()
    #The following line will save the data with its scraping time
    # NOTE: the file name contains ':' characters, which are not valid in
    # Windows file names; the format is kept so existing archives still match.
    stamp = '{0}:{1}'.format(now.day, now.strftime("%b:%y-%H%M"))
    df.to_csv(r'./{0}-{1}-{2}.csv'.format(stamp, interval, region))

    print('{0}/{1} data archived.'.format(now.day, now.strftime("%b/%y - %H:%M")))

def regularScrape(afterSeconds = 1800, region = 'NSW1', interval = "30MIN"):
    """Call scrape() over and over, sleeping afterSeconds between calls.

    The loop has no exit condition: it runs until the process is stopped or
    scrape() raises.

    Args:
        afterSeconds: Pause between two consecutive scrapes, in seconds.
            The default of 1800 seconds is 30 minutes.
        region: Passed through to scrape().
        interval: Passed through to scrape().
    """
    while True:
        scrape(region=region, interval=interval)
        # Pause before the next scrape.
        time.sleep(afterSeconds)

# On the board!!!
# Start the endless scraping loop only when this file is executed directly
# (as a script or a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    regularScrape()