### Setup

In [9]:
pip install jupysql duckdb-engine

Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import numpy as np
#ssds

##### DuckDB Connection

In [11]:
import duckdb
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

%load_ext sql
conn = duckdb.connect("test.db") 
%sql conn --alias duckdb
%sql SELECT Version() AS 'DuckDB Version'

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


DuckDB Version
v1.3.2


In [12]:
%%sql
-- Sanity check: show a handful of the schemas visible through the connection.
SELECT *
FROM information_schema.schemata
ORDER BY ALL DESC
LIMIT 5;

catalog_name,schema_name,schema_owner,default_character_set_catalog,default_character_set_schema,default_character_set_name,sql_path
test,main,duckdb,,,,
temp,main,duckdb,,,,
system,pg_catalog,duckdb,,,,
system,main,duckdb,,,,
system,information_schema,duckdb,,,,


### For data source 1: https://data.cer.gov.au/datasets/NGER/ID0243

##### Data Preparation

In [13]:
# Read the NGER electricity sector emissions and generation extract (dataset ID0243).
df = pd.read_csv("NGER.ID0243.csv")
print(df.shape)  # (rows, columns) of the raw frame
columns = df.columns.tolist()  # column labels as a plain list
columns

(775, 14)


['Reporting entity',
 'Facility name',
 'Type',
 'State',
 'Electricity production GJ',
 'Electricity production MWh',
 'Total scope 1 emissions t CO2 e',
 'Total scope 2 emissions t CO2 e',
 'Total emissions t CO2 e',
 'Emission intensity t CO2 e MWh',
 'Grid connected',
 'Grid',
 'Primary fuel',
 'Important notes']

In [14]:
# Preview the first five rows to confirm the CSV parsed as expected.
df.head(5)

Unnamed: 0,Reporting entity,Facility name,Type,State,Electricity production GJ,Electricity production MWh,Total scope 1 emissions t CO2 e,Total scope 2 emissions t CO2 e,Total emissions t CO2 e,Emission intensity t CO2 e MWh,Grid connected,Grid,Primary fuel,Important notes
0,ACCIONA ENERGY OCEANIA PTY LTD,Cathedral Rocks Wind Farm,F,SA,481948,133874,57,127,184,0.0,On,NEM,Wind,-
1,ACCIONA ENERGY OCEANIA PTY LTD,Gunning Wind Farm,F,NSW,491409,136502,50,218,268,0.0,On,NEM,Wind,-
2,ACCIONA ENERGY OCEANIA PTY LTD,Mortlake South Wind Farm,F,VIC,1019352,283153,202,1128,1330,0.0,On,NEM,Wind,-
3,ACCIONA ENERGY OCEANIA PTY LTD,Mt Gellibrand Wind Farm,F,VIC,1025451,284847,99,1273,1372,0.0,On,NEM,Wind,-
4,ACCIONA ENERGY OCEANIA PTY LTD,Waubra Wind Farm,F,VIC,1954964,543046,186,1114,1300,0.0,On,NEM,Wind,-


##### Data Cleaning

In [15]:
# Inspect the dtypes pandas inferred for each column before cleaning.
df.dtypes

Reporting entity                    object
Facility name                       object
Type                                object
State                               object
Electricity production GJ            int64
Electricity production MWh           int64
Total scope 1 emissions t CO2 e      int64
Total scope 2 emissions t CO2 e      int64
Total emissions t CO2 e              int64
Emission intensity t CO2 e MWh     float64
Grid connected                      object
Grid                                object
Primary fuel                        object
Important notes                     object
dtype: object

In [16]:
# Build the cleaned working frame without touching the raw `df`:
# drop the 'Important notes' column (not needed for analysis), then turn the
# '-' placeholders into NaN so missing values are handled uniformly.
wrk_df = (
    df
    .drop(columns=['Important notes'])
    .replace('-', np.nan)
)
print(wrk_df.shape)  # shape of the cleaned frame

(775, 13)


In [17]:
# Write through `conn` (test.db), not the module-level `duckdb.sql`, which
# targets DuckDB's separate default in-memory database: that table would be
# neither persisted nor visible to the %%sql cells bound to `conn`.
# CREATE OR REPLACE keeps the table in sync with `wrk_df` on every re-run.
conn.execute("CREATE OR REPLACE TABLE electricity_emissions AS SELECT * FROM wrk_df")
conn.sql("SELECT * FROM electricity_emissions").df()

Unnamed: 0,Reporting entity,Facility name,Type,State,Electricity production GJ,Electricity production MWh,Total scope 1 emissions t CO2 e,Total scope 2 emissions t CO2 e,Total emissions t CO2 e,Emission intensity t CO2 e MWh,Grid connected,Grid,Primary fuel
0,ACCIONA ENERGY OCEANIA PTY LTD,Cathedral Rocks Wind Farm,F,SA,481948,133874,57,127,184,0.0,On,NEM,Wind
1,ACCIONA ENERGY OCEANIA PTY LTD,Gunning Wind Farm,F,NSW,491409,136502,50,218,268,0.0,On,NEM,Wind
2,ACCIONA ENERGY OCEANIA PTY LTD,Mortlake South Wind Farm,F,VIC,1019352,283153,202,1128,1330,0.0,On,NEM,Wind
3,ACCIONA ENERGY OCEANIA PTY LTD,Mt Gellibrand Wind Farm,F,VIC,1025451,284847,99,1273,1372,0.0,On,NEM,Wind
4,ACCIONA ENERGY OCEANIA PTY LTD,Waubra Wind Farm,F,VIC,1954964,543046,186,1114,1300,0.0,On,NEM,Wind
...,...,...,...,...,...,...,...,...,...,...,...,...,...
770,WIRTGEN ENERGY GLENROWAN PTY. LTD,Corporate Total,C,,766484,212912,60,543,603,,,,
771,WOOLOOGA HOLDCO 2 PTY LTD,Woolooga SF,F,QLD,1185741,329373,59,3658,3717,0.0,On,NEM,Solar
772,WOOLOOGA HOLDCO 2 PTY LTD,Corporate Total,C,,1185741,329373,59,3658,3717,,,,
773,YATPOOL SOLAR FARM HOLDCO PTY LTD,YATPOOL SOLAR FARM,F,VIC,461024,128062,0,361,361,0.0,On,NEM,Solar


### For data source 2: https://cer.gov.au/markets/reports-and-data/large-scale-renewable-energy-data


##### Data Retrieval

In [18]:
# Scrape the CER page for the power-station CSV document links and download them
import requests
import pandas as pd
from bs4 import BeautifulSoup
import os
url = "https://cer.gov.au/markets/reports-and-data/large-scale-renewable-energy-data"
# Only document links ending in one of these suffixes are downloaded.
wanted_suffixes = ('accredited', 'committed', 'probable')

response = requests.get(url, timeout=30)
response.raise_for_status()  # fail loudly instead of parsing an error page
soup = BeautifulSoup(response.content, 'html.parser')
links = soup.find_all('a')
for link in links:
    href = link.get('href')
    if not (href and href.startswith('/document/') and href.endswith(wanted_suffixes)):
        continue
    print(href)
    filename = os.path.basename(href) + ".csv"
    # Check the local copy BEFORE hitting the network, so that skipping
    # really does avoid the download.
    if os.path.exists(filename):
        print(f"{filename} already exists. Skipping download.")
        continue
    download = requests.get("https://cer.gov.au" + href, timeout=30)
    download.raise_for_status()  # never save an HTTP error body as a .csv
    with open(filename, "wb") as f:
        f.write(download.content)
            

/document/power-stations-and-projects-accredited
power-stations-and-projects-accredited.csv already exists. Skipping download.
/document/power-stations-and-projects-committed
power-stations-and-projects-committed.csv already exists. Skipping download.
/document/power-stations-and-projects-probable
power-stations-and-projects-probable.csv already exists. Skipping download.


In [19]:
# Load the three downloaded CER listings, one DataFrame per project status.
(
    accredited_power_station_df,
    committed_power_station_df,
    probable_power_station_df,
) = map(
    pd.read_csv,
    (
        "power-stations-and-projects-accredited.csv",
        "power-stations-and-projects-committed.csv",
        "power-stations-and-projects-probable.csv",
    ),
)
accredited_power_station_df.head(5)

Unnamed: 0,Accreditation code,Power station name,State,Postcode,Installed capacity (MW),Fuel Source (s),Accreditation start date,Approval date
0,SRPXQLE8,"Laura Johnson Home, Townview - Solar w SGU - QLD",QLD,4825,0.2265,Solar,15/10/2024,13/01/2025
1,SRPYNS39,Leppington - Solar - NSW,NSW,2179,0.732,Solar,22/11/2024,13/01/2025
2,SRPYNS58,Quakers Hillside Care Community - Solar w SGU ...,NSW,2763,0.1996,Solar,19/12/2024,13/01/2025
3,SRPXVCN4,Rest Nominees - Solar wSGU - VIC,VIC,3008,0.1188,Solar,20/09/2024,13/01/2025
4,SRPXQLF9,Retail First Mt Ommaney-Solar-QLD,QLD,4074,1.0004,Solar,29/10/2024,13/01/2025


In [20]:
# Inspect the inferred dtypes of the accredited listing.
accredited_power_station_df.dtypes

Accreditation code           object
Power station name           object
State                        object
Postcode                      int64
Installed capacity (MW)     float64
Fuel Source (s)              object
Accreditation start date     object
Approval date                object
dtype: object

In [21]:
# Parse the two day/month/year text columns into datetime values.
for date_column in ('Accreditation start date', 'Approval date'):
    accredited_power_station_df[date_column] = pd.to_datetime(
        accredited_power_station_df[date_column], format="%d/%m/%Y"
    )
accredited_power_station_df.dtypes

Accreditation code                  object
Power station name                  object
State                               object
Postcode                             int64
Installed capacity (MW)            float64
Fuel Source (s)                     object
Accreditation start date    datetime64[ns]
Approval date               datetime64[ns]
dtype: object

In [30]:
# Inspect the inferred dtypes of the committed listing.
committed_power_station_df.dtypes

Project Name                    object
State                           object
MW Capacity                    float64
Fuel Source                     object
Committed Date (Month/Year)     object
dtype: object

In [31]:
# Inspect the inferred dtypes of the probable listing.
probable_power_station_df.dtypes

Project Name     object
State            object
MW Capacity     float64
Fuel Source      object
dtype: object

In [26]:
# Write through `conn` (test.db), not the module-level `duckdb.sql`, which
# targets DuckDB's separate default in-memory database and is therefore
# invisible to the %%sql cells bound to `conn`.
# CREATE OR REPLACE rebuilds the table from the DataFrame in one statement,
# tagging every row with its project status.
conn.execute(
    "CREATE OR REPLACE TABLE accredited_power_stations AS "
    "SELECT *, 'accredited' AS project_status FROM accredited_power_station_df"
)
conn.sql("select * from accredited_power_stations").df()

Unnamed: 0,Accreditation code,Power station name,State,Postcode,Installed capacity (MW),Fuel Source (s),Accreditation start date,Approval date,project_status
0,SRPXQLE8,"Laura Johnson Home, Townview - Solar w SGU - QLD",QLD,4825,0.2265,Solar,2024-10-15,2025-01-13,accredited
1,SRPYNS39,Leppington - Solar - NSW,NSW,2179,0.7320,Solar,2024-11-22,2025-01-13,accredited
2,SRPYNS58,Quakers Hillside Care Community - Solar w SGU ...,NSW,2763,0.1996,Solar,2024-12-19,2025-01-13,accredited
3,SRPXVCN4,Rest Nominees - Solar wSGU - VIC,VIC,3008,0.1188,Solar,2024-09-20,2025-01-13,accredited
4,SRPXQLF9,Retail First Mt Ommaney-Solar-QLD,QLD,4074,1.0004,Solar,2024-10-29,2025-01-13,accredited
...,...,...,...,...,...,...,...,...,...
275,SRPXVCT8,Liuzzi 71 Gower St - Solar - VIC,VIC,3072,0.4000,Solar,2025-07-21,2025-08-27,accredited
276,SRPXQLM5,Mercy Community - Solar w SGU - QLD,QLD,4014,0.8530,Solar,2025-08-04,2025-08-27,accredited
277,SRPXVCP9,Norther Airfield - Solar - VIC,VIC,3045,11.2780,Solar,2025-06-01,2025-08-27,accredited
278,SRPVWAN8,Rose Farms - Solar wSGU- WA,WA,6220,0.2000,Solar,2025-08-04,2025-08-27,accredited


In [25]:
# List the tables on `conn`, the connection that is registered with the SQL magic,
# rather than on DuckDB's module-level default in-memory database.
conn.execute("SHOW TABLES").df()

Unnamed: 0,name
0,accredited_power_stations
1,electricity_emissions


In [28]:
%%sql
-- Materialise the committed projects, tagging every row with its status.
CREATE TABLE IF NOT EXISTS committed_power_stations AS
    SELECT *, 'committed' AS project_status
      FROM committed_power_station_df;
SELECT * FROM committed_power_stations;

Project Name,State,MW Capacity,Fuel Source,Committed Date (Month/Year),project_status
East Rockingham Resource Recovery Facility,WA,29.0,Biomass,Dec-19,committed
Mangalore Renewable Energy Project,VIC,5.0,Solar,Sep-21,committed
Orange Community Renewable Energy Park,NSW,5.0,Solar,Jul-22,committed
Moorebank Logistics Park,NSW,60.0,Solar,Sep-22,committed
Wangaratta Solar Farm,VIC,40.0,Solar,Jul-23,committed
Kidston Pumped Hydro Storage Project,QLD,250.0,Hydro,Jul-23,committed
New England Solar Farm - Stage 2,NSW,320.0,Solar,Jul-23,committed
Bellevue Gold Hybrid Power Station (Wind),WA,24.0,Wind,Sep-23,committed
Glenellen solar project,NSW,200.0,Solar,Nov-23,committed
Forest Glen Solar Farm,NSW,90.0,Solar,Dec-23,committed


In [29]:
%%sql
-- Materialise the probable projects under a name that matches the other
-- *_power_stations tables. The table must not reuse the DataFrame's name:
-- a catalog table takes precedence over the DataFrame of the same name in
-- later queries.
-- First remove the misnamed table that earlier runs of this cell created.
DROP TABLE IF EXISTS probable_power_station_df;
CREATE TABLE IF NOT EXISTS probable_power_stations AS
    SELECT *, 'probable' AS project_status
      FROM probable_power_station_df;
SELECT * FROM probable_power_stations;

Project Name,State,MW Capacity,Fuel Source,project_status
Barnawartha Solar Farm,VIC,64.0,Solar,probable
Barwon solar farm,VIC,250.0,Solar,probable
Boddington Giga Energy,WA,400.0,Solar,probable
Bulli Creek Solar project Stage 1,QLD,775.0,Solar,probable
Bullyard Solar Farm,QLD,100.0,Solar,probable
Bungaban Wind Farm,QLD,1400.0,Wind,probable
Byford Solar Project,WA,30.0,Solar,probable
Campbells Forest Solar Farm,VIC,205.0,Solar,probable
Coppabella Wind Farm,NSW,284.0,Wind,probable
Derby Solar Farm & Battery,VIC,95.0,Solar,probable


In [1]:
import requests
def geocode_address(address):
    """Look up the latitude/longitude of an address via the Google Geocoding API.

    The API key is read from the ``GOOGLE_MAPS_API_KEY`` environment variable
    so that no credential is stored in the notebook. (Any key that was
    previously committed in this cell should be revoked.)

    Args:
        address: Free-text address or place name to geocode.

    Returns:
        A ``(lat, lng)`` tuple taken from the first result, or
        ``(None, None)`` when the API returns no results.

    Raises:
        RuntimeError: If ``GOOGLE_MAPS_API_KEY`` is not set.
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    import os  # local import: this cell only imports `requests`

    api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the GOOGLE_MAPS_API_KEY environment variable before calling geocode_address()."
        )

    # Pass the query via `params` so spaces and symbols such as '&' in the
    # address are URL-encoded instead of corrupting the query string.
    response = requests.get(
        "https://maps.googleapis.com/maps/api/geocode/json",
        params={"address": address, "key": api_key},
        timeout=10,
    )
    response.raise_for_status()

    results = response.json().get("results", [])
    if not results:
        # No match (or an API-level error): report "unknown" instead of
        # failing with IndexError on the empty list.
        return None, None

    location = results[0].get("geometry", {}).get("location", {})
    return location.get("lat"), location.get("lng")
    

# Try the geocoder on one project name from the probable listing and show the result.
lat, lng = geocode_address("Barnawartha Solar Farm")
lat, lng

(-36.0959962, 146.6965122)

### For data source 3: https://www.abs.gov.au/methodologies/data-region-methodology/2011-24#data-downloads