# Import Dependencies

In [22]:
from datetime import datetime
from io import BytesIO
from sqlalchemy import create_engine, inspect, text
from time import time

import json
import locale
import numpy as np
import pandas as pd
import requests

import requests
import time as t
from geoalchemy2 import Geometry, WKTElement
import psycopg2
import geopandas as gpd
from shapely import wkb

import warnings
warnings.filterwarnings("ignore")

# 1. Database setup

Notes: put your postgresql configuration in the config.json file

In [3]:
with open('config.json') as config:
    db_config = json.load(config)

db_user = db_config['DB_USER']
db_password = db_config['DB_PASSWORD']
db_host = db_config['DB_HOST']
db_port = db_config['DB_PORT']
db_name = db_config['DB_NAME']

db_engine = create_engine(f'postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}')
gh_inspector = inspect(db_engine)

# 2. Dataset 1: Greenhouse and energy information by designated generation facility

In [None]:
gh_energy_data_facts = [
    {
        "time_period_start": datetime(2023, 1, 7),
        "time_period_stop": datetime(2024, 6, 30),
        "dataset_id": "ID0243"
    },
    {
        "time_period_start": datetime(2022, 1, 7),
        "time_period_stop": datetime(2023, 6, 30),
        "dataset_id": "ID0083"
    },
    {
        "time_period_start": datetime(2021, 1, 7),
        "time_period_stop": datetime(2022, 6, 30),
        "dataset_id": "ID0082"
    },
    {
        "time_period_start": datetime(2020, 1, 7),
        "time_period_stop": datetime(2021, 6, 30),
        "dataset_id": "ID0081"
    },
    {
        "time_period_start": datetime(2019, 1, 7),
        "time_period_stop": datetime(2020, 6, 30),
        "dataset_id": "ID0080"
    },
    {
        "time_period_start": datetime(2018, 1, 7),
        "time_period_stop": datetime(2019, 6, 30),
        "dataset_id": "ID0079"
    },
    {
        "time_period_start": datetime(2017, 1, 7),
        "time_period_stop": datetime(2018, 6, 30),
        "dataset_id": "ID0078"
    },
    {
        "time_period_start": datetime(2016, 1, 7),
        "time_period_stop": datetime(2017, 6, 30),
        "dataset_id": "ID0077"
    },
    {
        "time_period_start": datetime(2015, 1, 7),
        "time_period_stop": datetime(2016, 6, 30),
        "dataset_id": "ID0076"
    },
    {
        "time_period_start": datetime(2014, 1, 7),
        "time_period_stop": datetime(2015, 6, 30),
        "dataset_id": "ID0075"
    }
]

gh_energy_table_name = "greenhouse_and_energy"

gh_energy_column_aliases = [
    {"reporting_entity": ["reportingentity", "reportingEntity", "controllingcorporation"]},
    {"facility_name" : ["facilityname", "facilityName"]},
    {"type": ["type"]},
    {"state": ["state"]},
    {"electricity_production_gj": ["electricityproductionGJ", "electricityProductionGJ"]},
    {"electricity_production_mwh" :["electricityproductionMWh", "electricityProductionMWh", "electricityProductionMwh"]},
    {"total_scope_1_emissions_t_co2_e": ["totalscope1emissionstCO2e", "totalScope1EmissionstCO2e", "scope1tCO2e"]},
    {"total_scope_2_emissions_t_co2_e": ["totalscope2emissionstCO2e", "totalScope2EmissionstCO2e", "totalScope2EmissionstCO2e2", "scope2tCO2e"]},
    {"total_emissions_t_co2_e": ["totalemissionstCO2e", "totalEmissionstCO2e"]},
    {"emission_intensity_t_co2_emwh": ["emissionintensitytCO2eMWh", "emissionIntensitytCO2eMWh", "emissionIntensitytMwh"]},
    {"grid_connected": ["gridconnected", "gridConnected", "gridConnected2"]},
    {"grid": ["grid"]},
    {"primary_fuel": ["primaryfuel", "primaryFuel"]},
    {"important_notes": ["importantnotes", "importantNotes"]},
    {"time_period_start": ["time_period_start"]},
    {"time_period_stop": ["time_period_stop"]},
    {"dataset_id": ["dataset_id"]} 
]

In [None]:
def standardize_column_name(df, col_name_aliases):
    col_names = df.columns.tolist()
    col_map = {}

    for col in col_names:
        for alias_dict in col_name_aliases:
            alias = list(alias_dict.values())[0]
            if col in alias:
                col_map[col] = list(alias_dict.keys())[0]
                break

    return df.rename(columns=col_map)

total_row_inserted = 0
for index, fact in enumerate(gh_energy_data_facts):
    # read csv
    dataset_id = fact['dataset_id']
    url = f'https://api.cer.gov.au/datahub-public/v1/api/ODataDataset/NGER/dataset/{dataset_id}?select%3D%2A'
    response = requests.get(url)
    df = pd.DataFrame(response.json())

    # standardize column name
    df = standardize_column_name(df, gh_energy_column_aliases)

    # add columns: time_period, dataset_id
    df['time_period_start'] = fact['time_period_start']
    df['time_period_stop'] = fact['time_period_stop']
    df['dataset_id'] = fact['dataset_id']

    if index == 0:
        # Generate the CREATE TABLE statement
        create_table_statement = pd.io.sql.get_schema(df, gh_energy_table_name)
        
        # Print the generated statement
        print(create_table_statement)

        # Create table
        df.head(n=0).to_sql(name=gh_energy_table_name, con=db_engine, if_exists='replace')
    
    print(f'inserting table {fact['dataset_id']}...')
    t_start = time()
    df.to_sql(name=gh_energy_table_name, con=db_engine, if_exists='append')
    t_end = time()
    print(f'inserted table {fact['dataset_id']} in {t_end-t_start:10.3f} seconds')
    total_row_inserted = total_row_inserted + len(df)
    print(f'{total_row_inserted} rows inserted')
        

# 3. Dataset 2: Large-scale renewable energy data

## 3.1. 2001–2024 Accredited power stations data

In [None]:
url = "https://cer.gov.au/document/historical-accredited-power-stations-and-projects-0"
df = pd.read_csv(url, encoding='ISO-8859-1')
df.head()

In [None]:
# Rename column names
df = df.rename(columns={
    'Accreditation code': 'accreditation_code',
    'Power station name': 'power_station_name',
    'State': 'state',
    'Installed capacity': 'installed_capacity',
    'Postcode': 'postcode',
    'Fuel source(s)': 'fuel_sources',
    'Accreditation start date': 'accreditation_start_date',
    'Suspension status': 'suspension_status',
    'Baseline (MWh)': 'baseline_mwh',
    'Comment': 'comment' 
})
df.columns

In [None]:
# check data types
print(df.dtypes)

In [None]:
# convert data types

locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
df['baseline_mwh'] = df['baseline_mwh'].apply(lambda x: locale.atoi(str(x)) if isinstance(x, str) else x)

df['accreditation_start_date'] = pd.to_datetime(df['accreditation_start_date'], format='%d/%m/%Y')

In [None]:
# save to database
table_name='accredited_power_stations_data'
df.to_sql(name=table_name, con=db_engine, if_exists='replace')

## 3.2. 2024 total LGCs and capacity of accredited power stations

In [None]:
url = "https://cer.gov.au/document/total-lgcs-and-capacity-accredited-power-stations-2024"
df = pd.read_csv(url, encoding='ISO-8859-1')
df.head()

In [None]:
# Rename column names
df = df.rename(columns={
    'Total LGCs in the REC Registry': 'total_lgcs_in_the_rec_registry',
    'MW of approved power stations (since 1 Jan 2024)': 'mw_of_approved_power_stations_since_1_jan_2024',
    'Approved power stations (since 1 Jan 2024)': 'approved_power_stations_since_1_jan_2024',
    'As at': 'as_at'
})
df.columns

In [None]:
# check data types
print(df.dtypes)

In [None]:
# convert data types

locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
df['mw_of_approved_power_stations_since_1_jan_2024'] = df['mw_of_approved_power_stations_since_1_jan_2024'].apply(lambda x: locale.atof(str(x)) if isinstance(x, str) else x)

df['as_at'] = pd.to_datetime(df['as_at'], format='%d/%m/%Y')

In [None]:
# save to database
table_name='total_lgcs_and_capacity_of_accredited_power_stations'
df.to_sql(name=table_name, con=db_engine, if_exists='replace')

## 3.3. Committed power stations

In [None]:
url = "https://cer.gov.au/document/power-stations-and-projects-committed"
df = pd.read_csv(url, encoding='ISO-8859-1')
df.head()

In [None]:
# Rename column names
df = df.rename(columns={
    'Project Name': 'project_name',
    'State ': 'state',
    'MW Capacity': 'mw_capacity',
    'Fuel Source': 'fuel_source',
    'Committed Date (Month/Year)': 'committed_date'
})
df.columns

In [None]:
# check data types
print(df.dtypes)

In [None]:
# convert data types
df["committed_date"] = pd.to_datetime(df["committed_date"], format="%b-%Y")

In [None]:
# save to database
table_name='committed_power_stations'
df.to_sql(name=table_name, con=db_engine, if_exists='replace')

## 3.4. Probable power stations

In [None]:
url = "https://cer.gov.au/document/power-stations-and-projects-probable"
df = pd.read_csv(url, encoding='ISO-8859-1')
df.head()

In [None]:
# Rename column names
df = df.rename(columns={
    'Project Name': 'project_name',
    'State ': 'state',
    'MW Capacity': 'mw_capacity',
    'Fuel Source': 'fuel_source',
})
df.columns

In [None]:
# check data types
print(df.dtypes)

In [None]:
# save to database
table_name='probable_power_stations'
df.to_sql(name=table_name, con=db_engine, if_exists='replace')

# 4. Dataset 3:  Australian Bureau of Statistic Data

In [None]:
# base url for the data file

url = "https://www.abs.gov.au/methodologies/data-region-methodology/2011-24/14100DO0001_2011-24.xlsx"

In [None]:
# function to clean column names

def clean_column_names(df):
    # Clean the column names
    df.columns = df.columns.str.lower()  # Convert to lowercase
    df.columns = df.columns.str.replace(' ', '_')  # Replace spaces with underscores
    df.columns = df.columns.str.replace('-', '_')  # Replace dash with underscores
    df.columns = df.columns.str.replace('no.', 'integer')  # Replace dash with underscores
    df.columns = df.columns.str.replace('%', 'pct')  # Replace % with 'pct'
    df.columns = df.columns.str.replace('[^a-z0-9_]', '', regex=True)  # Remove special characters
    return df

In [None]:
# function to convert data types

def convert_abs_datatype(df):
    locale.setlocale(locale.LC_ALL, 'fr_FR.UTF-8')
    
    unchanged_col = ["code", "label", "year"]
    
    # Opt-in to the future behavior of replace
    pd.set_option('future.no_silent_downcasting', True)
    
    for col in list(df.columns):
        if col not in unchanged_col:
            if "integer" in col:
                df[col] = df[col].replace('-', np.nan)
                df[col] = df[col].apply(lambda x: locale.atoi(str(x)) if isinstance(x, str) else x)
                df[col] = df[col].astype("Int64")
            else:
                df[col] = df[col].replace('-', np.nan)
                df[col] = df[col].apply(lambda x: locale.atof(str(x)) if isinstance(x, str) else x)
    return df

## 4.1. Estimated resident population - year ended 30 June

In [None]:
table_name = 'estimated_resident_population'

### 4.1.1. Table 1 (For exploratory)

In [None]:
df = pd.read_excel(url, sheet_name="Table 1", skiprows=6, usecols="A:C,D:L")

# check table
df.head()

In [None]:
# clean column names
df = clean_column_names(df)    
df.columns

In [None]:
# convert data type
df = convert_abs_datatype(df)
print(df.dtypes)

In [None]:
# adding geo_scope identifier
df['geo_scope'] = 1
df['geo_scope'] = df['geo_scope'].astype(int)

In [None]:
# re-check table
df.head()

In [None]:
# store data to database
df.to_sql(name=table_name, con=db_engine, if_exists='replace')

In [None]:
# the next stage will go through the same steps, to not repeating ourselves, we can use this function:

def dump_data(sheet, skip_rows, use_cols, geo_scope, postgres_table_name, store_mode):
    # read file
    df = pd.read_excel(url, sheet_name=sheet, skiprows=skip_rows, usecols=use_cols)

    # clean column names
    df = clean_column_names(df)  

    # convert data type
    df = convert_abs_datatype(df)

    # adding geo_scope identifier
    df['geo_scope'] = geo_scope
    df['geo_scope'] = df['geo_scope'].astype(int)

    # store data to database
    df.to_sql(name=postgres_table_name, con=db_engine, if_exists=store_mode)

### 4.1.2. Table 2 and table 3

In [None]:
col_span="A:C,D:L"
dump_data("Table 2", 6, col_span, 2, table_name, "append")
dump_data("Table 3", 6, col_span, 3, table_name, "append")

## 4.2. Estimated resident population - Males - year ended 30 June

In [None]:
table_name = 'estimated_resident_population_males'
col_span="A:C,M:AV"

# Table 1
dump_data("Table 1", 6, col_span, 1, table_name, "replace")
# Table 2
dump_data("Table 2", 6, col_span, 2, table_name, "append")
# Table 3
dump_data("Table 3", 6, col_span, 3, table_name, "append")

## 4.3 Estimated resident population - Females - year ended 30 June

In [None]:
table_name = 'estimated_resident_population_females'
col_span="A:C,AW:CF"

# Table 1
dump_data("Table 1", 6, col_span, 1, table_name, "replace")
# Table 2
dump_data("Table 2", 6, col_span, 2, table_name, "append")
# Table 3
dump_data("Table 3", 6, col_span, 3, table_name, "append")

## 4.4 Estimated resident population - Persons - year ended 30 June

In [None]:
table_name = 'estimated_resident_population_persons'
col_span="A:C,CG:DP"

# Table 1
dump_data("Table 1", 6, col_span, 1, table_name, "replace")
# Table 2
dump_data("Table 2", 6, col_span, 2, table_name, "append")
# Table 3
dump_data("Table 3", 6, col_span, 3, table_name, "append")

## 4.5. Births and deaths - year ended 31 December

In [None]:
table_name = 'births_and_deaths'
col_span="A:C,DQ:DT"

# Table 1
dump_data("Table 1", 6, col_span, 1, table_name, "replace")
# Table 2
dump_data("Table 2", 6, col_span, 2, table_name, "append")
# Table 3
dump_data("Table 3", 6, col_span, 3, table_name, "append")

## 4.6. Internal and overseas migration - year ended 30 June

In [None]:
table_name = 'internal_and_overseas_migration'
col_span="A:C,DU:DZ"

# Table 1
dump_data("Table 1", 6, col_span, 1, table_name, "replace")
# Table 2
dump_data("Table 2", 6, col_span, 2, table_name, "append")

## 4.7. Aboriginal and Torres Strait Islander Peoples - Census

In [None]:
table_name = 'aboriginal_and_torres_strait_islander_peoples'
col_span="A:C,EA:EB"

# Table 1
dump_data("Table 1", 6, col_span, 1, table_name, "replace")
# Table 2
dump_data("Table 2", 6, col_span, 2, table_name, "append")

## 4.8. Overseas born population

In [None]:
table_name = 'overseas_born_population'
col_span="A:C,EC:EM"

# Table 1
dump_data("Table 1", 6, col_span, 1, table_name, "replace")
# Table 2
dump_data("Table 2", 6, col_span, 2, table_name, "append")

## 4.9. Religious affiliation

In [None]:
table_name = 'religious_affiliation'
col_span="A:C,EN:EU"

# Table 1
dump_data("Table 1", 6, col_span, 1, table_name, "replace")
# Table 2
dump_data("Table 2", 6, col_span, 2, table_name, "append")

## 4.10. Australian citizenship

In [None]:
table_name = 'australian_citizenship'
col_span="A:C,EV:FA"

# Table 1
dump_data("Table 1", 6, col_span, 1, table_name, "replace")
# Table 2
dump_data("Table 2", 6, col_span, 2, table_name, "append")

## 4.11. Speaks a language other than English at home

In [None]:
table_name = 'speaks_other_than_english_at_home'
col_span="A:C,FB:FC"

# Table 1
dump_data("Table 1", 6, col_span, 1, table_name, "replace")
# Table 2
dump_data("Table 2", 6, col_span, 2, table_name, "append")

## 4.12. Australian Defence Force service - Persons aged 15 years and over

In [None]:
table_name = 'australian_defence_force_service'
col_span="A:C,FD:FG"

# Table 1
dump_data("Table 1", 6, col_span, 1, table_name, "replace")
# Table 2
dump_data("Table 2", 6, col_span, 2, table_name, "append")

# 5. Data Augmentation

## 5.1 CER Power Stations

In [4]:
# Funtion to extract state coordinates and output the dictionary
def get_state_coordinates(df, state_col='state', wait=1):
    # Extract unique states
    state_list = df[state_col].dropna().unique().tolist()

    # Initialize dictionary to store results
    state_coords = {}

    # Nominatim API setup
    base_url = 'https://nominatim.openstreetmap.org/search'
    headers = {'User-Agent': 'COMP5339'}

    # Query OpenStreetMap for each state
    for state in state_list:
        # Skip invalid states
        if state in ['-', '', 'N/A']:
            state_coords[state] = (None, None, None)
            continue  # Skip to the next state

        params = {'q': state + ', Australia', 'format': 'json'}  # Specify Australian states
        t.sleep(wait)  # Avoid overloading the service
        
        try:
            response = requests.get(base_url, params=params, headers=headers)
            if response.status_code == 200:
                data = response.json()
                if data:
                    lat = float(data[0]['lat'])
                    lon = float(data[0]['lon'])
                    spatial_id = data[0]['osm_id']
                    state_coords[state] = (spatial_id, lat, lon)
                else:
                    state_coords[state] = (None, None, None)
            else:
                state_coords[state] = (None, None, None)
        except:
            state_coords[state] = (None, None, None)

    return state_coords

In [8]:
# Function to return the latitude and longitude of the address using OpenStreetMap Nominatim API
def geocode_osm(address, state, status='Accredited', state_coords=None, wait=1):
    # Base URL for OpenStreetMap Nominatim API
    base_url = 'https://nominatim.openstreetmap.org/search'
    headers = {'User-Agent': 'COMP5339'}

    # Only try OSM lookup if status is 'Accredited'
    if status == 'Accredited':
        params = {'q': f"{address}, {state}", 'format': 'json'}
        t.sleep(wait)
        try:
            response = requests.get(base_url, params=params, headers=headers)
            if response.status_code == 200:
                data = response.json()
                if data:  # Return first result
                    osm_id = data[0].get('osm_id')
                    lat = float(data[0].get('lat'))
                    lon = float(data[0].get('lon'))
                    return osm_id, lat, lon
        except:
            pass  # silently ignore request errors

    # Fallback to state coordinates
    if state_coords and state in state_coords:
        return state_coords[state]

    # If all fails
    return None, None, None

In [5]:
# Query the table and load into DataFrame
table_name = 'accredited_power_stations_data'
query = f"SELECT * FROM {table_name}"

# Access the power stations table using pandas
accredited_df = pd.read_sql(query, con=db_engine)

# Add identifier
accredited_df['status'] = 'Accredited'

# Display the first few rows of the table
accredited_df.head()

Unnamed: 0,index,accreditation_code,power_station_name,state,installed_capacity,postcode,fuel_sources,accreditation_start_date,suspension_status,baseline_mwh,comment,status
0,0,SRPXQLG5,Varsity Views - Solar w SGU - QLD,QLD,0.2109,4227,Solar,2024-12-18,Unsuspended,0,,Accredited
1,1,SRPYNS51,NNSWLHD-Byron Central Hospital - Solar - NSW,NSW,0.7203,2481,Solar,2024-12-12,Unsuspended,0,,Accredited
2,2,SRPXQLG2,Springwood Terrace Care Community - Solar w SG...,QLD,0.189,4127,Solar,2024-12-10,Unsuspended,0,,Accredited
3,3,SRPYNS45,Bunnings Bennetts Green - Solar - NSW,NSW,0.28,2290,Solar,2024-12-03,Unsuspended,0,,Accredited
4,4,SRPYNS46,Tuggerah Home MSB1 254kW - Solar - NSW,NSW,0.254,2259,Solar,2024-12-02,Unsuspended,0,,Accredited


In [6]:
# Query the table and load into DataFrame
table_name = 'committed_power_stations'
query = f"SELECT * FROM {table_name}"

# Access the power stations table using pandas
comitted_df = pd.read_sql(query, con=db_engine)

# Add identifier
comitted_df['status'] = 'Comitted'

# Rename columns to ensure uniformity
comitted_df = comitted_df.rename(columns={'fuel_source': 'fuel_sources', 'project_name': 'power_station_name'})

comitted_df.head()

Unnamed: 0,index,power_station_name,state,mw_capacity,fuel_sources,committed_date,status
0,0,East Rockingham Resource Recovery Facility,WA,29.0,Biomass,2019-12-01,Comitted
1,1,Mangalore Renewable Energy Project,VIC,5.0,Solar,2021-09-01,Comitted
2,2,Orange Community Renewable Energy Park,NSW,5.0,Solar,2022-07-01,Comitted
3,3,Moorebank Logistics Park,NSW,60.0,Solar,2022-09-01,Comitted
4,4,Wangaratta Solar Farm,VIC,40.0,Solar,2023-07-01,Comitted


In [7]:
# Query the table and load into DataFrame
table_name = 'probable_power_stations'
query = f"SELECT * FROM {table_name}"

# Access the power stations table using pandas
probable_df = pd.read_sql(query, con=db_engine)

# Add identifier
probable_df['status'] = 'Probable'

# Rename columns to ensure uniformity
probable_df = probable_df.rename(columns={'fuel_source': 'fuel_sources', 'project_name': 'power_station_name'})

probable_df.head()

Unnamed: 0,index,power_station_name,state,mw_capacity,fuel_sources,status
0,0,Barnawartha Solar Farm,VIC,64.0,Solar,Probable
1,1,Barwon solar farm,VIC,250.0,Solar,Probable
2,2,Boddington Giga Energy,WA,400.0,Solar,Probable
3,3,Bulli Creek Solar project Stage 1,QLD,775.0,Solar,Probable
4,4,Bullyard Solar Farm,QLD,100.0,Solar,Probable


In [8]:
# Concatenate to make power_station df
power_station_df = pd.concat([accredited_df, comitted_df, probable_df], ignore_index=True)
# Convert the float column to string, removing .0 if it's an integer
power_station_df['postcode'] = power_station_df['postcode'].apply(lambda x: str(int(x)) if x.is_integer() else str(x))
power_station_df.head()

Unnamed: 0,index,accreditation_code,power_station_name,state,installed_capacity,postcode,fuel_sources,accreditation_start_date,suspension_status,baseline_mwh,comment,status,mw_capacity,committed_date
0,0,SRPXQLG5,Varsity Views - Solar w SGU - QLD,QLD,0.2109,4227,Solar,2024-12-18,Unsuspended,0.0,,Accredited,,NaT
1,1,SRPYNS51,NNSWLHD-Byron Central Hospital - Solar - NSW,NSW,0.7203,2481,Solar,2024-12-12,Unsuspended,0.0,,Accredited,,NaT
2,2,SRPXQLG2,Springwood Terrace Care Community - Solar w SG...,QLD,0.189,4127,Solar,2024-12-10,Unsuspended,0.0,,Accredited,,NaT
3,3,SRPYNS45,Bunnings Bennetts Green - Solar - NSW,NSW,0.28,2290,Solar,2024-12-03,Unsuspended,0.0,,Accredited,,NaT
4,4,SRPYNS46,Tuggerah Home MSB1 254kW - Solar - NSW,NSW,0.254,2259,Solar,2024-12-02,Unsuspended,0.0,,Accredited,,NaT


In [9]:
# Get unique state coordinates
state_coords = get_state_coordinates(power_station_df, state_col='state', wait=1)

state_coords

{'QLD': (2316595, -22.1646782, 144.5844903),
 'NSW': (2316593, -31.8759835, 147.2869493),
 'VIC': (2316741, -36.5986096, 144.6780052),
 'SA': (2316596, -30.5343665, 135.6301212),
 'WA': (2316598, -25.2303005, 121.0187246),
 'ACT': (2354197, -35.4883502, 149.0026942),
 'TAS': (2369652, -42.035067, 146.6366887),
 'NT': (2316594, -19.8516101, 133.2303375)}

In [10]:
# Apply geocode_osm to each row and unpack results into 'Latitude' and 'Longitude'
power_station_df['spatial_id'], power_station_df['Latitude'], power_station_df['Longitude'] = zip(*power_station_df.apply(lambda row: geocode_osm(row['power_station_name'], row['state'], row['status'], state_coords=state_coords), axis=1))

power_station_df.head()

Unnamed: 0,index,accreditation_code,power_station_name,state,installed_capacity,postcode,fuel_sources,accreditation_start_date,suspension_status,baseline_mwh,comment,status,mw_capacity,committed_date,spatial_id,Latitude,Longitude
0,0,SRPXQLG5,Varsity Views - Solar w SGU - QLD,QLD,0.2109,4227,Solar,2024-12-18,Unsuspended,0.0,,Accredited,,NaT,2316595,-22.164678,144.58449
1,1,SRPYNS51,NNSWLHD-Byron Central Hospital - Solar - NSW,NSW,0.7203,2481,Solar,2024-12-12,Unsuspended,0.0,,Accredited,,NaT,2316593,-31.875984,147.286949
2,2,SRPXQLG2,Springwood Terrace Care Community - Solar w SG...,QLD,0.189,4127,Solar,2024-12-10,Unsuspended,0.0,,Accredited,,NaT,2316595,-22.164678,144.58449
3,3,SRPYNS45,Bunnings Bennetts Green - Solar - NSW,NSW,0.28,2290,Solar,2024-12-03,Unsuspended,0.0,,Accredited,,NaT,2316593,-31.875984,147.286949
4,4,SRPYNS46,Tuggerah Home MSB1 254kW - Solar - NSW,NSW,0.254,2259,Solar,2024-12-02,Unsuspended,0.0,,Accredited,,NaT,2316593,-31.875984,147.286949


In [11]:
# Test connection and check PostGIS status using text() for SQL
with db_engine.connect() as connection:
    result = connection.execute(text("SELECT PostGIS_full_version();"))
    postgis_version = result.fetchone()
    print(f"PostGIS Version: {postgis_version[0]}")

PostGIS Version: POSTGIS="3.5.3 3.5.3" [EXTENSION] PGSQL="170" GEOS="3.13.1-CAPI-1.19.2" PROJ="8.2.1 NETWORK_ENABLED=OFF URL_ENDPOINT= USER_WRITABLE_DIRECTORY=C:\WINDOWS\ServiceProfiles\NetworkService\AppData\Local/proj" (compiled against PROJ 8.2.1) LIBXML="2.12.5" LIBJSON="0.12" LIBPROTOBUF="1.2.1" WAGYU="0.5.0 (Internal)" TOPOLOGY


In [12]:
# Convert lat/lon to PostGIS Point
power_station_df['geom'] = power_station_df.apply(
    lambda row: WKTElement(f"POINT({row['Longitude']} {row['Latitude']})", srid=4326),
    axis=1
)

power_station_df.head()

Unnamed: 0,index,accreditation_code,power_station_name,state,installed_capacity,postcode,fuel_sources,accreditation_start_date,suspension_status,baseline_mwh,comment,status,mw_capacity,committed_date,spatial_id,Latitude,Longitude,geom
0,0,SRPXQLG5,Varsity Views - Solar w SGU - QLD,QLD,0.2109,4227,Solar,2024-12-18,Unsuspended,0.0,,Accredited,,NaT,2316595,-22.164678,144.58449,POINT(144.5844903 -22.1646782)
1,1,SRPYNS51,NNSWLHD-Byron Central Hospital - Solar - NSW,NSW,0.7203,2481,Solar,2024-12-12,Unsuspended,0.0,,Accredited,,NaT,2316593,-31.875984,147.286949,POINT(147.2869493 -31.8759835)
2,2,SRPXQLG2,Springwood Terrace Care Community - Solar w SG...,QLD,0.189,4127,Solar,2024-12-10,Unsuspended,0.0,,Accredited,,NaT,2316595,-22.164678,144.58449,POINT(144.5844903 -22.1646782)
3,3,SRPYNS45,Bunnings Bennetts Green - Solar - NSW,NSW,0.28,2290,Solar,2024-12-03,Unsuspended,0.0,,Accredited,,NaT,2316593,-31.875984,147.286949,POINT(147.2869493 -31.8759835)
4,4,SRPYNS46,Tuggerah Home MSB1 254kW - Solar - NSW,NSW,0.254,2259,Solar,2024-12-02,Unsuspended,0.0,,Accredited,,NaT,2316593,-31.875984,147.286949,POINT(147.2869493 -31.8759835)


In [13]:
table_name = 'spatial_power_stations_data'

# store data to database
power_station_df.to_sql(name=table_name, con=db_engine, if_exists='replace', index=False, dtype={'geom': Geometry('POINT', srid=4326)})

104

In [14]:
# Check Postgis
query = text("SELECT power_station_name, ST_AsText(geom) AS geom_wkt FROM spatial_power_stations_data LIMIT 5;")
df_check = pd.read_sql(query, con=db_engine)
print(df_check)

                                  power_station_name  \
0                  Varsity Views - Solar w SGU - QLD   
1       NNSWLHD-Byron Central Hospital - Solar - NSW   
2  Springwood Terrace Care Community - Solar w SG...   
3              Bunnings Bennetts Green - Solar - NSW   
4             Tuggerah Home MSB1 254kW - Solar - NSW   

                         geom_wkt  
0  POINT(144.5844903 -22.1646782)  
1  POINT(147.2869493 -31.8759835)  
2  POINT(144.5844903 -22.1646782)  
3  POINT(147.2869493 -31.8759835)  
4  POINT(147.2869493 -31.8759835)  


## 5.2 NGER Data

In [3]:
# Query the table and load into DataFrame
table_name = 'greenhouse_and_energy'
query = f"SELECT * FROM {table_name}"

# Access the power stations table using pandas
nger_df = pd.read_sql(query, con=db_engine)

In [15]:
unique_facility = nger_df[['facility_name', 'state']].drop_duplicates()

unique_facility.head()

Unnamed: 0,facility_name,state
0,Cathedral Rocks Wind Farm,SA
1,Gunning Wind Farm,NSW
2,Mortlake South Wind Farm,VIC
3,Mt Gellibrand Wind Farm,VIC
4,Waubra Wind Farm,VIC


In [12]:
# Get unique state coordinates
state_coords = get_state_coordinates(unique_facility, state_col='state', wait=1)

state_coords

{'SA': (2316596, -30.5343665, 135.6301212),
 'NSW': (2316593, -31.8759835, 147.2869493),
 'VIC': (2316741, -36.5986096, 144.6780052),
 '-': (None, None, None),
 'QLD': (2316595, -22.1646782, 144.5844903),
 'WA': (2316598, -25.2303005, 121.0187246),
 'ACT': (2354197, -35.4883502, 149.0026942),
 'NT': (2316594, -19.8516101, 133.2303375),
 'TAS': (2369652, -42.035067, 146.6366887),
 '': (None, None, None),
 'N/A': (None, None, None)}

In [16]:
# Apply geocode_osm to each row and unpack results into 'Latitude' and 'Longitude'
unique_facility['spatial_id'], unique_facility['Latitude'], unique_facility['Longitude'] = zip(*unique_facility.apply(lambda row: geocode_osm(row['facility_name'], row['state'], state_coords=state_coords), axis=1))

unique_facility.head()

Unnamed: 0,facility_name,state,spatial_id,Latitude,Longitude
0,Cathedral Rocks Wind Farm,SA,2316596.0,-30.534367,135.630121
1,Gunning Wind Farm,NSW,2316593.0,-31.875984,147.286949
2,Mortlake South Wind Farm,VIC,2316741.0,-36.59861,144.678005
3,Mt Gellibrand Wind Farm,VIC,2316741.0,-36.59861,144.678005
4,Waubra Wind Farm,VIC,203440016.0,-37.352857,143.631327


In [17]:
# Left join back to the full NGER data to get the spatial columns
nger_df = nger_df.merge(
    unique_facility[['facility_name', 'state', 'spatial_id', 'Latitude', 'Longitude']],
    on=['facility_name', 'state'],
    how='left'
)

In [18]:
# Convert lat/lon to PostGIS Point
nger_df['geom'] = nger_df.apply(
    lambda row: WKTElement(f"POINT({row['Longitude']} {row['Latitude']})", srid=4326),
    axis=1
)

nger_df.head()

Unnamed: 0,index,reporting_entity,facility_name,type,state,electricity_production_gj,electricity_production_mwh,total_scope_1_emissions_t_co2_e,total_scope_2_emissions_t_co2_e,total_emissions_t_co2_e,...,grid,primary_fuel,important_notes,time_period_start,time_period_stop,dataset_id,spatial_id,Latitude,Longitude,geom
0,0,ACCIONA ENERGY OCEANIA PTY LTD,Cathedral Rocks Wind Farm,F,SA,481948.0,133874.0,57.0,127.0,184,...,NEM,Wind,-,2023-01-07,2024-06-30,ID0243,2316596.0,-30.534367,135.630121,POINT(135.6301212 -30.5343665)
1,1,ACCIONA ENERGY OCEANIA PTY LTD,Gunning Wind Farm,F,NSW,491409.0,136502.0,50.0,218.0,268,...,NEM,Wind,-,2023-01-07,2024-06-30,ID0243,2316593.0,-31.875984,147.286949,POINT(147.2869493 -31.8759835)
2,2,ACCIONA ENERGY OCEANIA PTY LTD,Mortlake South Wind Farm,F,VIC,1019352.0,283153.0,202.0,1128.0,1330,...,NEM,Wind,-,2023-01-07,2024-06-30,ID0243,2316741.0,-36.59861,144.678005,POINT(144.6780052 -36.5986096)
3,3,ACCIONA ENERGY OCEANIA PTY LTD,Mt Gellibrand Wind Farm,F,VIC,1025451.0,284847.0,99.0,1273.0,1372,...,NEM,Wind,-,2023-01-07,2024-06-30,ID0243,2316741.0,-36.59861,144.678005,POINT(144.6780052 -36.5986096)
4,4,ACCIONA ENERGY OCEANIA PTY LTD,Waubra Wind Farm,F,VIC,1954964.0,543046.0,186.0,1114.0,1300,...,NEM,Wind,-,2023-01-07,2024-06-30,ID0243,203440016.0,-37.352857,143.631327,POINT(143.6313265 -37.3528575)


In [19]:
table_name = 'spatial_greenhouse_and_energy'

# store data to database
nger_df.to_sql(name=table_name, con=db_engine, if_exists='replace', index=False, dtype={'geom': Geometry('POINT', srid=4326)})

942

# 6. Data Transformation and Storage

In [55]:
# Create a new energy db to store the schema
try:
    # Connect to the default "postgres" database first
    conn = psycopg2.connect(
        dbname="postgres", 
        user=db_user, 
        password=db_password, 
        host=db_host, 
        port=db_port
    )
    # Enable autocommit mode (required for CREATE DATABASE)
    conn.autocommit = True
    
    # Create a cursor object to execute SQL commands
    cur = conn.cursor()
    
    # Try to create a new database called "energydb"
    cur.execute("CREATE DATABASE energydb;")
    
    # Close cursor and connection
    cur.close()
    conn.close()

# If the database already exists, catch the DuplicateDatabase error
except psycopg2.errors.DuplicateDatabase:
    print("Database already exists, skipping creation.")

Database already exists, skipping creation.


In [None]:
# Connect with the new energy db
energy_db_name = "energydb"
energy_db_engine = create_engine(f'postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{energy_db_name}')
gh_inspector = inspect(energy_db_engine)

## 6.1 Data Transformation

In [20]:
# Transform location information for loading into the dim_location table 

# Start with the states from NGER and CER data
# Query both tables and combine them into one DataFrame
query = """
SELECT state FROM spatial_greenhouse_and_energy
UNION
SELECT state FROM spatial_power_stations_data
"""

# Access and load into a single DataFrame
states_df = pd.read_sql(query, con=db_engine)

# Get unique state coordinates from the consolidated state list
state_coords = get_state_coordinates(states_df, state_col='state', wait=1)

# Convert the dictionary into a DataFrame with state_code, spatial_id, latitude, and longitude
states_df = pd.DataFrame.from_dict(state_coords, orient='index', columns=['spatial_id', 'Latitude', 'Longitude'])

# Add state_code and reorganize columns
states_df['state_code'] = states_df.index
states_df = states_df[['state_code', 'spatial_id', 'Latitude', 'Longitude']]

# Filter out invalid state_code values and reset index
states_df = states_df[~states_df['state_code'].isin(['-', '', 'N/A'])].reset_index(drop=True)

# Convert the float column to string, removing .0 if it's an integer
states_df['spatial_id'] = states_df['spatial_id'].apply(lambda x: str(int(x)) if x.is_integer() else str(x))

# Convert lat/lon to PostGIS Point
states_df['geom'] = states_df.apply(
    lambda row: WKTElement(f"POINT({row['Longitude']} {row['Latitude']})", srid=4326),
    axis=1
)

states_df.head()

Unnamed: 0,state_code,spatial_id,Latitude,Longitude,geom
0,VIC,2316741,-36.59861,144.678005,POINT(144.6780052 -36.5986096)
1,QLD,2316595,-22.164678,144.58449,POINT(144.5844903 -22.1646782)
2,TAS,2369652,-42.035067,146.636689,POINT(146.6366887 -42.035067)
3,ACT,2354197,-35.48835,149.002694,POINT(149.0026942 -35.4883502)
4,NSW,2316593,-31.875984,147.286949,POINT(147.2869493 -31.8759835)


In [31]:
# Now get the location information of each facility and power stations
# Query both tables and combine them into one DataFrame
query = f"""
    SELECT "spatial_id", "state" as state_code, "Latitude", "Longitude", "geom", "postcode"
    FROM spatial_power_stations_data
    UNION ALL
    SELECT "spatial_id", "state" as state_code, "Latitude", "Longitude", "geom", NULL as postcode
    FROM spatial_greenhouse_and_energy
"""

# Access the tables and load into a single DataFrame
power_stations_locations_df = pd.read_sql(query, con=db_engine)

# Convert the 'geom' column from WKB hex to geometry (apply once after concatenation)
power_stations_locations_df['geom'] = power_stations_locations_df['geom'].apply(lambda x: wkb.loads(bytes.fromhex(x)))

# Filter out invalid state_code values and reset index
power_stations_locations_df = power_stations_locations_df[~power_stations_locations_df['state_code'].isin(['-', '', 'N/A'])].reset_index(drop=True)

# Convert the float column to string, removing .0 if it's an integer
power_stations_locations_df['spatial_id'] = power_stations_locations_df['spatial_id'].apply(lambda x: str(int(x)) if x.is_integer() else str(x))

# Display the first few rows for verification
power_stations_locations_df.head()


Unnamed: 0,spatial_id,state_code,Latitude,Longitude,geom,postcode
0,2316595,QLD,-22.164678,144.58449,POINT (144.5844903 -22.1646782),4227
1,2316593,NSW,-31.875984,147.286949,POINT (147.2869493 -31.8759835),2481
2,2316595,QLD,-22.164678,144.58449,POINT (144.5844903 -22.1646782),4127
3,2316593,NSW,-31.875984,147.286949,POINT (147.2869493 -31.8759835),2290
4,2316593,NSW,-31.875984,147.286949,POINT (147.2869493 -31.8759835),2259


In [None]:
# Concatenate to make locations df
location_df = pd.concat([power_stations_locations_df, states_df], ignore_index=True)

# Group by 'spatial_id' and take the first non-null value for each column in the group
location_df_grouped = location_df.groupby('spatial_id', as_index=False).first()

# Rename the columns to the desired names
location_df_grouped = location_df_grouped.rename(columns={
    'spatial_id': 'location_id',      # Rename 'spatial_id' to 'location_id'
    'state_code': 'state_code',       # 'state_code' remains the same (or change it if needed)
    'Latitude': 'latitude',           # 'Latitude' to 'latitude'
    'Longitude': 'longitude',         # 'Longitude' to 'longitude'
    'geom': 'geometry'                # 'geom' to 'geometry'
})

# Reorder the columns as needed
dim_location = location_df_grouped[['location_id', 'latitude', 'longitude', 'geometry', 'postcode', 'state_code']]

# Final dim_location table
dim_location.head()

Unnamed: 0,location_id,latitude,longitude,geometry,postcode,state_code
0,1020772874,-28.181568,152.073266,POINT (152.0732657 -28.1815683),,QLD
1,104749622,-41.140587,146.905621,POINT (146.9056213 -41.1405872),,TAS
2,1053325083,-37.664813,140.415586,POINT (140.4155865 -37.6648132),,SA
3,10581620,-41.436279,147.116049,POINT (147.1160493 -41.4362794),7250.0,TAS
4,10586316,-41.257827,146.276434,POINT (146.2764337 -41.2578267),7306.0,TAS


In [None]:
# Use the tansformed states_df to make the dim_states table
# Create a mapping of state_code to state_name
state_name_mapping = {
    'NSW': 'New South Wales',
    'VIC': 'Victoria',
    'TAS': 'Tasmania',
    'ACT': 'Australian Capital Territory',
    'QLD': 'Queensland',
    'WA': 'Western Australia',
    'SA': 'South Australia',
    'NT': 'Northern Territory'
}

temp_dim_states_df = states_df

# Add state_name column based on the mapping
temp_dim_states_df['state_name'] = temp_dim_states_df['state_code'].map(state_name_mapping)

# Rename the columns to the desired names
temp_dim_states_df = temp_dim_states_df.rename(columns={
    'spatial_id': 'location_id',      # Rename 'spatial_id' to 'location_id'
})

# Reorganize columns to match desired format
dim_states = temp_dim_states_df[['state_code', 'state_name', 'location_id']]

# Final dim_states data
dim_states

Unnamed: 0,state_code,state_name,location_id
0,VIC,Victoria,2316741
1,QLD,Queensland,2316595
2,TAS,Tasmania,2369652
3,ACT,Australian Capital Territory,2354197
4,NSW,New South Wales,2316593
5,WA,Western Australia,2316598
6,SA,South Australia,2316596
7,NT,Northern Territory,2316594


In [None]:
# Transform fuel information from NGER and CER for loading into the dim_fuel table 

# Query both tables and combine them into one DataFrame
query = f"""
    SELECT "fuel_sources" as fuel_name
    FROM spatial_power_stations_data
    UNION ALL
    SELECT "primary_fuel" as fuel_name
    FROM spatial_greenhouse_and_energy
"""

# Access the tables and load into a single DataFrame
fuel_df = pd.read_sql(query, con=db_engine)

# Group by 'fuel_name' and take the first non-null value for each column in the group
fuel_df = fuel_df.groupby('fuel_name', as_index=False).first()

# Filter out invalid fuel_name values and reset index
fuel_df = fuel_df[~fuel_df['fuel_name'].isin(['-', '', 'N/A'])].reset_index(drop=True)

# Assign a unique fuel_id to each distinct fuel_name
fuel_df['fuel_id'] = range(1, len(fuel_df) + 1)

# Reorganize columns to match desired format
dim_fuel = fuel_df[['fuel_id', 'fuel_name']]

# Final dim_fuel table
dim_fuel.head()


Unnamed: 0,fuel_id,fuel_name
0,1,Bagasse
1,2,Battery
2,3,Biofuel
3,4,Biogas
4,5,Biomass


In [None]:
# Transform date information from NGER and CER for loading into the dim_time table 
# Query both tables and combine them into one DataFrame
query = """
    SELECT "accreditation_start_date" AS date_1, "committed_date" AS date_2
    FROM spatial_power_stations_data
    UNION ALL
    SELECT "time_period_start" AS date_1, "time_period_stop" AS date_2
    FROM spatial_greenhouse_and_energy
"""

# Access the tables and load into a single DataFrame
time_df = pd.read_sql(query, con=db_engine)

# Melt the DataFrame to create a single column for dates
time_df_melted = time_df.melt(value_name='date', var_name='date_type')

# Drop the date_type column if not needed
time_df_melted = time_df_melted.drop(columns=['date_type'])

# Group by 'date' to ensure no duplicates (keep first value for each date)
time_df_melted = time_df_melted.groupby('date', as_index=False).first()

# Ensure 'date' is in datetime format
time_df_melted['date'] = pd.to_datetime(time_df_melted['date'])

# Drop rows with NaN/NaT values in the 'date' column
time_df_melted = time_df_melted.dropna(subset=['date'])

# Extract the required components
time_df_melted['day'] = time_df_melted['date'].dt.day
time_df_melted['month'] = time_df_melted['date'].dt.month
time_df_melted['month_name'] = time_df_melted['date'].dt.month_name()
time_df_melted['quarter'] = time_df_melted['date'].dt.quarter
time_df_melted['year'] = time_df_melted['date'].dt.year
time_df_melted['week_of_year'] = time_df_melted['date'].dt.isocalendar().week
time_df_melted['day_of_week'] = time_df_melted['date'].dt.weekday + 1  # Monday=1, Sunday=7
time_df_melted['day_name'] = time_df_melted['date'].dt.day_name()
time_df_melted['is_weekend'] = time_df_melted['day_of_week'].isin([6, 7])  # Saturday=6, Sunday=7

# Create time_id (usually in YYYYMMDD format)
time_df_melted['time_id'] = time_df_melted['date'].dt.strftime('%Y%m%d').astype(int)

# Reorder columns to match desired structure for dim_time
dim_time = time_df_melted[['time_id', 'date', 'day', 'month', 'month_name', 'quarter', 'year', 'week_of_year', 'day_of_week', 'day_name', 'is_weekend']]

# Final dim_time table
dim_time.head()

Unnamed: 0,time_id,date,day,month,month_name,quarter,year,week_of_year,day_of_week,day_name,is_weekend
0,20010401,2001-04-01,1,4,April,2,2001,13,7,Sunday,True
1,20010403,2001-04-03,3,4,April,2,2001,14,2,Tuesday,False
2,20010404,2001-04-04,4,4,April,2,2001,14,3,Wednesday,False
3,20010417,2001-04-17,17,4,April,2,2001,16,2,Tuesday,False
4,20010420,2001-04-20,20,4,April,2,2001,16,5,Friday,False


In [105]:
# Transform power station data from CER for loading into the dim_cer_station table 
# Query the table and set it into a DataFrame
query = """
    SELECT *
    FROM spatial_power_stations_data
"""

# Access the tables and load into a single DataFrame
cer_df = pd.read_sql(query, con=db_engine)

# Filter out rows with NaN in 'power_station_name' to avoid issues with composite_key
cer_df = cer_df[cer_df['power_station_name'].notna()]

# Create a unique combination of 'accreditation_code' and 'power_station_name'
cer_df['composite_key'] = cer_df['power_station_name'].str.cat(
    cer_df['accreditation_code'].fillna('A station_'), sep="_"
)

# Get unique combinations of the composite key
unique_combinations = cer_df['composite_key'].drop_duplicates()

# Display the unique combinations
cer_df

Unnamed: 0,index,accreditation_code,power_station_name,state,installed_capacity,postcode,fuel_sources,accreditation_start_date,suspension_status,baseline_mwh,comment,status,mw_capacity,committed_date,spatial_id,Latitude,Longitude,geom,composite_key
0,0,SRPXQLG5,Varsity Views - Solar w SGU - QLD,QLD,0.2109,4227,Solar,2024-12-18,Unsuspended,0.0,,Accredited,,NaT,2316595,-22.164678,144.584490,0101000020E6100000896A0025B4126240395DBB59282A...,Varsity Views - Solar w SGU - QLD_SRPXQLG5
1,1,SRPYNS51,NNSWLHD-Byron Central Hospital - Solar - NSW,NSW,0.7203,2481,Solar,2024-12-12,Unsuspended,0.0,,Accredited,,NaT,2316593,-31.875984,147.286949,0101000020E610000086634CB02E696240EB55647440E0...,NNSWLHD-Byron Central Hospital - Solar - NSW_S...
2,2,SRPXQLG2,Springwood Terrace Care Community - Solar w SG...,QLD,0.1890,4127,Solar,2024-12-10,Unsuspended,0.0,,Accredited,,NaT,2316595,-22.164678,144.584490,0101000020E6100000896A0025B4126240395DBB59282A...,Springwood Terrace Care Community - Solar w SG...
3,3,SRPYNS45,Bunnings Bennetts Green - Solar - NSW,NSW,0.2800,2290,Solar,2024-12-03,Unsuspended,0.0,,Accredited,,NaT,2316593,-31.875984,147.286949,0101000020E610000086634CB02E696240EB55647440E0...,Bunnings Bennetts Green - Solar - NSW_SRPYNS45
4,4,SRPYNS46,Tuggerah Home MSB1 254kW - Solar - NSW,NSW,0.2540,2259,Solar,2024-12-02,Unsuspended,0.0,,Accredited,,NaT,2316593,-31.875984,147.286949,0101000020E610000086634CB02E696240EB55647440E0...,Tuggerah Home MSB1 254kW - Solar - NSW_SRPYNS46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3099,38,,Theodore Wind Farm,QLD,,,Wind,NaT,,,,Probable,1152.0,NaT,2316595,-22.164678,144.584490,0101000020E6100000896A0025B4126240395DBB59282A...,Theodore Wind Farm_A station_
3100,39,,Thunderbolt Wind Farm stage 1,NSW,,,Wind,NaT,,,,Probable,230.0,NaT,2316593,-31.875984,147.286949,0101000020E610000086634CB02E696240EB55647440E0...,Thunderbolt Wind Farm stage 1_A station_
3101,40,,Upper Calliope Solar Farm,QLD,,,Solar,NaT,,,,Probable,1300.0,NaT,2316595,-22.164678,144.584490,0101000020E6100000896A0025B4126240395DBB59282A...,Upper Calliope Solar Farm_A station_
3102,41,,Vales Point Solar Farm,NSW,,,Solar,NaT,,,,Probable,62.0,NaT,2316593,-31.875984,147.286949,0101000020E610000086634CB02E696240EB55647440E0...,Vales Point Solar Farm_A station_
