Process the raw Ausgrid solar data into a more usable format.<br>
Inspired by: https://github.com/pierre-haessig/ausgrid-solar-data

In [7]:
import json
import pandas as pd

In [8]:
# Input file: the three yearly Ausgrid files combined into one CSV:
#   2010-2011 Solar home electricity data.csv
#   2011-2012 Solar home electricity data v2.csv
#   2012-2013 Solar home electricity data v2.csv
fname = 'solar.csv'

# The first line of the file is skipped, 'date' is parsed day-first,
# missing-value detection is switched off and 'Row Quality' is read as text.
d_raw = pd.read_csv(
    fname,
    skiprows=1,
    parse_dates=['date'],
    dayfirst=True,
    na_filter=False,
    dtype={'Row Quality': str},
)

  d_raw = pd.read_csv(fname, skiprows=1,


In [9]:
# Preview the first five rows to check the parsed column layout.
d_raw.head(n=5)

Unnamed: 0,Customer,Generator Capacity,Postcode,Consumption Category,date,00:30,01:00,01:30,02:00,02:30,...,20:00,20:30,21:00,21:30,22:00,22:30,23:00,23:30,00:00,Row Quality
0,1,3.78,2076,GC,2010-07-01,0.303,0.471,0.083,0.121,0.361,...,0.54,0.406,0.543,0.495,0.216,0.378,0.128,0.078,0.125,
1,1,3.78,2076,CL,2010-07-01,1.25,1.244,1.256,0.744,0.019,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.075,
2,1,3.78,2076,GG,2010-07-01,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
3,1,3.78,2076,GC,2010-07-02,0.116,0.346,0.122,0.079,0.12,...,1.1,1.012,0.817,0.526,0.335,0.402,0.142,0.12,0.111,
4,1,3.78,2076,CL,2010-07-02,1.238,1.238,1.256,1.25,0.169,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.088,


#### Pick suitable customers for Env

In [10]:
# Count rows per postcode so that customers can be drawn from one
# well-populated area (keeps the selection geographically consistent).
postcode_counts = d_raw['Postcode'].value_counts()
print("Top 5 postcodes with most samples:", postcode_counts.iloc[:5], sep="\n")

Top 5 postcodes with most samples:
Postcode
2259    84149
2261    55896
2290    36120
2262    23016
2093    22481
Name: count, dtype: int64


In [11]:
postcode = 2261

# Independent copy of the rows belonging to the chosen postcode,
# and the distinct customer ids found in it (in order of first appearance).
d_post = d_raw.loc[d_raw['Postcode'].eq(postcode)].copy()
d_post_cust = d_post['Customer'].drop_duplicates().tolist()
print(f"Customers in postcode {postcode}: {len(d_post_cust)}", d_post_cust, sep="\n")

Customers in postcode 2261: 21
[5, 13, 28, 50, 58, 61, 69, 70, 72, 86, 90, 127, 158, 165, 224, 246, 266, 276, 286, 292, 297]


In [12]:
# Print a short profile (PV capacity, mean generation, mean consumption) for
# every customer of the selected postcode, to help pick consumers/prosumers.
# The loop variable is deliberately not called `id`, which would shadow the
# builtin and stay shadowed in the notebook namespace after the loop.
for customer_id in d_post_cust:

    d_customer = d_raw[d_raw['Customer'] == customer_id]
    category = d_customer['Consumption Category']

    # Columns 5..52 are taken to be the 48 half-hourly readings
    # (TODO confirm against the CSV header if the file layout changes).
    # Slice the reading columns first and convert them to float: this avoids
    # building an object array of the whole mixed-type frame and fails loudly
    # if a reading is not numeric.
    # 'GG' rows feed the generation figure, 'GC' rows the consumption figure.
    d_pv = d_customer[category == 'GG'].iloc[:, 5:53].to_numpy(dtype=float).flatten()
    d_d = d_customer[category == 'GC'].iloc[:, 5:53].to_numpy(dtype=float).flatten()

    # The means below are per half-hour reading, not per day.
    print(f"Customer {customer_id}")
    print(f"Generator Capacity: {d_customer['Generator Capacity'].iloc[0]} kWp")
    print(f"Average Generation: {d_pv.mean():.2f} kWh")
    print(f"Average Consumption: {d_d.mean():.2f} kWh")
    print("\n")

Customer 5
Generator Capacity: 1.0 kWp
Average Generation: 0.07 kWh
Average Consumption: 0.20 kWh


Customer 13
Generator Capacity: 2.22 kWp
Average Generation: 0.16 kWh
Average Consumption: 0.26 kWh


Customer 28
Generator Capacity: 1.0 kWp
Average Generation: 0.07 kWh
Average Consumption: 0.23 kWh


Customer 50
Generator Capacity: 1.02 kWp
Average Generation: 0.07 kWh
Average Consumption: 0.16 kWh


Customer 58
Generator Capacity: 1.0 kWp
Average Generation: 0.08 kWh
Average Consumption: 0.61 kWh


Customer 61
Generator Capacity: 2.0 kWp
Average Generation: 0.14 kWh
Average Consumption: 0.48 kWh


Customer 69
Generator Capacity: 2.04 kWp
Average Generation: 0.16 kWh
Average Consumption: 0.45 kWh


Customer 70
Generator Capacity: 1.0 kWp
Average Generation: 0.08 kWh
Average Consumption: 0.49 kWh


Customer 72
Generator Capacity: 1.02 kWp
Average Generation: 0.07 kWh
Average Consumption: 0.24 kWh


Customer 86
Generator Capacity: 1.02 kWp
Average Generation: 0.08 kWh
Average Consumptio

In [13]:
# Customers exported for the environment; ids listed in `prosumers` are
# flagged as prosumers in the output, the others are not.
consumers = [5, 58, 286, 292]
prosumers = [13, 50, 158, 297]

json_data = dict()

# The loop variable is deliberately not called `id` (would shadow the builtin).
for customer_id in sorted(consumers + prosumers):

    d_customer = d_raw[d_raw['Customer'] == customer_id]
    if d_customer.empty:
        raise ValueError(f"Customer {customer_id} not found in the loaded data")

    # The series are flattened row by row and described by start/end date only,
    # so rows must be in chronological order. A stable sort guarantees that
    # and leaves already-ordered data untouched.
    d_customer = d_customer.sort_values('date', kind='mergesort')
    category = d_customer['Consumption Category']

    # Columns 5..52 are taken to be the 48 half-hourly readings
    # (TODO confirm against the CSV header if the file layout changes).
    # Slice first, then convert to float, so a non-numeric reading raises
    # instead of silently ending up in an object array.
    d_pv = d_customer[category == 'GG'].iloc[:, 5:53].to_numpy(dtype=float).flatten()
    d_d = d_customer[category == 'GC'].iloc[:, 5:53].to_numpy(dtype=float).flatten()

    # Aggregate half-hourly data to hourly by summing consecutive pairs
    d_pv = d_pv.reshape(-1, 2).sum(axis=1)
    d_d = d_d.reshape(-1, 2).sum(axis=1)

    json_data[customer_id] = {
        'prosumer': customer_id in prosumers,
        # plain float so serialisation does not depend on the numpy scalar type
        'pv_capacity': float(d_customer['Generator Capacity'].iloc[0]),
        'start_date': d_customer['date'].min().strftime('%d-%m-%Y'),
        'end_date': d_customer['date'].max().strftime('%d-%m-%Y'),
        'pv': d_pv.tolist(),
        'demand': d_d.tolist(),
    }

# Save to JSON file (integer customer ids become string keys in the output)
with open('ausgrid.json', 'w') as f:
    json.dump(json_data, f, indent=4)