# Parsing Ausgrid Data

In [None]:
import pandas as pd
from tqdm.notebook import trange, tqdm
from solara.constants import PROJECT_PATH

# Load the raw Ausgrid CSV. skiprows=1 drops the first line of the file, so
# the column headers are read from the second line.
raw_csv_path = PROJECT_PATH + "/data/ausgrid/2012_2013_solar_home_electricity_data_v2.csv"
data = pd.read_csv(raw_csv_path, skiprows=1)
data

In [None]:
def get_trace(data, trace_type="CL", customer_num=1):
    """Return the hourly trace of one customer for one consumption category.

    Args:
        data: DataFrame with "Customer" and "Consumption Category" columns
            and reading columns labelled from "0:30" through "0:00".
        trace_type: Value to match in the "Consumption Category" column.
        customer_num: Value to match in the "Customer" column.

    Returns:
        1-D array in which every element is the sum of two consecutive
        readings, taken row by row from the matching rows. The array is
        empty when no row matches.
    """
    # Keep only the rows belonging to this customer and this category
    is_customer = data["Customer"] == customer_num
    is_category = data["Consumption Category"] == trace_type
    matching_rows = data[is_customer & is_category]

    # Lay the reading columns of all matching rows end to end
    readings = matching_rows.loc[:, "0:30":"0:00"].values.ravel()

    # Two half-hourly readings make up one hour: pair them and add them
    hour_count = readings.size // 2
    hourly = readings.reshape((hour_count, 2)).sum(axis=1)

    return hourly

CUSTOMER_NUM = 2

# Hourly traces for the selected customer, one per consumption category
controlled_load = get_trace(data, trace_type="CL", customer_num=CUSTOMER_NUM)
general_consumption = get_trace(data, trace_type="GC", customer_num=CUSTOMER_NUM)

# get_trace returns an empty array when no row matches the requested
# category. Adding an empty array to a non-empty one raises a broadcasting
# error, so a customer without any "CL" rows is treated as having no
# controlled load and the total is the general consumption alone.
if controlled_load.size == 0:
    total_load = general_consumption.copy()
else:
    total_load = controlled_load + general_consumption

solar_input = get_trace(data, trace_type="GG", customer_num=CUSTOMER_NUM)

In [None]:
# Plotting example data

import matplotlib.pyplot as plt
import numpy as np

# Fixed example day; np.random.randint(len(total_load)//24) picks a random one
start_day = 200
print("Start day: ", start_day)

# All three curves share one window: 25 consecutive hourly samples starting
# at the first hour of start_day
first_hour = start_day * 24
window = slice(first_hour, first_hour + 25)

for series, series_label in (
    (total_load, "load"),
    (general_consumption, "gen_consumption"),
    (solar_input, "solar"),
):
    plt.plot(series[window], label=series_label)
plt.legend()
plt.show()

In [None]:
# Saving data
# Each processed trace goes to its own text file; the "{}" placeholder in
# file_name is filled with the name of the trace being written.
house_prefix = PROJECT_PATH + "/data/ausgrid/processed/house" + str(CUSTOMER_NUM)
file_name = house_prefix + "_{}.txt"
for trace_name, trace_values in (
    ("combined_load", total_load),
    ("solar_gen", solar_input),
):
    np.savetxt(file_name.format(trace_name), trace_values)