# Energy Consumption Disaggregation REDD
This notebook uses the Combinatorial Optimization (CO) model. The data is first preprocessed by resampling it into fixed time intervals (the code below uses 10-minute bins); the CO model is then trained on the combined training data from all houses and tested on each house separately.

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import co_model as co
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter

# Load the cleaned CSV export of each house (house 1 -> df, house 2 -> df1, house 3 -> df2).
house_csv_paths = (
    'REDD/CLEAN_House1.csv',
    'REDD/CLEAN_House2.csv',
    'REDD/CLEAN_House3.csv',
)
df, df1, df2 = map(pd.read_csv, house_csv_paths)

# Function to process each DataFrame
def process_dataframe(df):
    """Turn the 'Time' column of ``df`` into its datetime index, in place.

    The 'Time' column is parsed with ``pd.to_datetime``, removed from the
    columns, and installed as the index (named 'Time'). The frame passed in
    is modified directly and the same object is returned.
    """
    # Parse before touching the frame so a parsing error leaves it unchanged.
    parsed_times = pd.to_datetime(df['Time'])
    del df['Time']
    df.index = pd.Index(parsed_times, name='Time')
    return df

# --- Preprocessing configuration ---------------------------------------------
# Bin width used when resampling. This is 10 minutes, not one hour; the
# `*_hourly` variable names below are kept unchanged so that any other cell
# referring to them keeps working. '10min' replaces the deprecated '10T' alias.
RESAMPLE_RULE = '10min'
SCALE_DIVISOR = 1_000_000  # every resampled column is divided by this value
TRAIN_FRACTION = 0.75      # leading share of each house's rows used for training
list_of_appliances = [f'Appliance{i}' for i in range(1, 10)]


def _resample_and_scale(house_df):
    """Sum every column into RESAMPLE_RULE bins and rescale the result.

    For each appliance an extra 'Appliance{i} (Wh)' column (binned sum * 1000)
    is added, then the whole frame is divided by SCALE_DIVISOR.

    NOTE(review): summing readings and dividing by a constant ignores the
    sampling interval, so this is presumably not a true Wh/kWh conversion --
    confirm the intended units. The '(Wh)' columns are not used by the
    training or evaluation code visible in this notebook.
    """
    binned = house_df.resample(RESAMPLE_RULE).sum()
    for appliance in list_of_appliances:
        binned[f'{appliance} (Wh)'] = binned[appliance] * 1000
    return binned.div(SCALE_DIVISOR)


def _epoch_seconds(times):
    """Return whole seconds since the Unix epoch (int64) for a datetime Series.

    Subtracting the epoch and floor-dividing by one second does not depend on
    the stored datetime resolution, unlike casting to int64 and dividing by 1e9.
    """
    times = pd.to_datetime(times)
    if times.dt.tz is not None:
        # The epoch below is timezone-naive, so express aware times in UTC first.
        times = times.dt.tz_convert('UTC').dt.tz_localize(None)
    return (times - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)


def _to_model_frame(frame):
    """Move 'Time' back to a column, add 'timestamp', rename 'Aggregate' to 'power'."""
    out = frame.reset_index()
    out['timestamp'] = _epoch_seconds(out['Time'])
    return out.rename(columns={'Aggregate': 'power'})


def _train_test_frames(binned):
    """Split one house chronologically into (train, test) frames.

    The first TRAIN_FRACTION of the rows becomes the training frame, reduced to
    the columns 'timestamp', 'power' and the appliance columns. The remaining
    rows become the test frame, which keeps all of its columns.
    """
    cut = int(len(binned) * TRAIN_FRACTION)
    train = _to_model_frame(binned.iloc[:cut])[['timestamp', 'power'] + list_of_appliances]
    test = _to_model_frame(binned.iloc[cut:])
    return train, test


# --- Preprocessing -----------------------------------------------------------
# Use the frames returned by process_dataframe instead of relying on it having
# modified df / df1 / df2 behind the scenes.
dfs = [process_dataframe(frame) for frame in (df, df1, df2)]
df_hourly, df1_hourly, df2_hourly = [_resample_and_scale(frame) for frame in dfs]

train_df, test_df = _train_test_frames(df_hourly)
train_df1, test_df1 = _train_test_frames(df1_hourly)
train_df2, test_df2 = _train_test_frames(df2_hourly)

# --- Training ----------------------------------------------------------------
# One model is trained on the stacked training rows of all three houses.
# ignore_index gives the combined frame unique row labels instead of three
# overlapping 0..n ranges.
train_df = pd.concat([train_df, train_df1, train_df2], ignore_index=True)
model = co.CO()
model.train(train_df, list_of_appliances)

model.save("co_trained_model_REDD.pkl")

# --- Disaggregation ----------------------------------------------------------
# NOTE(review): the test frames still contain the ground-truth appliance
# columns (the evaluation cell reads them); confirm that co.CO.disaggregate
# only looks at the aggregate signal.
prediction2 = model.disaggregate(test_df2)
prediction1 = model.disaggregate(test_df1)
prediction = model.disaggregate(test_df)
print(prediction)
print(prediction1)
print(prediction2)

 [train_on_chunk] Done training!
 [train_on_chunk] Done training!
 [train_on_chunk] Done training!
 [train_on_chunk] Done training!
 [train_on_chunk] Done training!
 [train_on_chunk] Done training!
 [train_on_chunk] Done training!
 [train_on_chunk] Done training!
 [train_on_chunk] Done training!
Estimating power demand for 'Appliance1'
Estimating power demand for 'Appliance2'
Estimating power demand for 'Appliance3'
Estimating power demand for 'Appliance4'
Estimating power demand for 'Appliance5'
Estimating power demand for 'Appliance6'
Estimating power demand for 'Appliance7'
Estimating power demand for 'Appliance8'
Estimating power demand for 'Appliance9'
Estimating power demand for 'Appliance1'
Estimating power demand for 'Appliance2'
Estimating power demand for 'Appliance3'
Estimating power demand for 'Appliance4'
Estimating power demand for 'Appliance5'
Estimating power demand for 'Appliance6'
Estimating power demand for 'Appliance7'
Estimating power demand for 'Appliance8'
Estima



In [4]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred``, in percent.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted values; lists, NumPy arrays and pandas Series are
        accepted and compared position by position.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the samples whose
        true value is non-zero. Samples with a true value of exactly zero are
        skipped because their percentage error is undefined (it would
        otherwise turn the whole result into inf/nan). ``nan`` is returned
        when there is no sample with a non-zero true value.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Keep NumPy broadcasting semantics before applying the boolean mask.
    y_true, y_pred = np.broadcast_arrays(y_true, y_pred)

    nonzero = y_true != 0
    if not nonzero.any():
        return np.nan

    relative_error = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
    return np.mean(np.abs(relative_error)) * 100

# Create a function to evaluate the model performance
def evaluate_model(test_df, prediction_df, appliance_names):
    """Score per-appliance predictions against the ground truth.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Frame holding the true values, one column per appliance.
    prediction_df : pandas.DataFrame
        Frame holding the predicted values under the same column names.
    appliance_names : mapping or iterable of str
        Appliance column names to evaluate (for a mapping, its keys). When it
        is ``None`` or empty, the columns 'Appliance1' .. 'Appliance9' are used.

    Returns
    -------
    pandas.DataFrame
        One row per appliance with the columns 'Appliance', 'MAE', 'RMSE' and
        'R2 Score'. The 'Appliance' column holds the column names as given.

    Raises
    ------
    KeyError
        If an appliance column is missing from either frame.
    """
    if appliance_names is not None and len(appliance_names) > 0:
        appliances = list(appliance_names)
    else:
        # Default kept for callers that do not supply any names.
        appliances = [f'Appliance{i}' for i in range(1, 10)]

    rows = []
    for appliance in appliances:
        true_values = test_df[appliance]
        predicted_values = prediction_df[appliance]
        rows.append({
            'Appliance': appliance,
            'MAE': mean_absolute_error(true_values, predicted_values),
            # RMSE is the square root of the mean squared error.
            'RMSE': np.sqrt(mean_squared_error(true_values, predicted_values)),
            'R2 Score': r2_score(true_values, predicted_values),
        })

    return pd.DataFrame(rows, columns=['Appliance', 'MAE', 'RMSE', 'R2 Score'])

# Human-readable label for each appliance column.
appliance_names = {
    'Appliance1': 'Fridge',
    'Appliance2': 'Oven',
    'Appliance3': 'Washer',
    'Appliance4': 'Dryer',
    'Appliance5': 'Lighting',
    'Appliance6': 'Microwave',
    'Appliance7': 'TV',
    'Appliance8': 'Computer',
    'Appliance9': 'Others',
}


def _evaluate_house(house_test_df, house_prediction):
    """Evaluate one house and replace the appliance column names by their labels."""
    scores = evaluate_model(house_test_df, house_prediction, appliance_names)
    scores['Appliance'] = scores['Appliance'].map(appliance_names)
    return scores


evaluation_results = _evaluate_house(test_df, prediction)
evaluation_results1 = _evaluate_house(test_df1, prediction1)
evaluation_results2 = _evaluate_house(test_df2, prediction2)

# Print the evaluation results, one table per house.
for heading, scores in (
    ("Evaluation results for test_df:", evaluation_results),
    ("\nEvaluation results for test_df1:", evaluation_results1),
    ("\nEvaluation results for test_df2:", evaluation_results2),
):
    print(heading)
    print(scores)


Evaluation results for test_df:
   Appliance       MAE      RMSE  R2 Score
0     Fridge  0.001281  0.003796 -0.128579
1       Oven  0.001232  0.002151 -0.488803
2     Washer  0.002122  0.003502 -0.579889
3      Dryer  0.000052  0.002586 -0.000404
4   Lighting  0.000978  0.010606 -0.008570
5  Microwave  0.000822  0.010921 -0.005703
6         TV  0.000279  0.001141 -0.063418
7   Computer  0.000393  0.001060 -0.159588
8     Others  0.006016  0.019899 -0.100581

Evaluation results for test_df1:
   Appliance       MAE      RMSE  R2 Score
0     Fridge  0.002651  0.004339 -0.595357
1       Oven  0.001566  0.013674 -0.013283
2     Washer  0.006000  0.029976 -0.041742
3      Dryer  0.000322  0.001062 -0.101507
4   Lighting  0.000351  0.003362 -0.011036
5  Microwave  0.000141  0.001872 -0.005722
6         TV  0.000088  0.000355 -0.065208
7   Computer  0.001767  0.009557 -0.035400
8     Others  0.000056  0.000567 -0.009904

Evaluation results for test_df2:
   Appliance       MAE      RMSE  R2 Sco

# Conclusion
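The evaluation results show the performance of the CO model for disaggregating energy consumption for each appliance in the three houses. In the results shown in full above (the first two houses), every R² score is negative, meaning the CO model does no better there than predicting each appliance's mean consumption. Further improvements can be made by tuning the model hyperparameters or exploring other disaggregation techniques. As of the time I finished working on this notebook, I would prefer using the FHMM model over the CO model.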