# FHMM model on REDD Data
Data from three different houses in the REDD dataset was loaded and preprocessed into 10-minute intervals.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import fhmm_model as fhmm
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter

In [16]:
# Load the cleaned CSV export of each of the three houses.
# df -> House1, df1 -> House2, df2 -> House3.
df, df1, df2 = map(
    pd.read_csv,
    (
        'REDD/CLEAN_House1.csv',
        'REDD/CLEAN_House2.csv',
        'REDD/CLEAN_House3.csv',
    ),
)
# Function to process each DataFrame
def process_dataframe(df):
    """Index a house frame by its parsed 'Time' column, in place.

    The 'Time' column is converted with ``pd.to_datetime`` and moved into the
    index. The frame is modified in place (the resampling code further down
    the cell uses the original ``df``/``df1``/``df2`` objects) and is also
    returned for convenience.

    Calling this on a frame that was already processed is a no-op, so
    re-running it against the same objects no longer raises ``KeyError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Time' column, or one already indexed by a
        DatetimeIndex named 'Time'.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    KeyError
        If there is neither a 'Time' column nor a DatetimeIndex named 'Time'.
    """
    if 'Time' in df.columns:
        df['Time'] = pd.to_datetime(df['Time'])
        df.set_index('Time', inplace=True)
    elif not (df.index.name == 'Time' and isinstance(df.index, pd.DatetimeIndex)):
        # Same exception type the original raised for a missing column.
        raise KeyError("expected a 'Time' column or a DatetimeIndex named 'Time'")
    return df

# Process all DataFrames
# The names are re-bound to the returned frames so the steps below do not
# depend on process_dataframe mutating its argument in place.
dfs = [df, df1, df2,]
dfs = [process_dataframe(frame) for frame in dfs]
df, df1, df2 = dfs


def resample_and_scale(frame, rule='10min', n_appliances=9):
    """Sum a time-indexed house frame into fixed bins and rescale it.

    Steps, in order:
      1. ``frame.resample(rule).sum()`` - every column (including 'Unix')
         is summed within each bin.
      2. For i in 1..n_appliances a column 'Appliance{i} (Wh)' is added,
         equal to the binned 'Appliance{i}' column times 1000.
      3. The whole frame (all columns) is divided by 1,000,000.

    NOTE(review): the '(Wh)' label and the earlier 'kWh' wording are not
    established by this arithmetic - a sum of readings only becomes an
    energy once the sampling interval is accounted for. Confirm the
    intended units before interpreting the magnitudes.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame with a DatetimeIndex and columns 'Appliance1'..'Appliance{n}'.
    rule : str
        Resampling rule. '10min' is the non-deprecated spelling of the
        '10T' alias used previously (same bins).
    n_appliances : int
        Number of ApplianceN columns to derive '(Wh)' columns for.

    Returns
    -------
    pandas.DataFrame
        The binned, rescaled frame.
    """
    binned = frame.resample(rule).sum()
    for i in range(1, n_appliances + 1):
        binned[f'Appliance{i} (Wh)'] = binned[f'Appliance{i}'] * 1000
    return binned.div(1000000)


# Resample each house to 10-minute bins. The *_hourly names are historical
# (the bins are 10 minutes, not one hour) and are kept because later cells
# refer to them.
df_hourly = resample_and_scale(df)
df1_hourly = resample_and_scale(df1)
df2_hourly = resample_and_scale(df2)

# Print the resulting dataframe
print(df_hourly)
print(df1_hourly)
print(df2_hourly)

                              Unix  Aggregate  Appliance1  Appliance2  \
Time                                                                    
2013-10-09 13:00:00   42821.047153   0.018103    0.002273    0.000000   
2013-10-09 13:10:00  138132.449863   0.129945    0.006727    0.000000   
2013-10-09 13:20:00  138132.509872   0.185500    0.000000    0.001734   
2013-10-09 13:30:00  138132.569879   0.057799    0.000000    0.004652   
2013-10-09 13:40:00  133988.651694   0.079743    0.000000    0.003534   
...                            ...        ...         ...         ...   
2015-07-10 11:10:00  130723.947638   0.025085    0.002901    0.004438   
2015-07-10 11:20:00  129287.475087   0.019077    0.000000    0.004207   
2015-07-10 11:30:00  127851.000892   0.016287    0.000000    0.004079   
2015-07-10 11:40:00  132160.640044   0.016876    0.000000    0.004165   
2015-07-10 11:50:00   86191.751781   0.011049    0.000000    0.002703   

                     Appliance3  Appliance4  Appli

In [17]:
def _split_for_fhmm(house_frame, train_fraction=0.75):
    """Split one resampled house frame chronologically into (train, test).

    The first ``train_fraction`` of the rows becomes the training part and
    the remainder the test part. Both parts get their index reset, an
    integer 'timestamp' column (epoch seconds derived from 'Time') and
    'Aggregate' renamed to 'power'. Only the training part is narrowed to
    'timestamp', 'power' and 'Appliance1'..'Appliance9'; the test part keeps
    every column.
    """
    cut = int(len(house_frame) * train_fraction)

    def _with_epoch_seconds(part):
        part = part.reset_index()
        # Convert the 'Time' values to int64 and floor-divide by 1e9
        # to obtain whole seconds.
        part['timestamp'] = (pd.to_datetime(part['Time']).astype(np.int64) // 10**9).astype(int)
        return part.rename(columns={'Aggregate': 'power'})

    train_part = _with_epoch_seconds(house_frame.iloc[:cut])
    train_part = train_part[['timestamp', 'power'] + [f'Appliance{i}' for i in range(1, 10)]]
    test_part = _with_epoch_seconds(house_frame.iloc[cut:])
    return train_part, test_part


# 75 % / 25 % chronological split for each of the three houses.
train_df, test_df = _split_for_fhmm(df_hourly)
train_df1, test_df1 = _split_for_fhmm(df1_hourly)
train_df2, test_df2 = _split_for_fhmm(df2_hourly)
print(test_df)

                     Time           Unix     power  Appliance1  Appliance2  \
0     2015-01-31 18:10:00  129468.257121  0.037961    0.000000    0.000000   
1     2015-01-31 18:20:00  125200.125606  0.044817    0.005925    0.000000   
2     2015-01-31 18:30:00  125200.178450  0.160781    0.006955    0.001597   
3     2015-01-31 18:40:00  126622.960802  0.212577    0.006458    0.004229   
4     2015-01-31 18:50:00  128045.744811  0.049827    0.005470    0.001630   
...                   ...            ...       ...         ...         ...   
22998 2015-07-10 11:10:00  130723.947638  0.025085    0.002901    0.004438   
22999 2015-07-10 11:20:00  129287.475087  0.019077    0.000000    0.004207   
23000 2015-07-10 11:30:00  127851.000892  0.016287    0.000000    0.004079   
23001 2015-07-10 11:40:00  132160.640044  0.016876    0.000000    0.004165   
23002 2015-07-10 11:50:00   86191.751781  0.011049    0.000000    0.002703   

       Appliance3  Appliance4  Appliance5  Appliance6  Applianc

## Disaggregation
The FHMM model was trained and then run on the REDD data; however, the results it returns are incorrect.

In [18]:
# Channel names passed to the model: Appliance1 .. Appliance9.
list_of_appliances = [f'Appliance{i}' for i in range(1, 10)]

model = fhmm.FHMM()
# NOTE(review): train() is called once per house on the same model object.
# Whether successive calls accumulate or refit from scratch is not visible
# from this notebook - if it refits, only train_df2 is actually learned.
# Confirm against fhmm_model.
for house_train in (train_df, train_df1, train_df2):
    model.train(house_train, list_of_appliances)
model.save("fhmm_trained_model_REDD.pkl")

# Disaggregate the held-out part of each house (same call order as before:
# house 3, then house 2, then house 1).
prediction2 = model.disaggregate(test_df2)
prediction1 = model.disaggregate(test_df1)
prediction = model.disaggregate(test_df)

# Show the three predictions, then the column sets for comparison.
for shown in (prediction, prediction1, prediction2, test_df.columns, prediction.columns):
    print(shown)


                     Appliance1  Appliance2  Appliance3  Appliance4  \
timestamp                                                             
2015-01-31 18:10:00         0.0         0.0         0.0         0.0   
2015-01-31 18:20:00         0.0         0.0         0.0         0.0   
2015-01-31 18:30:00         0.0         0.0         0.0         0.0   
2015-01-31 18:40:00         0.0         0.0         0.0         0.0   
2015-01-31 18:50:00         0.0         0.0         0.0         0.0   
...                         ...         ...         ...         ...   
2015-07-10 11:10:00         0.0         0.0         0.0         0.0   
2015-07-10 11:20:00         0.0         0.0         0.0         0.0   
2015-07-10 11:30:00         0.0         0.0         0.0         0.0   
2015-07-10 11:40:00         0.0         0.0         0.0         0.0   
2015-07-10 11:50:00         0.0         0.0         0.0         0.0   

                     Appliance5  Appliance6  Appliance7  Appliance8  \
times

In [15]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred``, in percent.

    Inputs are converted to float NumPy arrays first, so lists are accepted
    and pandas Series are compared by position rather than by index label.

    Entries where ``y_true`` is exactly 0 are excluded, because the
    percentage error is undefined there (it previously produced inf/nan and
    a divide-by-zero warning).

    Parameters
    ----------
    y_true : array-like
        Ground-truth values.
    y_pred : array-like
        Predicted values, same length as ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the non-zero
        ``y_true`` entries, or NaN when there are none (including empty
        input).
    """
    actual = np.asarray(y_true, dtype=float)
    estimate = np.asarray(y_pred, dtype=float)
    nonzero = actual != 0
    if not nonzero.any():
        return float('nan')
    return np.mean(np.abs((actual[nonzero] - estimate[nonzero]) / actual[nonzero])) * 100

# Create a function to evaluate the model performance
def evaluate_model(test_df, prediction_df, appliance_names):
    """Score per-appliance predictions against the ground truth.

    For every appliance column, MAE, RMSE and the R2 score are computed with
    the scikit-learn metric functions between ``test_df[col]`` and
    ``prediction_df[col]``.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Frame holding the true per-appliance values.
    prediction_df : pandas.DataFrame
        Frame holding the predicted per-appliance values, with the same
        column names and number of rows as ``test_df``.
    appliance_names : mapping or sequence of str, or None
        The appliance columns to evaluate. When a mapping is given its keys
        are used, in iteration order. When None or empty, the columns
        'Appliance1'..'Appliance9' are evaluated (the previous hard-coded
        behaviour, which ignored this argument).

    Returns
    -------
    pandas.DataFrame
        One row per appliance with columns 'Appliance' (the column name),
        'MAE', 'RMSE' and 'R2 Score'.
    """
    if appliance_names is not None and len(appliance_names) > 0:
        appliances = list(appliance_names)
    else:
        appliances = [f'Appliance{i}' for i in range(1, 10)]

    evaluation_metrics = {'Appliance': [], 'MAE': [], 'RMSE': [], 'R2 Score': []}
    for appliance in appliances:
        true_values = test_df[appliance]
        predicted_values = prediction_df[appliance]

        mae = mean_absolute_error(true_values, predicted_values)
        rmse = np.sqrt(mean_squared_error(true_values, predicted_values))
        r2 = r2_score(true_values, predicted_values)

        evaluation_metrics['Appliance'].append(appliance)
        evaluation_metrics['MAE'].append(mae)
        evaluation_metrics['RMSE'].append(rmse)
        evaluation_metrics['R2 Score'].append(r2)

    return pd.DataFrame(evaluation_metrics)

# Column name -> human-readable label used in the result tables.
appliance_names = {
    'Appliance1': 'Fridge',
    'Appliance2': 'Oven',
    'Appliance3': 'Washer',
    'Appliance4': 'Dryer',
    'Appliance5': 'Lighting',
    'Appliance6': 'Microwave',
    'Appliance7': 'TV',
    'Appliance8': 'Computer',
    'Appliance9': 'Others',
}


def _score_house(house_test, house_prediction):
    """Evaluate one house and replace the column names with readable labels."""
    scores = evaluate_model(house_test, house_prediction, appliance_names)
    scores['Appliance'] = scores['Appliance'].map(appliance_names)
    return scores


evaluation_results = _score_house(test_df, prediction)
evaluation_results1 = _score_house(test_df1, prediction1)
evaluation_results2 = _score_house(test_df2, prediction2)

# Print the evaluation results
for heading, scores in (
    ("Evaluation results for test_df:", evaluation_results),
    ("\nEvaluation results for test_df1:", evaluation_results1),
    ("\nEvaluation results for test_df2:", evaluation_results2),
):
    print(heading)
    print(scores)


Evaluation results for test_df:
   Appliance       MAE      RMSE  R2 Score
0     Fridge  0.003843  0.010270 -0.162844
1       Oven  0.003697  0.005155 -1.058985
2     Washer  0.006366  0.009084 -0.965236
3      Dryer  0.000156  0.004936 -0.000998
4   Lighting  0.002933  0.025310 -0.013611
5  Microwave  0.002467  0.027336 -0.008212
6         TV  0.000836  0.002852 -0.094030
7   Computer  0.001180  0.003039 -0.177364
8     Others  0.018046  0.058824 -0.103894

Evaluation results for test_df1:
   Appliance       MAE      RMSE  R2 Score
0     Fridge  0.007952  0.011766 -0.840799
1       Oven  0.004696  0.031136 -0.023280
2     Washer  0.018000  0.072535 -0.065620
3      Dryer  0.000967  0.002977 -0.118082
4   Lighting  0.001054  0.007945 -0.017908
5  Microwave  0.000424  0.003623 -0.013858
6         TV  0.000264  0.001047 -0.067664
7   Computer  0.005301  0.018233 -0.092325
8     Others  0.000168  0.001527 -0.012301

Evaluation results for test_df2:
   Appliance       MAE      RMSE  R2 Sco

## Result 
The low MAE and RMSE values might suggest that the model works well; however, the R² scores are all negative, and looking at the actual predictions, the only value that is ever predicted is 0.0. This tells me that there are some issues with the data and the model, which will be explored.