### Load data set with features

In [1]:
from create_df import read_df
# read_df() comes from the project-local create_df module. The five objects it
# returns are passed, unchanged and in this order, to find_optimal_features and
# create_predictions in the cells below.
df, dfc, all_homes, appliance_min, national_average = read_df()

Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 30 days


In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline


In [3]:
run all_functions.py

In [4]:
run features.py

In [5]:
import json

#### First round of CV, finding optimal features maximising mean accuracy

In [7]:
# Feature/neighbour search for six appliances, restricted to the "All" feature
# group and scored with metric="mean". Training-side outlier handling is on
# (fraction 0.1); test-side outlier handling is off.
# The second return value is indexed later as
# optimal_dict_mean[appliance]["All"]['f'] (features) and [...]['k'] (neighbours).
# print_steps=True logs every step of the search, so expect long output.
out_mean, optimal_dict_mean = find_optimal_features(
    df,
    dfc,
    all_homes,
    appliance_min,
    national_average,
    ["hvac", "light", "fridge", "dr", "wm", "dw"],
    {"All": feature_map["All"]},
    NUM_NEIGHBOURS_MAX=7,
    F_length_max=7,
    metric="mean",
    train_outlier=True,
    test_outlier=False,
    outlier_features=None,
    outlier_fraction=0.1,
    print_steps=True
)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
hvac
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
********************
All
********************
include: aggregate_5 -> feature_subset: ['aggregate_5']. Accuracy: 77.7576037951
include: seasonal_energy_7 -> feature_subset: ['aggregate_5', 'seasonal_energy_7']. Accuracy: 79.5358240671
include: gt_1000 -> feature_subset: ['aggregate_5', 'seasonal_energy_7', 'gt_1000']. Accuracy: 80.9630398559
include: seasonal_energy_6 -> feature_subset: ['aggregate_5', 'seasonal_energy_7', 'gt_1000', 'seasonal_energy_6']. Accuracy: 79.9160770052
include: aggregate_3 -> feature_subset: ['aggregate_5', 'seasonal_energy_7', 'gt_1000', 'seasonal_energy_6', 'aggregate_3']. Accuracy: 80.4263591792
include: seasonal_energy_9 -> feature_subset: ['aggregate_5', 'seasonal_energy_7', 'gt_1000', 'seasonal_energy_6', 'aggregate_3', 'seasonal_energy_9']. Accuracy: 80.7720803417
include: lt_500 -> feature_subset: ['aggregate_5', 'seasonal_energy_7', 'gt_1000', 'seasonal_energy_6', 

In [9]:
import json

# Persist both results of the feature search so later sessions can reuse them
# without re-running the search.
# Context managers guarantee each file is flushed and closed as soon as its
# dump finishes, instead of leaving the handle to garbage collection.
with open("without-test-optimal_dict.json", "w") as optimal_dict_file:
    json.dump(optimal_dict_mean, optimal_dict_file)

with open("without-test-out.json", "w") as out_file:
    json.dump(out_mean, out_file)


### Storing the per-appliance prediction accuracies in pd.Panel

In [11]:
# Homes whose hvac accuracy is blanked out (set to NaN) for months 5 and 10.
# NOTE(review): the reason these homes/months are excluded is not visible
# here -- confirm and document.
HVAC_MASKED_HOMES = [624, 1953, 6636, 6836, 7769, 9922]

# For every appliance: predict with its best feature set / neighbour count,
# turn the per-month percentage error into an accuracy (100 - error, floored
# at 0) and keep the transposed accuracy frame in `predictions`.
predictions = {}
for appliance in ["fridge", "hvac", "dw", "dr", "wm", "light"]:
    # Parenthesised form prints the same text under Python 2 and Python 3.
    print(appliance)
    best_feature = optimal_dict_mean[appliance]["All"]['f']
    best_k = optimal_dict_mean[appliance]["All"]['k']
    temp = create_predictions(df, dfc, all_homes, appliance_min,
                              national_average, appliance=appliance,
                              feature=best_feature, NUM_NEIGHBOURS=best_k,
                              train_outlier=True, test_outlier=False,
                              outlier_features=None,
                              outlier_fraction=0.1)

    # range() excludes its end: hvac is scored on months 5..10, every other
    # appliance on months 1..12.
    if appliance == "hvac":
        start_month, end_month = 5, 11
    else:
        start_month, end_month = 1, 13

    errors = {}
    for i in range(start_month, end_month):
        errors[i] = percentage_error(temp[i]["gt"], temp[i]["pred"])

    # Build the frames once, after all months are collected. Rebuilding them
    # inside the month loop gave the same final result but repeated the work
    # on every iteration.
    error_df = pd.DataFrame(errors)
    accur_df = 100 - error_df
    # Errors above 100% would give negative accuracy; floor those at 0.
    accur_df[accur_df < 0] = 0

    tdf = accur_df
    if appliance == "hvac":
        for home in HVAC_MASKED_HOMES:
            # np.nan is the canonical spelling; the np.NaN alias was removed
            # in NumPy 2.0.
            tdf.loc[home, 5] = np.nan
            tdf.loc[home, 10] = np.nan

    predictions[appliance] = tdf.T

fridge
hvac
dw
dr
wm
light


In [12]:
# `predictions` maps each appliance name to its transposed accuracy frame
# (tdf.T), so the Panel holds one item per appliance.
# NOTE(review): pd.Panel is deprecated/removed in newer pandas releases --
# confirm the pandas version this notebook is pinned to still provides it.
p = pd.Panel(predictions)

In [13]:
# Open, write and close the HDF5 store in a single cell. The context manager
# closes accuracy.h5 even if the append raises, whereas the previous
# open / append / close split across three cells left the file open whenever
# the middle cell failed or the last one was never run.
# NOTE(review): append() adds to an existing "N-NILM" node rather than
# replacing it, so re-running this cell against an existing accuracy.h5
# appends the data again -- confirm that is intended.
with pd.HDFStore("accuracy.h5") as accuracy_store:
    accuracy_store.append("N-NILM", p)

In [16]:
# Recompute the hvac accuracy frame on its own (same steps as the hvac pass of
# the per-appliance loop in the earlier cell) so it can be inspected below.
appliance = "hvac"
best_feature = optimal_dict_mean[appliance]["All"]['f']
best_k = optimal_dict_mean[appliance]["All"]['k']
temp = create_predictions(df, dfc, all_homes, appliance_min,
                          national_average, appliance=appliance,
                          feature=best_feature, NUM_NEIGHBOURS=best_k,
                          train_outlier=True, test_outlier=False,
                          outlier_features=None,
                          outlier_fraction=0.1)

# range() excludes its end: hvac is scored on months 5..10. The else branch
# keeps the cell usable if `appliance` is changed to something else.
if appliance == "hvac":
    start_month, end_month = 5, 11
else:
    start_month, end_month = 1, 13

errors = {}
for i in range(start_month, end_month):
    errors[i] = percentage_error(temp[i]["gt"], temp[i]["pred"])

# Build the frames once, after all months are collected. Rebuilding them on
# every loop iteration gave the same final result but repeated the work.
error_df = pd.DataFrame(errors)
accur_df = 100 - error_df
# Errors above 100% would give negative accuracy; floor those at 0.
accur_df[accur_df < 0] = 0

tdf = accur_df
if appliance == "hvac":
    # NOTE(review): the reason these homes are blanked out for months 5 and 10
    # is not visible here -- confirm and document.
    for home in [624, 1953, 6636, 6836, 7769, 9922]:
        # np.nan is the canonical spelling; the np.NaN alias was removed in
        # NumPy 2.0.
        tdf.loc[home, 5] = np.nan
        tdf.loc[home, 10] = np.nan



In [21]:
# Average each month column first (NaN entries are skipped), then average
# those per-month means, so every month carries equal weight.
tdf.mean(axis=0).mean()

79.78658674478605