In [1]:
import torch
from torch.autograd import Variable
import numpy as np
import torch.nn as nn
from torchvision import datasets,transforms
import torch.nn.functional as F
import sys
sys.path.append("../code/")
from dataloader import APPLIANCE_ORDER, get_train_test, ON_THRESHOLD
from sklearn.metrics import mean_absolute_error
import os
import pandas as pd
import sys
import numpy as np
import matplotlib.pyplot as plt
sys.path.append('../code/')
%matplotlib inline
import itertools
from pathlib import Path

In [2]:
def onoff_error(pred, gt, threshold):
    """Mean absolute error over the entries whose absolute error reaches ``threshold``.

    Parameters
    ----------
    pred, gt : array-like objects supporting ``pred - gt`` (same or broadcastable shape)
        Predicted and ground-truth readings; all entries are pooled.
    threshold : float
        Only absolute errors ``>= threshold`` take part in the mean.

    Returns
    -------
    float
        Mean of the retained absolute errors, or ``0.0`` when no error reaches
        the threshold. The previous implementation returned NaN (with a NumPy
        RuntimeWarning) in that case, which propagated into the fold-weighted
        averages and could never win an ``error < min_error`` comparison.
    """
    # Subtract in the inputs' own dtype, then promote to float64 so that the
    # threshold comparison and the mean are carried out in double precision.
    abs_error = np.asarray(np.abs(pred - gt), dtype=np.float64).ravel()
    significant = abs_error[abs_error >= threshold]
    if significant.size == 0:
        return 0.0
    return significant.mean()

In [3]:
# Load the 2015 five-appliance array from disk and keep index 3 of its second axis.
# NOTE(review): `dr` presumably stands for the dryer channel — confirm against the
# file layout. Neither `tensor` nor `dr` is referenced again in the visible cells.
tensor = np.load("../2015-5appliances.numpy.npy")
dr = tensor[:, 3]

In [4]:
# Build the per-fold ground truth for the test split and for the validation
# split, the latter being the last 20% of each fold's training rows.
test_gt, valid_gt = {}, {}
for fold_num in range(5):
    train, test = get_train_test(1, 5, fold_num)
    valid = train[int(0.8 * len(train)):].copy()

    fold_test, fold_valid = {}, {}
    # The appliance list skips the first and last entries of APPLIANCE_ORDER,
    # so the matching column along axis 1 is the loop index shifted by one.
    for idx, appliance in enumerate(APPLIANCE_ORDER[1:-1]):
        column = idx + 1
        fold_test[appliance] = test[:, column]
        fold_valid[appliance] = valid[:, column]

    test_gt[fold_num] = fold_test
    valid_gt[fold_num] = fold_valid

In [5]:
# Per-appliance error threshold: 10% of the mean test reading that exceeds the
# appliance's ON_THRESHOLD, pooled over the five folds.
threshold = {}
for appliance in ['hvac', 'fridge', 'dr', 'dw', 'mw']:
    on_readings = []
    for fold_num in range(5):
        # Flatten in C order and promote to float64 before comparing.
        readings = np.asarray(test_gt[fold_num][appliance], dtype=np.float64).reshape(-1)
        on_readings.append(readings[readings > ON_THRESHOLD[appliance]])
    sample_list = np.concatenate(on_readings)
    mean = np.mean(sample_list)
    print(appliance, mean)
    threshold[appliance] = 0.1*mean

hvac 1278.91522439
fridge 90.4680514664
dr 1411.75680957
dw 390.600781657
mw 86.8746588166


### Individual RNN and TreeRNN
#### Reuse the best parameters found earlier, because the RNN always predicts zero for the dryer

In [6]:
def calculate_error(pred, gt, threshold):
    """Thresholded error per appliance and fold, plus a home-weighted overall value.

    Parameters
    ----------
    pred, gt : dict
        ``{fold_num: {appliance: array}}`` for folds 0-4 and the appliances
        hvac, fridge, dr, dw and mw. Arrays are reshaped to ``(-1, 24)`` before
        being handed to ``onoff_error``.
    threshold : dict
        ``{appliance: threshold}`` passed through to ``onoff_error``.

    Returns
    -------
    (error, overall)
        ``error[appliance][fold_num]`` is the fold's ``onoff_error``;
        ``overall[appliance]`` is the average of the fold errors weighted by
        the number of homes in each fold.
    """
    appliances = ['hvac', 'fridge', 'dr', 'dw', 'mw']
    folds = range(5)

    # Homes per fold: leading dimension of the hvac ground truth once it is
    # viewed as (homes, 1, 112, 24).
    num_homes = {
        fold_num: gt[fold_num]['hvac'].reshape(-1, 1, 112, 24).shape[0]
        for fold_num in folds
    }
    homes = pd.Series(num_homes).sum()

    error = {}
    overall = {}
    # Every appliance, hvac and fridge included, is scored with onoff_error.
    for appliance in appliances:
        per_fold = {}
        weighted_sum = 0
        for fold_num in folds:
            fold_error = onoff_error(pred[fold_num][appliance].reshape(-1, 24),
                                     gt[fold_num][appliance].reshape(-1, 24),
                                     threshold[appliance])
            per_fold[fold_num] = fold_error
            weighted_sum += fold_error * num_homes[fold_num]
        error[appliance] = per_fold
        overall[appliance] = weighted_sum / homes

    return error, overall

In [8]:
# Dataset identifier; interpolated into the result and prediction paths built below.
dataset = 1

In [9]:
# Best hyper-parameters selected earlier for the individual and the tree RNN.
# Each file holds a Python object (indexed like a nested dict further down), so
# NumPy >= 1.16.3 refuses to load it unless allow_pickle=True; `.item()` then
# unwraps the object from the 0-d array.
# Unpickling can execute arbitrary code: only load files produced by this project.
best_rnn_individual = np.load("../code/baseline/result/rnn-individual-param-{}.npy".format(dataset),
                              allow_pickle=True).item()
best_rnn_tree = np.load("../code/baseline/result/rnn-tree-param-{}.npy".format(dataset),
                        allow_pickle=True).item()

In [10]:
# Load the test predictions of the individual RNNs (one model per appliance),
# using the best hyper-parameters stored for each appliance and fold.
rnn_individual_pred = {}
rnn_individual_error = {}
for fold_num in range(5):
    rnn_individual_pred[fold_num] = {}
    rnn_individual_error[fold_num] = {}  # not populated in this cell
    for appliance in ['hvac', 'fridge', 'dr', 'dw', 'mw']:
        best_rnn_params = best_rnn_individual[appliance][fold_num]
        cell_type = best_rnn_params['cell_type']
        hidden_size = best_rnn_params['hidden_size']
        num_layers = best_rnn_params['num_layers']
        lr = best_rnn_params['lr']
        iters = best_rnn_params['iters']
        bidirectional = best_rnn_params['bidirectional']

        # NOTE(review): the individual predictions are read from the rnn-tree
        # directory — presumably the same script writes both; confirm.
        directory = "../code/baseline/rnn-tree/{}/{}/{}/{}/{}/{}/{}/3000/0.0/".format(
            dataset, fold_num, cell_type, hidden_size, num_layers, bidirectional, lr)
        filename = "test-pred-{}-{}-{}-{}-{}-{}-{}-3000-0.0-[\'{}\'].npy".format(
            fold_num, dataset, cell_type, hidden_size, num_layers, bidirectional, lr, appliance)

        full_path = directory + filename
        # The file stores a Python object keyed by iteration, so NumPy >= 1.16.3
        # needs allow_pickle=True. Only load files produced by this project.
        saved_pred = np.load(full_path, allow_pickle=True).item()
        rnn_individual_pred[fold_num][appliance] = np.asarray(saved_pred[iters]).reshape(-1, 1, 112, 24)

In [11]:
# Load the test predictions of the tree RNN, using the best hyper-parameters
# (including the appliance order) stored for each fold.
rnn_tree_pred = {}
rnn_tree_error = {}
for fold_num in range(5):
    rnn_tree_pred[fold_num] = {}
    best_tree_params = best_rnn_tree[fold_num]
    cell_type = best_tree_params['cell_type']
    hidden_size = best_tree_params['hidden_size']
    num_layers = best_tree_params['num_layers']
    lr = best_tree_params['lr']
    iters = best_tree_params['iters']
    bidirectional = best_tree_params['bidirectional']
    order = best_tree_params['order']

    directory = "../code/baseline/rnn-tree/{}/{}/{}/{}/{}/{}/{}/3000/0.0/".format(
        dataset, fold_num, cell_type, hidden_size, num_layers, bidirectional, float(lr))
    # str(order)[2:-2] drops the first two and last two characters of the
    # sequence's repr; the format string puts "['" and "']" back around it.
    filename = "test-pred-{}-{}-{}-{}-{}-{}-{}-3000-0.0-[\'{}\'].npy".format(
        fold_num, dataset, cell_type, hidden_size, num_layers, bidirectional, float(lr),
        str(order)[2:-2])

    full_path = directory + filename
    # The file stores a Python object keyed by iteration, so NumPy >= 1.16.3
    # needs allow_pickle=True. Only load files produced by this project.
    rnn_pred = np.asarray(np.load(full_path, allow_pickle=True).item()[iters])

    # Predictions are stored in the same order as the appliances in `order`.
    for idx, appliance in enumerate(order):
        rnn_tree_pred[fold_num][appliance] = rnn_pred[idx].reshape(-1, 1, 112,24)

In [12]:
# Per-fold test error of the individual RNNs (rows: folds, columns: appliances).
pd.DataFrame(calculate_error(rnn_individual_pred, test_gt, threshold)[0])

Unnamed: 0,dr,dw,fridge,hvac,mw
0,1196.418182,347.71572,35.16587,444.993245,59.136712
1,1198.23622,358.746039,43.786497,458.810986,55.31246
2,1190.082446,367.253368,36.294608,436.912402,47.882055
3,1313.36524,376.183166,41.699081,550.634354,47.906568
4,1243.650715,304.606769,37.210239,456.657575,70.219807


In [13]:
# Per-fold test error of the tree RNN (rows: folds, columns: appliances).
pd.DataFrame(calculate_error(rnn_tree_pred, test_gt, threshold)[0])

Unnamed: 0,dr,dw,fridge,hvac,mw
0,1144.21911,347.798758,39.609324,740.633321,53.66041
1,922.163193,358.737232,41.782622,591.087308,54.930873
2,1002.994793,367.253368,33.701705,683.364317,47.329837
3,984.426171,376.277118,40.517034,699.130496,47.518651
4,910.129156,304.606769,37.712402,643.47972,71.175344


In [14]:
# Home-weighted overall test error of the tree RNN, one value per appliance.
pd.Series(calculate_error(rnn_tree_pred, test_gt, threshold)[1])

dr        994.124979
dw        351.243258
fridge     38.651379
hvac      671.545913
mw         54.792906
dtype: float64

In [15]:
# Home-weighted overall test error of the individual RNNs, one value per appliance.
pd.Series(calculate_error(rnn_individual_pred, test_gt, threshold)[1])

dr        1226.875342
dw         351.210014
fridge      38.812924
hvac       468.600411
mw          56.004118
dtype: float64

## Individual CNN and TreeCNN

In [16]:
# Collect the validation predictions of the individual CNNs for every fold,
# appliance, learning rate and saved iteration. For each missing result file a
# shell command that would produce it is printed instead, cycling over GPU
# ids 0-3; `c` counts the missing files.
cnn_individual_valid_pred = {}
gid = 0
c = 0
for fold_num in range(5):
    cnn_individual_valid_pred[fold_num] = {}
    for appliance in ['hvac', 'fridge', 'dr', 'dw', 'mw']:
        cnn_individual_valid_pred[fold_num][appliance] = {}
        for lr in [0.001, 0.01, 0.1]:
            cnn_individual_valid_pred[fold_num][appliance][lr] = {}
            for iters in [200000]:

                directory = "../code/baseline/cnn-tree/{}/{}/{}/{}/0.0/".format(dataset, fold_num, lr, iters)
                filename = "valid-pred-[\'{}\'].npy".format(appliance)

                full_path = directory + filename
                my_file = Path(full_path)
                if not my_file.exists():
                    print("CUDA_VISIBLE_DEVICES={} python cnn-tree.py {} {} {} 0 {} {} &".format(gid, dataset, lr, iters, fold_num, appliance))
                    gid += 1
                    if gid == 4:
                        gid = 0
                    c += 1
                else:
                    # The file stores a Python object keyed by iteration, so
                    # NumPy >= 1.16.3 needs allow_pickle=True. Only load files
                    # produced by this project.
                    k = np.load(full_path, allow_pickle=True).item()
                    for it in range(1000, 200000+1, 1000):
                        # Index 0: a single-appliance run stores one prediction per iteration.
                        cnn_individual_valid_pred[fold_num][appliance][lr][it] = k[it][0]
print(c)

0


In [37]:
# cnn_individual_best_param = {}
# for fold_num in range(5):
#     cnn_individual_best_param[fold_num] = {}
#     for appliance in ['hvac', 'fridge']:
#         cnn_individual_best_param[fold_num][appliance] = {}
#         min_error = np.inf
#         for lr in [0.001, 0.01, 0.1]:
#             for it in range(1000, 200000+1, 1000):
#                 error = mean_absolute_error(cnn_individual_valid_pred[fold_num][appliance][lr][it].reshape(-1, 24), valid_gt[fold_num][appliance].reshape(-1, 24))
#                 if error < min_error:
#                     cnn_individual_best_param[fold_num][appliance]['lr'] = lr
#                     cnn_individual_best_param[fold_num][appliance]['iters'] = it
#                     min_error = error

In [17]:
# For every fold and appliance, keep the learning rate and iteration whose
# validation prediction has the lowest thresholded error. Ties keep the first
# candidate encountered because the comparison is strict.
cnn_individual_best_param = {}
for fold_num in range(5):
    fold_best = cnn_individual_best_param[fold_num] = {}
    for appliance in ['hvac', 'fridge', 'dr', 'dw', 'mw']:
        appliance_best = fold_best[appliance] = {}
        min_error = np.inf
        for lr, it in itertools.product([0.001, 0.01, 0.1], range(1000, 200000+1, 1000)):
            candidate = cnn_individual_valid_pred[fold_num][appliance][lr][it]
            error = onoff_error(candidate.reshape(-1, 24),
                                valid_gt[fold_num][appliance].reshape(-1, 24),
                                threshold[appliance])
            if error < min_error:
                appliance_best['lr'] = lr
                appliance_best['iters'] = it
                min_error = error

In [18]:
# Persist the selected individual-CNN parameters; the dict is read back with
# np.load(...).item() further down.
np.save("../code/baseline/result/cnn-individual-new-param-{}.npy".format(dataset), cnn_individual_best_param)

In [19]:
# Collect the validation predictions of the tree CNN for every fold and every
# appliance order that does not start with hvac. For each missing result file a
# shell command that would produce it is printed instead, cycling over GPU
# ids 0-3; `c` counts the missing files.
gid = 0
srv = 1
c = 0
cmd = {}  # not used in this cell
cnn_tree_valid_pred = {}
num_iterations = 20000
lr = 0.01
p = 0  # not used in this cell
for fold_num in range(5):
    cnn_tree_valid_pred[fold_num] = {}
    for lr in [0.01]:
        cnn_tree_valid_pred[fold_num][lr] = {}
        for order in itertools.permutations(['hvac', 'fridge', 'dr', 'dw', 'mw']):

            if order[0] == 'hvac':
                continue

            cnn_tree_valid_pred[fold_num][lr][order] = {}

            o = "\', \'".join(str(x) for x in order)
            directory = "../code/baseline/cnn-tree/{}/{}/{}/20000/0.0/".format(dataset, fold_num, lr)
            filename = "valid-pred-[\'{}\'].npy".format(o)

            full_path = directory + filename
            my_file = Path(full_path)
            if not my_file.exists():
                o = " ".join(str(x) for x in order)
                # Use `dataset` rather than a hard-coded 3, so the suggested
                # command regenerates the file whose absence was just detected
                # (the individual-CNN cell builds its command the same way).
                line = ("CUDA_VISIBLE_DEVICES={} python cnn-tree.py {} {} {} 0 {} {} &".format(gid, dataset, lr, num_iterations, fold_num, o))
                print(line)

                c += 1
                gid += 1
                if gid == 4:
                    # All four GPU ids handed out: wrap and move to the next server index.
                    gid = 0
                    srv += 1
            else:
                # The file stores a Python object keyed by iteration, so NumPy
                # >= 1.16.3 needs allow_pickle=True. Only load files produced
                # by this project.
                k = np.load(full_path, allow_pickle=True).item()
                for it in range(1000, 20001, 1000):
                    cnn_tree_valid_pred[fold_num][lr][order][it] = {}
                    # Predictions are stored in the same order as `order`.
                    for idx, appliance in enumerate(order):
                        cnn_tree_valid_pred[fold_num][lr][order][it][appliance] = k[it][idx]

print(c)

0


In [20]:
# For every fold, keep the learning rate, appliance order and iteration whose
# validation error, summed over all appliances, is lowest. Orders that start
# with hvac are not considered. Ties keep the first candidate encountered
# because the comparison is strict.
cnn_tree_best_param = {}
candidate_orders = [perm for perm in itertools.permutations(['hvac', 'fridge', 'dr', 'dw', 'mw'])
                    if perm[0] != 'hvac']
for fold_num in range(5):
    cnn_tree_best_param[fold_num] = {}
    min_error = np.inf
    for lr, order, it in itertools.product([0.01], candidate_orders, range(1000, 20001, 1000)):
        candidate = cnn_tree_valid_pred[fold_num][lr][order][it]
        error = sum(onoff_error(candidate[appliance].reshape(-1, 24),
                                valid_gt[fold_num][appliance].reshape(-1, 24),
                                threshold[appliance])
                    for appliance in order)
        if error < min_error:
            min_error = error
            cnn_tree_best_param[fold_num]['lr'] = lr
            cnn_tree_best_param[fold_num]['order'] = order
            cnn_tree_best_param[fold_num]['iters'] = it
                
                    

In [21]:
# Persist the selected tree-CNN parameters; the dict is read back with
# np.load(...).item() further down.
np.save("../code/baseline/result/cnn-tree-new-param-{}.npy".format(dataset), cnn_tree_best_param)

In [22]:
# Load the best parameters selected for the CNN models.
# The files were written with np.save from Python dicts, so NumPy >= 1.16.3
# refuses to load them unless allow_pickle=True; `.item()` unwraps the dict.
# Unpickling can execute arbitrary code: only load files produced by this project.
dataset = 1
best_cnn_individual = np.load("../code/baseline/result/cnn-individual-new-param-{}.npy".format(dataset),
                              allow_pickle=True).item()
best_cnn_tree = np.load("../code/baseline/result/cnn-tree-new-param-{}.npy".format(dataset),
                        allow_pickle=True).item()

In [23]:
# Load the test predictions of the individual CNNs at the learning rate and
# iteration selected for each fold and appliance.
cnn_individual_pred = {}
for fold_num in range(5):
    cnn_individual_pred[fold_num] = {}
    for appliance in ['hvac', 'fridge', 'dr', 'dw', 'mw']:

        lr = best_cnn_individual[fold_num][appliance]['lr']
        iters = best_cnn_individual[fold_num][appliance]['iters']

        directory = "../code/baseline/cnn-tree/{}/{}/{}/200000/0.0/".format(dataset, fold_num,lr)
        filename = "test-pred-[\'{}\'].npy".format(appliance)

        full_path = directory + filename
        # The file stores a Python object keyed by iteration, so NumPy >= 1.16.3
        # needs allow_pickle=True. Only load files produced by this project.
        saved_pred = np.load(full_path, allow_pickle=True).item()
        cnn_individual_pred[fold_num][appliance] = np.asarray(saved_pred[iters]).reshape(-1, 1, 112, 24)

In [24]:
# Load the test predictions of the tree CNN at the learning rate, appliance
# order and iteration selected for each fold.
cnn_tree_pred = {}
for fold_num in range(5):
    cnn_tree_pred[fold_num] = {}

    lr = best_cnn_tree[fold_num]['lr']
    iters = best_cnn_tree[fold_num]['iters']
    order = best_cnn_tree[fold_num]['order']

    # Rebuild the list-like file-name suffix: names joined by "', '" and wrapped in "['" ... "']".
    o = "\', \'".join(str(x) for x in order)
    directory = "../code/baseline/cnn-tree/{}/{}/{}/20000/0.0/".format(dataset, fold_num,lr)
    filename = "test-pred-[\'{}\'].npy".format(o)

    full_path = directory + filename
    # The file stores a Python object keyed by iteration, so NumPy >= 1.16.3
    # needs allow_pickle=True. Only load files produced by this project.
    cnn_pred = np.asarray(np.load(full_path, allow_pickle=True).item()[iters])

    # Predictions are stored in the same order as the appliances in `order`.
    for idx, appliance in enumerate(order):
        cnn_tree_pred[fold_num][appliance] = cnn_pred[idx].reshape(-1, 1, 112,24)
    

In [25]:
# Home-weighted overall test error of each model, keyed by a display label.
model_predictions = [
    ('Individual RNN', rnn_individual_pred),
    ('Tree RNN', rnn_tree_pred),
    ('Individual CNN', cnn_individual_pred),
    ('Tree CNN', cnn_tree_pred),
]
new_error = {}
for model_label, model_pred in model_predictions:
    new_error[model_label] = calculate_error(model_pred, test_gt, threshold)[1]

In [26]:
# Side-by-side overall error of the four models (rows: appliances, columns: models).
pd.DataFrame(new_error)

Unnamed: 0,Individual CNN,Individual RNN,Tree CNN,Tree RNN
dr,594.526163,1226.875342,582.596692,994.124979
dw,338.347666,351.210014,326.491795,351.243258
fridge,38.265516,38.812924,44.650674,38.651379
hvac,297.020788,468.600411,299.490306,671.545913
mw,52.622841,56.004118,51.358356,54.792906


In [27]:
# Per-fold test error of the tree CNN (rows: folds, columns: appliances).
pd.DataFrame(calculate_error(cnn_tree_pred, test_gt, threshold)[0])

Unnamed: 0,dr,dw,fridge,hvac,mw
0,378.656361,347.049247,37.122359,282.133492,48.220002
1,697.105043,316.1037,44.383654,304.747196,40.620868
2,865.405661,355.784971,43.684598,232.870444,33.55198
3,466.851354,354.733526,51.743456,265.511647,36.786395
4,383.554244,211.601687,45.550721,297.924211,58.96484


In [34]:
# Home-weighted overall test error of the individual RNNs.
# NOTE(review): execution count 34 is out of sequence, and the stored output
# differs from the In [15] output of this same expression — likely stale.
# Re-run after Restart & Run All or delete this cell.
pd.Series(calculate_error(rnn_individual_pred, test_gt, threshold)[1])

dr        1179.818965
dw         341.548343
fridge      37.817167
hvac       462.937325
mw          19.622376
dtype: float64

In [35]:
# Per-fold test error of the tree RNN.
# NOTE(review): execution count 35 is out of sequence, and the stored output
# differs from the In [13] output of this same expression — likely stale.
# Re-run after Restart & Run All or delete this cell.
pd.DataFrame(calculate_error(rnn_tree_pred, test_gt, threshold)[0])

Unnamed: 0,dr,dw,fridge,hvac,mw
0,1160.995494,331.630382,33.887644,413.234535,19.252954
1,1160.334033,350.23934,41.577924,511.260277,21.33013
2,1158.480403,359.846892,33.147392,447.280023,10.658043
3,1235.319879,367.036913,38.226928,535.84963,12.037115
4,1191.458905,297.748852,36.491569,428.950569,22.298461


In [36]:
# Per-fold test error of the individual CNNs (rows: folds, columns: appliances).
# NOTE(review): execution count 36 is out of sequence with the cells above, so
# the stored output may come from a different kernel state — re-run to confirm.
pd.DataFrame(calculate_error(cnn_individual_pred, test_gt, threshold)[0])

Unnamed: 0,dr,dw,fridge,hvac,mw
0,808.980404,326.854814,36.265273,426.088388,17.321685
1,882.535487,340.169802,42.361022,409.628172,21.999674
2,969.5983,351.820964,35.161585,397.086011,10.038362
3,1103.052577,366.925923,40.590406,464.559257,10.94248
4,1008.846665,298.172872,36.59322,391.00363,27.689349


In [127]:
# Per-fold test error of the tree CNN.
# NOTE(review): execution count 127 is out of sequence, and the stored output
# differs from the In [27] output of this same expression — likely stale.
# Re-run after Restart & Run All or delete this cell.
pd.DataFrame(calculate_error(cnn_tree_pred, test_gt, threshold)[0])

Unnamed: 0,dr,dw,fridge,hvac,mw
0,380.361517,221.357761,40.789443,183.317879,32.954544
1,654.570013,308.036766,36.696303,138.862061,38.031077
2,832.800544,348.930867,35.966973,147.254607,29.218336
3,396.865739,338.646366,45.409241,176.136952,34.278379
4,337.580422,191.665894,39.705588,171.053769,57.125152
