In [69]:
import torch
from torch.autograd import Variable
import numpy as np
import torch.nn as nn
from torchvision import datasets,transforms
import torch.nn.functional as F
import sys
sys.path.append("../code/")
from dataloader import APPLIANCE_ORDER, get_train_test
from sklearn.metrics import mean_absolute_error,mean_squared_error

import os
import pandas as pd
import sys
import numpy as np
import matplotlib.pyplot as plt
sys.path.append('../code/')
%matplotlib inline
import itertools
from pathlib import Path

In [107]:
# Per-appliance weight: the ratio of mean_appliance[idx] to mean_appliance[0],
# keyed by the names in APPLIANCE_ORDER[:-1].
# NOTE(review): mean_appliance is not defined in any visible cell, so it must
# already exist in the kernel; confirm where it is computed.
frac = {
    appliance: mean_appliance[idx]/mean_appliance[0]
    for idx, appliance in enumerate(APPLIANCE_ORDER[:-1])
}

In [42]:
# Build the ground-truth lookups:
#   test_gt[dataset][fold_num][appliance]  -> column `idx` of the test split
#   valid_gt[dataset][fold_num][appliance] -> column `idx` of the validation split
test_gt = {}
valid_gt = {}
for dataset, fold_num in itertools.product([1, 3], range(5)):
    train, test = get_train_test(dataset, 5, fold_num)
    # The trailing 20% of the training split is held out for validation.
    valid = train[int(0.8*len(train)):].copy()

    fold_test = test_gt.setdefault(dataset, {}).setdefault(fold_num, {})
    fold_valid = valid_gt.setdefault(dataset, {}).setdefault(fold_num, {})
    for idx, appliance in enumerate(APPLIANCE_ORDER[:-1]):
        fold_test[appliance] = test[:, idx]
        fold_valid[appliance] = valid[:, idx]

In [154]:
def compute_rms_pec(pred, gt, agg):
    """Compare predicted and true energy shares, expressed in percent.

    Each input is summed along axis 2. The predicted and ground-truth sums
    are divided by the aggregate sums and multiplied by 100, and the two
    resulting percentage arrays are passed to sklearn's
    ``mean_squared_error``.

    NOTE(review): despite the "rms" in the name, no square root is taken;
    the returned value is a mean squared error. Confirm this is intended.

    Args:
        pred: predicted appliance readings (at least 3-D array).
        gt: ground-truth appliance readings, same layout as ``pred``.
        agg: aggregate readings used as the denominator, same layout.

    Returns:
        The mean squared error between the two percentage arrays.
    """
    agg_total = agg.sum(axis=2)
    pred_frac = pred.sum(axis=2) / agg_total * 100
    gt_frac = gt.sum(axis=2) / agg_total * 100
    return mean_squared_error(pred_frac, gt_frac)

In [155]:
def calculate_error(pred, gt):
    """Score per-appliance predictions over the five folds.

    Args:
        pred: ``pred[fold_num][appliance]`` -> array of predictions, for
            folds 0-4 and the appliances 'hvac', 'fridge', 'dr', 'dw', 'mw'.
        gt: ``gt[fold_num][appliance]`` -> array of ground truth; must also
            contain the key 'aggregate' for every fold.

    All arrays are reshaped to ``(-1, 112, 24)`` before scoring. The
    module-level ``frac`` dict supplies the per-appliance weights.

    Returns:
        A tuple ``(error, overall, model_error)`` where
        ``error[appliance][fold_num]`` is the ``compute_rms_pec`` score of
        that fold, ``overall[appliance]`` is the home-count-weighted mean of
        the fold scores, and ``model_error`` is the sum of
        ``overall[appliance] * frac[appliance]`` over the appliances.
    """
    appliances = ['hvac', 'fridge', 'dr', 'dw', 'mw']
    folds = range(5)

    # Calculate the number of homes in each fold (leading axis after reshaping).
    num_homes = {
        fold_num: gt[fold_num]['aggregate'].reshape(-1, 1, 112, 24).shape[0]
        for fold_num in folds
    }
    homes = pd.Series(num_homes).sum()

    error = {}
    overall = {}
    for appliance in appliances:
        error[appliance] = {}
        weighted_total = 0
        for fold_num in folds:
            fold_score = compute_rms_pec(
                pred[fold_num][appliance].reshape(-1, 112, 24),
                gt[fold_num][appliance].reshape(-1, 112, 24),
                gt[fold_num]['aggregate'].reshape(-1, 112, 24),
            )
            error[appliance][fold_num] = fold_score
            # Weight each fold by how many homes it contains.
            weighted_total += fold_score*num_homes[fold_num]
        overall[appliance] = weighted_total / homes

    model_error = sum(overall[appliance]*frac[appliance] for appliance in appliances)

    return error, overall, model_error

# STF

In [28]:
# Read every STF validation-prediction file of the hyper-parameter grid into
# r[dataset][cur_fold][num_latent][lr][iters]. mean_r receives the same nested
# keys down to lr, but its innermost dicts are left empty here.
r = {}
mean_r = {}
for dataset, cur_fold, num_latent, lr in itertools.product([1, 3], range(5), range(1, 21), [0.01, 0.1, 1, 2]):
    # Dict keys and file names use the float form of the learning rate (1 -> 1.0).
    lr = float(lr)
    mean_r.setdefault(dataset, {}).setdefault(cur_fold, {}).setdefault(num_latent, {})[lr] = {}
    preds_by_iters = {}
    r.setdefault(dataset, {}).setdefault(cur_fold, {}).setdefault(num_latent, {})[lr] = preds_by_iters
    for iters in range(100, 2600, 400):
        preds_by_iters[iters] = np.load("../code/baseline/stf/{}/valid/stf-pred-{}-{}-{}-{}-{}.npy".format(dataset, dataset, cur_fold, num_latent, lr, iters))
                    

In [108]:
# Grid-search the STF hyper-parameters on the validation split.
# For each dataset/fold, error holds the lowest frac-weighted validation error
# and best_p the (num_latent, lr, iters) combination that produced it.
error = {}
best_p = {}
for dataset, cur_fold in itertools.product([1, 3], range(5)):
    truth = valid_gt[dataset][cur_fold]
    lowest = np.inf
    chosen = {}
    for num_latent, lr, iters in itertools.product(range(1, 21), [0.01, 0.1, 1, 2], range(100, 2600, 400)):
        cur_error = 0
        # Prediction channel idx corresponds to APPLIANCE_ORDER[1:-1][idx].
        for idx, appliance in enumerate(APPLIANCE_ORDER[1:-1]):
            cur_error += frac[appliance]*compute_rms_pec(r[dataset][cur_fold][num_latent][lr][iters][:, idx],
                                                         truth[appliance], truth['aggregate'])
        # Strict "<" keeps the first combination on ties.
        if cur_error < lowest:
            lowest = cur_error
            chosen = {'num_latent': num_latent, 'lr': lr, 'iters': iters}
    error.setdefault(dataset, {})[cur_fold] = lowest
    best_p.setdefault(dataset, {})[cur_fold] = chosen

In [137]:
# For every dataset/fold, load the STF test predictions stored under the
# hyper-parameters selected in best_p and split them per appliance:
# test_pred[dataset][cur_fold][appliance] -> pred[:, idx].
# (Presumably the files come from `python baseline-stf-nested.py <dataset>
# <fold> <num_latent> <lr> <iters>`; confirm.)
test_pred = {}
for dataset, cur_fold in itertools.product([1, 3], range(5)):
    fold_pred = test_pred.setdefault(dataset, {}).setdefault(cur_fold, {})
    chosen = best_p[dataset][cur_fold]
    num_latent = chosen['num_latent']
    lr = float(chosen['lr'])
    iters = chosen['iters']

    pred = np.load("../code/baseline/stf/{}/test/stf-test-pred-{}-{}-{}-{}-{}.npy".format(dataset, dataset, cur_fold, num_latent, lr, iters))
    for idx, appliance in enumerate(APPLIANCE_ORDER[1:-1]):
        fold_pred[appliance] = pred[:, idx]
        

In [140]:
# Show the lowest weighted validation error found for each dataset and fold.
error

{1: {0: 29.571722018336072,
  1: 39.613530039506877,
  2: 36.737164497131957,
  3: 35.640345654782415,
  4: 7.0789894516840253},
 3: {0: 3.5350599938579426,
  1: 3.1145544050109448,
  2: 3.3687931658135155,
  3: 3.4706774548585657,
  4: 3.2146343109090236}}

In [159]:
# Element [2] of the result is model_error: the frac-weighted sum of the
# per-appliance test errors, here for dataset 1 with the current test_pred.
calculate_error(test_pred[1], test_gt[1])[2]

2400.5244493336786

# MTF

In [113]:
# Read every MTF validation-prediction file of the hyper-parameter grid into
# mtf_r[dataset][cur_fold][num_latent][lr][iters]. mean_r is reset to an
# empty dict and not filled in this cell.
mtf_r = {}
mean_r = {}
for dataset, cur_fold, num_latent, lr in itertools.product([1, 3], range(5), range(1, 21), [0.01, 0.1, 1, 2]):
    # Dict keys and file names use the float form of the learning rate (1 -> 1.0).
    lr = float(lr)
    preds_by_iters = {}
    mtf_r.setdefault(dataset, {}).setdefault(cur_fold, {}).setdefault(num_latent, {})[lr] = preds_by_iters
    for iters in range(100, 2600, 400):
        preds_by_iters[iters] = np.load("../code/baseline/mtf/{}/valid/mtf-pred-{}-{}-{}-{}-{}.npy".format(dataset, dataset, cur_fold, num_latent, lr, iters))
                    

In [114]:
# Grid-search the MTF hyper-parameters on the validation split.
# For each dataset/fold, mtf_error holds the lowest frac-weighted validation
# error and mtf_best_p the (num_latent, lr, iters) combination behind it.
mtf_error = {}
mtf_best_p = {}
for dataset, cur_fold in itertools.product([1, 3], range(5)):
    truth = valid_gt[dataset][cur_fold]
    lowest = np.inf
    chosen = {}
    for num_latent, lr, iters in itertools.product(range(1, 21), [0.01, 0.1, 1, 2], range(100, 2600, 400)):
        cur_error = 0
        # Prediction channel idx corresponds to APPLIANCE_ORDER[1:-1][idx].
        for idx, appliance in enumerate(APPLIANCE_ORDER[1:-1]):
            cur_error += frac[appliance]*compute_rms_pec(mtf_r[dataset][cur_fold][num_latent][lr][iters][:, idx],
                                                         truth[appliance], truth['aggregate'])
        # Strict "<" keeps the first combination on ties.
        if cur_error < lowest:
            lowest = cur_error
            chosen = {'num_latent': num_latent, 'lr': lr, 'iters': iters}
    mtf_error.setdefault(dataset, {})[cur_fold] = lowest
    mtf_best_p.setdefault(dataset, {})[cur_fold] = chosen

In [166]:
# For every dataset/fold, load the MTF test predictions stored under the
# hyper-parameters selected in mtf_best_p and split them per appliance:
# test_pred[dataset][cur_fold][appliance] -> pred[:, idx].
# Note that this rebinds test_pred, replacing any earlier model's predictions.
test_pred = {}
for dataset, cur_fold in itertools.product([1, 3], range(5)):
    fold_pred = test_pred.setdefault(dataset, {}).setdefault(cur_fold, {})
    chosen = mtf_best_p[dataset][cur_fold]
    num_latent = chosen['num_latent']
    lr = float(chosen['lr'])
    iters = chosen['iters']

    pred = np.load("../code/baseline/mtf/{}/test/mtf-test-pred-{}-{}-{}-{}-{}.npy".format(dataset, dataset, cur_fold, num_latent, lr, iters))
    for idx, appliance in enumerate(APPLIANCE_ORDER[1:-1]):
        fold_pred[appliance] = pred[:, idx]
        

In [170]:
# Element [1] of the result is `overall`: the home-count-weighted test error
# per appliance, here for dataset 1 with the current test_pred.
calculate_error(test_pred[1], test_gt[1])[1]

{'dr': 104.97141649747095,
 'dw': 6.934563450065399,
 'fridge': 50.901917697789699,
 'hvac': 2301.2214597610496,
 'mw': 1.9560603450885887}

# SC

In [172]:
# Read every SC validation-prediction file into
# sc_r[dataset][cur_fold][num_latent]; the only searched hyper-parameter is
# num_latent (1 through 50).
sc_r = {}
for dataset, cur_fold in itertools.product([1, 3], range(5)):
    fold_preds = sc_r.setdefault(dataset, {}).setdefault(cur_fold, {})
    for num_latent in range(1, 51):
        fold_preds[num_latent] = np.load("../code/baseline/sc-non-nested/{}/sc-non-valid-pred-{}-{}-{}.npy".format(dataset, dataset, cur_fold, num_latent))


In [175]:
# Sanity check: shape of one loaded SC validation prediction
# (dataset 1, fold 0, num_latent 1).
sc_r[1][0][1].shape

(11, 5, 112, 24)

In [182]:
# Pick the SC num_latent with the lowest frac-weighted validation error for
# each dataset/fold. sc_best_error stores that error, sc_best_p the setting.
sc_best_p = {}
sc_best_error = {}
for dataset, cur_fold in itertools.product([1, 3], range(5)):
    truth = valid_gt[dataset][cur_fold]
    lowest = np.inf
    chosen = {}
    for num_latent in range(1, 51):
        cur_error = 0
        # Prediction channel idx corresponds to APPLIANCE_ORDER[1:-1][idx].
        for idx, appliance in enumerate(APPLIANCE_ORDER[1:-1]):
            cur_error += frac[appliance]*compute_rms_pec(sc_r[dataset][cur_fold][num_latent][:, idx],
                                                         truth[appliance], truth['aggregate'])
        # Strict "<" keeps the smallest num_latent on ties.
        if cur_error < lowest:
            lowest = cur_error
            chosen = {'num_latent': num_latent}
    sc_best_p.setdefault(dataset, {})[cur_fold] = chosen
    sc_best_error.setdefault(dataset, {})[cur_fold] = lowest


In [184]:
# For every dataset/fold, load the SC test predictions stored under the
# num_latent selected in sc_best_p and split them per appliance:
# test_pred[dataset][cur_fold][appliance] -> pred[:, idx].
# Note that this rebinds test_pred, replacing any earlier model's predictions.
test_pred = {}
for dataset, cur_fold in itertools.product([1, 3], range(5)):
    fold_pred = test_pred.setdefault(dataset, {}).setdefault(cur_fold, {})
    num_latent = sc_best_p[dataset][cur_fold]['num_latent']

    pred = np.load("../code/baseline/sc-non-nested/{}/sc-non-test-pred-{}-{}-{}.npy".format(dataset, dataset, cur_fold, num_latent))
    for idx, appliance in enumerate(APPLIANCE_ORDER[1:-1]):
        fold_pred[appliance] = pred[:, idx]
        

In [186]:
# Element [2] of the result is model_error: the frac-weighted sum of the
# per-appliance test errors, here for dataset 1 with the current test_pred.
calculate_error(test_pred[1], test_gt[1])[2]

499.82534730931604

# DSC

In [187]:
# Read every DSC validation-prediction file into
# dsc_r[dataset][cur_fold][num_latent][iters] for num_latent 1..50 and
# iters 10, 20, ..., 100.
dsc_r = {}
for dataset, cur_fold, num_latent in itertools.product([1, 3], range(5), range(1, 51)):
    preds_by_iters = {}
    dsc_r.setdefault(dataset, {}).setdefault(cur_fold, {})[num_latent] = preds_by_iters
    for iters in range(10, 110, 10):
        preds_by_iters[iters] = np.load("../code/baseline/sc-with-nested/{}/sc-with-valid-pred-{}-{}-{}-{}.npy".format(dataset, dataset, cur_fold, num_latent, iters))


In [None]:
# Grid-search the DSC hyper-parameters on the validation split.
# For each dataset/fold, dsc_best_error holds the lowest frac-weighted
# validation error and dsc_best_p the (num_latent, iters) pair behind it.
dsc_best_p = {}
dsc_best_error = {}
for dataset, cur_fold in itertools.product([1, 3], range(5)):
    truth = valid_gt[dataset][cur_fold]
    lowest = np.inf
    chosen = {}
    for num_latent, iters in itertools.product(range(1, 51), range(10, 110, 10)):
        cur_error = 0
        # Prediction channel idx corresponds to APPLIANCE_ORDER[1:-1][idx].
        for idx, appliance in enumerate(APPLIANCE_ORDER[1:-1]):
            cur_error += frac[appliance]*compute_rms_pec(dsc_r[dataset][cur_fold][num_latent][iters][:, idx],
                                                         truth[appliance], truth['aggregate'])
        # Strict "<" keeps the first combination on ties.
        if cur_error < lowest:
            lowest = cur_error
            chosen = {'num_latent': num_latent, 'iters': iters}
    dsc_best_p.setdefault(dataset, {})[cur_fold] = chosen
    dsc_best_error.setdefault(dataset, {})[cur_fold] = lowest


In [None]:
# For every dataset/fold, load the DSC test predictions stored under the
# hyper-parameters selected in dsc_best_p and split them per appliance:
# test_pred[dataset][cur_fold][appliance] -> pred[:, idx].
# Note that this rebinds test_pred, replacing any earlier model's predictions.
test_pred = {}
for dataset in [1, 3]:
    test_pred[dataset] = {}
    for cur_fold in range(5):
        test_pred[dataset][cur_fold] = {}
        # Use the settings chosen by the DSC validation search. Reading
        # mtf_best_p here (a copy-paste slip) would evaluate the wrong model
        # configuration and ignore the selected iteration count.
        num_latent = dsc_best_p[dataset][cur_fold]['num_latent']
        iters = dsc_best_p[dataset][cur_fold]['iters']

        # Load from the DSC ("sc-with-nested") directory, not the SC one.
        # NOTE(review): the test file name is assumed to mirror the validation
        # files ("sc-with-valid-pred-...") the same way the SC test files
        # mirror theirs; confirm against the script that writes them.
        pred = np.load("../code/baseline/sc-with-nested/{}/sc-with-test-pred-{}-{}-{}-{}.npy".format(dataset, dataset, cur_fold, num_latent, iters))
        for idx, appliance in enumerate(APPLIANCE_ORDER[1:-1]):
            test_pred[dataset][cur_fold][appliance] = pred[:, idx]
        

# Individual CNN

# Tree CNN