# import

In [None]:
import os
import sys
import math
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


sys.path.append('../..')
import model.Baseline as baseline
import model.my_model as mymodel
import model.util_loss as util_ls
import model.util_dataloader as util_dl
import model.util_model as util_md
import data_process.util_data as util_dt


# Pick the compute device: CUDA device index 6 when CUDA is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:6")
else:
    device = torch.device("cpu")
print("The model will be running on", device, "device")

# Data / result locations (server layout, relative to this notebook).
# For a local run, point these at local directories instead, e.g.:
#   file_save_dir = r'C:\Users\29492\Desktop\exp12'
#   file_load_dir = r'D:\dataset\DataFrames\UKDALE'
file_save_dir = r'../../results'
file_load_dir = r'../../data/UKDALE'

# Helpers used below to save metric dictionaries (dm.save_dict) and to
# save/load model weights (mwm.save_model_weight / mwm.load_model_weight);
# both are rooted at the results directory.
dm = util_md.DictManager(file_save_dir)
mwm = util_md.ModelWeightManager(file_save_dir)

# prepare data

In [None]:
# Read the house-1 table and use its 'time' column as the row index.
dataframe_path = os.path.join(file_load_dir, r'house_1.csv')
df = pd.read_csv(dataframe_path).set_index('time')
# df.describe(percentiles=[0.10,0.25,0.50,0.75,0.8,0.85,0.9,0.95])


# Columns of the house-1 DataFrame that are selected as appliance channels.
# NOTE(review): the numeric suffix looks like the meter/channel number of the
# dataset (it starts at 2, so channel 1 is not selected here) — confirm
# against the CSV header.
house_1_cols = [
 'boiler-2',
 'solar_thermal_pump-3',
 'laptop-4',
 'washing_machine-5',
 'dishwasher-6',
 'tv-7',
 'kitchen_lights-8',
 'htpc-9',
 'kettle-10',
 'toaster-11',
 'fridge-12',
 'microwave-13',
 'lcd_office-14',
 'hifi_office-15',
 'breadmaker-16',
 'amp_livingroom-17',
 'adsl_router-18',
 'livingroom_s_lamp-19',
 'soldering_iron-20',
 'gigE_&_USBhub-21',
 'hoover-22',
 'kitchen_dt_lamp-23',
 'bedroom_ds_lamp-24',
 'lighting_circuit-25',
 'livingroom_s_lamp2-26',
 'iPad_charger-27',
 'subwoofer_livingroom-28',
 'livingroom_lamp_tv-29',
 'DAB_radio_livingroom-30',
 'kitchen_lamp2-31',
 'kitchen_phone&stereo-32',
 'utilityrm_lamp-33',
 'samsung_charger-34',
 'bedroom_d_lamp-35',
 'coffee_machine-36',
 'kitchen_radio-37',
 'bedroom_chargers-38',
 'hair_dryer-39',
 'straighteners-40',
 'iron-41',
 'gas_oven-42',
 'data_logger_pc-43',
 'childs_table_lamp-44',
 'childs_ds_lamp-45',
 'baby_monitor_tx-46',
 'battery_charger-47',
 'office_lamp1-48',
 'office_lamp2-49',
 'office_lamp3-50',
 'office_pc-51',
 'office_fan-52',
 'LED_printer-53']
# Work on an explicit copy of the selected appliance columns: a 'sum' column
# is added below, and assigning into a selection of `df` triggers pandas'
# SettingWithCopyWarning (and is ambiguous about which object is modified).
filtered_columns = df[house_1_cols].copy()
# filtered_columns.describe(percentiles=[0.10,0.25,0.50,0.6,0.75,0.8,0.85,0.9,0.95])

# Appliance names, captured before the aggregate column is appended so that
# 'sum' is not part of the list.
apps_name = filtered_columns.columns.to_list()
print(apps_name)

# The aggregate ("main") signal is the row-wise sum of the selected appliance
# columns; it is appended as the LAST column so it can be split off by
# position after normalisation.
filtered_columns['sum'] = filtered_columns.sum(axis=1)
# NOTE(review): presumably rescales the whole array into [0, 1] and returns
# the min/max it used — confirm in util_dt.normalization_interval.
norm_data, min_val, max_val = util_dt.normalization_interval(filtered_columns.to_numpy(), 0, 1)
main = norm_data[:, -1]    # last column: the 'sum' aggregate
apps = norm_data[:, :-1]   # remaining columns: one per entry of apps_name
print(f'min: {min_val}; max: {max_val}')
print(np.min(main), np.max(main))
print(np.min(apps), np.max(apps))
util_dt.print_shape(main, apps)

# On/off threshold, expressed in the un-normalised units of the data; it is
# rescaled with min_val/max_val wherever on/off labels are built.
on_threshold = 30
# Length of the input window of the aggregate signal.
sliding_window_len = 599

In [None]:
# Model input length (one aggregate window) and the number of appliance outputs.
in_seq_l = sliding_window_len
out_dim = len(apps_name)
print(f'in_seq_l: {in_seq_l}; out_dim: {out_dim}')

# Span of the un-normalised data; passed to the training helpers as the loss
# factor.
loss_factor = max_val - min_val
print(f'loss_factor:{loss_factor}')

# Metrics for the single-appliance baselines.
metric_dict_one2one = {
    'MAE(apps)': util_ls.MAE(single_class=True),
    'SAE': util_ls.SignalAggregateError(single_class=True, period_len=450),
    'MAE(offon)': util_ls.MAE_off_on(single_class=True),
}

# Metrics for the all-appliance model: both the single_class=False and the
# single_class=True variants of MAE and SAE are recorded.
metric_dict_one2all = {
    'MAE(apps)': util_ls.MAE(single_class=False),
    'MAE(mean)': util_ls.MAE(single_class=True),
    'SAE(apps)': util_ls.SignalAggregateError(single_class=False, period_len=450),
    'SAE(mean)': util_ls.SignalAggregateError(single_class=True, period_len=450),
    'MAE(offon)': util_ls.MAE_off_on(single_class=False),
}

# Appliances that get their own baseline model; print each one's column index.
apps_to_train = [
    'laptop-4',
    'washing_machine-5',
    'kettle-10',
    'toaster-11',
    'fridge-12',
    'microwave-13',
]
for app in apps_to_train:
    print(apps_name.index(app), app)

# S2P data

In [None]:
# Windowed samples for the point-target models: a sliding_window_len window
# of the aggregate paired with a length-1 window of every appliance, using an
# offset of half the window length (rounded down).
w_main, w_apps = util_dt.generate_window_samples(
    input_1=main,
    input_2=apps,
    window_size_1=sliding_window_len,
    window_size_2=1,
    offset=sliding_window_len // 2,
)
# Insert a new axis at position 1 of the aggregate windows.
w_main = np.expand_dims(w_main, axis=1)
# Sum of all appliance targets per sample, with an axis inserted at position 1.
w_apps_sum = np.expand_dims(w_apps.sum(axis=-1), axis=1)
# On/off labels: on_threshold is rescaled with the same min/max that were
# returned by the normalisation before being compared with the targets.
w_apps_on = (w_apps > (on_threshold - min_val) / (max_val - min_val)).astype('int')
util_dt.print_shape(w_main, w_apps, w_apps_sum, w_apps_on)

# S2P

In [None]:
# Seq2Point baseline: train, validate and test one model per target appliance.
epoch_n = 10
b_s = 450 * 10
eval_b_s = 450 * 10

for app in apps_to_train:
    app_index = apps_name.index(app)
    print(app, app_index)
    model = baseline.Seq2Point(in_seq_l, 1)

    # Slice (rather than index) the appliance column so the column axis is kept.
    app_cols = slice(app_index, app_index + 1)
    dataset = util_dl.VariableDataset(
        torch.from_numpy(w_main).float(),
        torch.from_numpy(w_apps[:, app_cols]).float(),
        torch.from_numpy(w_apps_on[:, app_cols]).float(),
    )
    dataset.describe()

    # 80/20 split (ordered, per the helper name); the first part is split
    # again 75/25 (randomly, per the helper name) into train/validation loaders.
    subset_1, subset_2 = util_dl.split_Dataset_orderly(dataset, [0.8, 0.2])
    trainset, valset = util_dl.split_to_DataLoader_randomly(
        dataset=subset_1, split_ratio=[0.75, 0.25], batch_size=b_s, shuffle_flag=True)
    testset = util_dl.just_to_DataLoader(
        dataset=subset_2, batch_size=eval_b_s, shuffle_flag=False)

    score_train_vali = util_md.train_val(
        device, model, epoch_n, trainset, valset,
        loss_factor=loss_factor,
        return_metric_dict=metric_dict_one2one,
        decay_params=[2, 0.5],
    )
    del trainset, valset

    # Second call passes only the test loader as `valset` (no epoch count, no
    # training loader) to score the trained model on the held-out part.
    score_test = util_md.train_val(
        device, model, valset=testset,
        loss_factor=loss_factor,
        return_metric_dict=metric_dict_one2one,
    )
    del testset
    del dataset, subset_1, subset_2

    total_record = {'train_vali': score_train_vali, 'test': score_test}

    # Scores and weights are stored under the same name.
    filename = f'UKDALE_S2P_{app}_0101'
    dm.save_dict(total_record, filename)
    mwm.save_model_weight(model, filename)


In [None]:
# Re-evaluate previously saved Seq2Point weights on the held-out 20% split.
eval_b_s = 450*4
# Report the batch size this cell actually uses. The original printed `b_s`,
# which is not set in this cell (stale value from whichever training cell ran
# last, or a NameError on a fresh kernel).
print('Batch size: ', eval_b_s)
for app in apps_to_train:
    app_index = apps_name.index(app)
    print(app, app_index)
    model = baseline.Seq2Point(in_seq_l, 1)
    # NOTE(review): weights are loaded with date tag '1231', while the S2P
    # training cell in this notebook saves with tag '0101' — confirm which run
    # is meant to be evaluated.
    mwm.load_model_weight(model, '{dataset}_{model}_{appname}_{date}'.format(dataset='UKDALE',model='S2P',appname=app,date='1231'))
    dataset = util_dl.VariableDataset(torch.from_numpy(w_main).to(torch.float32),
                                      torch.from_numpy(w_apps[:, app_index:app_index+1]).to(torch.float32),
                                      torch.from_numpy(w_apps_on[:, app_index:app_index+1]).to(torch.float32))
    dataset.describe()
    # Same 80/20 split as in training; only the second part is used here.
    subset_1, subset_2 = util_dl.split_Dataset_orderly(dataset, [0.8,0.2])
    testset = util_dl.just_to_DataLoader(dataset=subset_2, batch_size=eval_b_s, shuffle_flag=False)

    # Only the test loader is passed (as `valset`): no epoch count, no
    # training loader.
    score_test = util_md.train_val(device, model, valset=testset, loss_factor=loss_factor,
                        return_metric_dict=metric_dict_one2one)

# SGN

In [None]:
# SGN baseline: one model per target appliance, trained with the double-task
# helper, which receives both the power target and the on/off label.
epoch_n = 10
b_s = 450 * 20
eval_b_s = 450 * 20

print('Batch size: ', b_s)
for app in apps_to_train:
    app_index = apps_name.index(app)
    print(app, app_index)
    model = baseline.SGN(in_seq_l)

    # Slice (rather than index) the appliance column so the column axis is kept.
    app_cols = slice(app_index, app_index + 1)
    dataset = util_dl.VariableDataset(
        torch.from_numpy(w_main).float(),
        torch.from_numpy(w_apps[:, app_cols]).float(),
        torch.from_numpy(w_apps_on[:, app_cols]).float(),
    )
    dataset.describe()

    # 80/20 split, then 75/25 of the first part into train/validation loaders.
    subset_1, subset_2 = util_dl.split_Dataset_orderly(dataset, [0.8, 0.2])
    trainset, valset = util_dl.split_to_DataLoader_randomly(
        dataset=subset_1, split_ratio=[0.75, 0.25], batch_size=b_s, shuffle_flag=True)
    testset = util_dl.just_to_DataLoader(
        dataset=subset_2, batch_size=eval_b_s, shuffle_flag=False)

    # The helper returns a pair; only its first element is kept.
    score_train_vali, _ = util_md.train_val_DoubleTask(
        device, model, epoch_n, trainset, valset,
        loss_reg_factor=loss_factor,
        return_metric_reg_dict=metric_dict_one2one,
        decay_params=[2, 0.5],
    )
    del trainset, valset

    # Test pass: only the test loader is supplied (as `valset`).
    score_test, _ = util_md.train_val_DoubleTask(
        device, model, valset=testset,
        loss_reg_factor=loss_factor,
        return_metric_reg_dict=metric_dict_one2one,
    )
    del testset
    del dataset, subset_1, subset_2

    total_record = {'train_vali': score_train_vali, 'test': score_test}

    # Scores and weights are stored under the same name.
    filename = f'UKDALE_SGN_{app}_0422'
    dm.save_dict(total_record, filename)
    mwm.save_model_weight(model, filename)

# Mark final

In [None]:
# Final all-appliance model: exp_num independent runs on the same data split,
# each saved under its own run index.
epoch_n = 50
b_s = 450*10
eval_b_s = 450*20

exp_num = 5

# Tensors handed to the dataset, in order: aggregate windows, on/off labels
# for all appliances, summed appliance target, per-appliance targets.
dataset = util_dl.VariableDataset(torch.from_numpy(w_main).to(torch.float32),
                            torch.from_numpy(w_apps_on).to(torch.float32),
                            torch.from_numpy(w_apps_sum).to(torch.float32),
                            torch.from_numpy(w_apps).to(torch.float32))
dataset.describe()
# 80/20 split, then 75/25 of the first part into train/validation loaders.
# The loaders are built once and shared by all runs.
subset_1, subset_2 = util_dl.split_Dataset_orderly(dataset, [0.8,0.2])
trainset, valset = util_dl.split_to_DataLoader_randomly(dataset=subset_1, split_ratio=[0.75,0.25], batch_size=b_s, shuffle_flag=True)
testset = util_dl.just_to_DataLoader(dataset=subset_2, batch_size=eval_b_s, shuffle_flag=False)

for i in range(exp_num):

    # A fresh model for every run.
    model = mymodel.Model_final(in_seq_l, out_dim)

    score_train_vali, _ = util_md.train_val_Mark_DoubleTask_AutomaticWeightedLoss(device, model, epoch_n, trainset, valset,
                                        loss_reg_factor=loss_factor, return_metric_reg_dict=metric_dict_one2all, decay_params=[2,0.5])

    # Test pass: only the test loader is supplied (as `valset`).
    score_test, _  = util_md.train_val_Mark_DoubleTask_AutomaticWeightedLoss(device, model, valset=testset, loss_reg_factor=loss_factor,
                                        return_metric_reg_dict=metric_dict_one2all)

    total_record = {'train_vali':score_train_vali, 'test':score_test}
    filename = f'UKDALE_Final_all_0101-{i}'
    dm.save_dict(total_record, filename)
    mwm.save_model_weight(model, filename)

# Release only the training/validation loaders. `testset` is deliberately kept:
# the evaluation cell that follows passes it to the evaluation helper, and
# deleting it here made that cell fail with a NameError when run in order.
del trainset, valset

In [None]:
# Evaluate a saved all-appliance model on the test loader.
# Requires `testset` (built in the 'Mark final' training cell) to be in scope.
model = mymodel.Model_final(in_seq_l, out_dim)
# NOTE(review): the weight name 'UKDALE_Final_all_0720010' does not follow the
# 'UKDALE_Final_all_0101-{i}' pattern written by the training cell in this
# notebook — presumably weights from an earlier run; confirm the file exists.
mwm.load_model_weight(model,'UKDALE_Final_all_0720010')
# Only the test loader is passed (as `valset`); the second element of the
# returned pair is discarded.
score_test, _ = util_md.train_val_Mark_DoubleTask_AutomaticWeightedLoss(device, model, valset=testset, loss_reg_factor=loss_factor,
                    return_metric_reg_dict=metric_dict_one2all)

In [None]:
# Collect the per-appliance test metrics of the final model in a table.
# `infodf` is not created anywhere in this notebook, so the cell raised a
# NameError on a fresh kernel; create an empty table only when it is missing,
# leaving an `infodf` that already exists in the session untouched.
# NOTE(review): indexing by `apps_name` assumes the metric vectors hold one
# value per appliance, in `apps_name` order — confirm against util_ls.
if 'infodf' not in globals():
    infodf = pd.DataFrame(index=apps_name)

val_scores = score_test['val']
infodf['MAE(apps)'] = val_scores['MAE(apps)'].squeeze()
infodf['SAE(apps)'] = val_scores['SAE(apps)'].squeeze()
# 'MAE(offon)' carries two entries: index 0 is stored as the "off" error and
# index 1 as the "on" error.
mae01 = val_scores['MAE(offon)'].squeeze()
infodf['MAE(off)'] = mae01[0]
infodf['MAE(on)'] = mae01[1]

In [None]:
# Display the metric table (the last expression of a notebook cell is rendered).
infodf

# S2S

In [None]:
# Seq2Seq baseline: one model per target appliance; input and target windows
# both have length sliding_window_len (offset 0).
epoch_n = 10
b_s = 450 * 10
eval_b_s = 450 * 10

for app in apps_to_train:
    app_index = apps_name.index(app)
    print(app, app_index)
    model = baseline.Seq2Seq(in_seq_l, in_seq_l)

    # NOTE: this rebinds the notebook-level w_main / w_apps / w_apps_on that
    # the point-target cells created.
    w_main, w_apps = util_dt.generate_window_samples(
        input_1=main,
        input_2=apps[:, app_index],
        window_size_1=sliding_window_len,
        window_size_2=sliding_window_len,
        offset=0,
    )
    w_main = np.expand_dims(w_main, axis=1)
    # The targets are left without an extra axis here:
    # w_apps= np.expand_dims(w_apps, axis=1)
    w_apps_on = (w_apps > (on_threshold - min_val) / (max_val - min_val)).astype('int')

    dataset = util_dl.VariableDataset(
        torch.from_numpy(w_main).float(),
        torch.from_numpy(w_apps).float(),
        torch.from_numpy(w_apps_on).float(),
    )
    dataset.describe()

    # 80/20 split, then 75/25 of the first part into train/validation loaders.
    subset_1, subset_2 = util_dl.split_Dataset_orderly(dataset, [0.8, 0.2])
    trainset, valset = util_dl.split_to_DataLoader_randomly(
        dataset=subset_1, split_ratio=[0.75, 0.25], batch_size=b_s, shuffle_flag=True)
    testset = util_dl.just_to_DataLoader(
        dataset=subset_2, batch_size=eval_b_s, shuffle_flag=False)

    score_train_vali = util_md.train_val(
        device, model, epoch_n, trainset, valset,
        loss_factor=loss_factor,
        decay_params=[2, 0.5],
        return_metric_dict=metric_dict_one2one,
    )
    del trainset, valset

    # Test pass: only the test loader is supplied (as `valset`).
    score_test = util_md.train_val(
        device, model, valset=testset,
        loss_factor=loss_factor,
        return_metric_dict=metric_dict_one2one,
    )
    del testset
    del dataset, subset_1, subset_2

    total_record = {'train_vali': score_train_vali, 'test': score_test}

    # Scores and weights are stored under the same name.
    filename = f'UKDALE_S2S_{app}_0101'
    dm.save_dict(total_record, filename)
    mwm.save_model_weight(model, filename)

# DAE

In [None]:
# DAE baseline: one model per target appliance; input and target windows both
# have length sliding_window_len (offset 0) and both get an axis inserted at
# position 1.
epoch_n = 10
b_s = 450 * 10
eval_b_s = 450 * 10

for app in apps_to_train:
    app_index = apps_name.index(app)
    print(app, app_index)
    model = baseline.DAE(sliding_window_len)

    # NOTE: this rebinds the notebook-level w_main / w_apps / w_apps_on, and
    # they are deleted again at the end of every iteration.
    w_main, w_apps = util_dt.generate_window_samples(
        input_1=main,
        input_2=apps[:, app_index],
        window_size_1=sliding_window_len,
        window_size_2=sliding_window_len,
        offset=0,
    )
    w_main = np.expand_dims(w_main, axis=1)
    w_apps = np.expand_dims(w_apps, axis=1)
    w_apps_on = (w_apps > (on_threshold - min_val) / (max_val - min_val)).astype('int')

    dataset = util_dl.VariableDataset(
        torch.from_numpy(w_main).float(),
        torch.from_numpy(w_apps).float(),
        torch.from_numpy(w_apps_on).float(),
    )
    dataset.describe()

    # 80/20 split; the `dataset` name is dropped as soon as the split exists.
    subset_1, subset_2 = util_dl.split_Dataset_orderly(dataset, [0.8, 0.2])
    del dataset
    trainset, valset = util_dl.split_to_DataLoader_randomly(
        dataset=subset_1, split_ratio=[0.75, 0.25], batch_size=b_s, shuffle_flag=True)
    testset = util_dl.just_to_DataLoader(
        dataset=subset_2, batch_size=eval_b_s, shuffle_flag=False)

    # This is the only baseline cell that passes an explicit learning rate.
    score_train_vali = util_md.train_val(
        device, model, epoch_n, trainset, valset,
        loss_factor=loss_factor,
        decay_params=[2, 0.5],
        return_metric_dict=metric_dict_one2one,
        lr=5e-4,
    )
    del trainset, valset

    # Test pass: only the test loader is supplied (as `valset`).
    score_test = util_md.train_val(
        device, model, valset=testset,
        loss_factor=loss_factor,
        return_metric_dict=metric_dict_one2one,
    )
    del testset
    del w_main, w_apps, w_apps_on

    total_record = {'train_vali': score_train_vali, 'test': score_test}

    # Scores and weights are stored under the same name.
    filename = f'UKDALE_DAE_{app}_0101'
    dm.save_dict(total_record, filename)
    mwm.save_model_weight(model, filename)