# Load Agent

Load Agent (extracts the typical devices’ loads)
Predict the device load!


We do not include an explanation for the Load Agent since we do not consider the extracted typical load profile of every shiftable device as informative to the users

## **1. Load And Preprocess Data**

This part's only purpose is to load the data used in the Load Agent. This process is described in detail in the Preparation Agent. 

**Note: When computing the script with another Household than Household 1 you might need to adapt some parameters**

### **1.1 Initialize And Load Python Scripts**

In [1]:
import pandas as pd
import numpy as np
import os
import sqlite3
dir = 'D:/Master BWL HU/3. Semester/Seminar Information Systems/Seminar-Information-Systems-main'
os.chdir(dir)

from helper_functions import Helper
from agents import Preparation_Agent
import pandas as pd

helper = Helper()

dbfile  = "D:/Master BWL HU/3. Semester/Seminar Information Systems/Seminar-Information-Systems-main/home-assistant_Chris_v3.db"


  from pandas import MultiIndex, Int64Index


### **1.2 Set Parameters For Pre-processing Step**

In [2]:
shiftable_devices = ["sensor.shellyplug_s_4022d88961b4_power", "sensor.shellyplug_s_4022d88984b8_power"]

truncation_params = {
    'features': 'all', 
    'factor': 1.5, 
    'verbose': 1
}

scale_params = {
    'features': 'all', 
    'kind': 'MinMax', 
    'verbose': 1
}

aggregate_params = {
    'resample_param': '60T'
}
#update with active appliances attributes_ids
activity_params = {
    'active_appliances': shiftable_devices,
    'threshold': .15
}

time_params = {
    'features': ['hour', 'day_name']
}

activity_lag_params = {
    'features': ['activity'],
    'lags': [24, 48, 72]
}

device_params = {
    'threshold': 1
}

load_pipe_params = {
    'truncate': truncation_params,
    'scale': scale_params,
    'aggregate': aggregate_params,
    'shiftable_devices': shiftable_devices, 
    'device': device_params
}

### **1.3 Pre-process Data For Input In Device_Usage Agent**

In [3]:
# calling the preparation pipelin
import pandas as pd
prep = Preparation_Agent(dbfile, shiftable_devices)
output, scaled, df = prep.pipeline_load(prep.input, load_pipe_params)
output

Unnamed: 0_level_0,sensor.shellyplug_s_4022d88961b4_power,sensor.shellyplug_s_4022d88984b8_power
last_updated,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-12-25 18:00:00,0.0,0.000000
2022-12-25 19:00:00,0.0,0.000000
2022-12-25 20:00:00,0.0,0.000000
2022-12-25 21:00:00,0.0,0.000000
2022-12-25 22:00:00,0.0,0.000000
...,...,...
2023-01-09 06:00:00,0.0,0.000000
2023-01-09 07:00:00,0.0,0.000000
2023-01-09 08:00:00,0.0,0.000000
2023-01-09 09:00:00,0.0,27.498667


In [6]:
df = output
df

Unnamed: 0_level_0,sensor.shellyplug_s_4022d88961b4_power,sensor.shellyplug_s_4022d88984b8_power
last_updated,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-12-25 18:00:00,0.0,0.000000
2022-12-25 19:00:00,0.0,0.000000
2022-12-25 20:00:00,0.0,0.000000
2022-12-25 21:00:00,0.0,0.000000
2022-12-25 22:00:00,0.0,0.000000
...,...,...
2023-01-09 06:00:00,0.0,0.000000
2023-01-09 07:00:00,0.0,0.000000
2023-01-09 08:00:00,0.0,0.000000
2023-01-09 09:00:00,0.0,27.498667


## **2.  Constructing the Load Agent**

### **2.1 Initialize Agent**

First we define the **Load Agent class**. It takes as input the data generated by the prep.pipeline_usage function computed above.

In [7]:
class Load_Agent:
    def __init__(self, load_input_df):
        self.input = load_input_df

### Truncate start and end dates if observations are missing
Truncating days where there are less than 24 reported hours

In [8]:
# selecting the correct data, identifying device runs, creating load profiles
# -------------------------------------------------------------------------------------------
def prove_start_end_date(self, df, date):
    import pandas as pd

    start_date = (df.index[0]).strftime("%Y-%m-%d")
    end_date = date
    
    if len(df.loc[start_date]) < 24:
        start_date = (pd.to_datetime(start_date) + pd.Timedelta(days=1)).strftime(
            "%Y-%m-%d"
        )
        df = df[start_date:end_date]
    else:
        df = df[:end_date]

    if end_date not in df:
        return df
    
    if len(df.loc[end_date]) < 24:
        end_new = (pd.to_datetime(end_date) - pd.Timedelta(days=1)).strftime(
            "%Y-%m-%d"
        )
        df = df[:end_new]
    else:
        df = df[:end_date]
    return df

# add to Load agent
setattr(Load_Agent, 'prove_start_end_date', prove_start_end_date)
del prove_start_end_date 

In [9]:
date = '2023-01-08'

In [10]:
Load_Agent_i = Load_Agent(df) 
df = Load_Agent_i.prove_start_end_date(df, date)

In [11]:
df

Unnamed: 0_level_0,sensor.shellyplug_s_4022d88961b4_power,sensor.shellyplug_s_4022d88984b8_power
last_updated,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-12-26 00:00:00,0.0,0.0
2022-12-26 01:00:00,0.0,0.0
2022-12-26 02:00:00,0.0,0.0
2022-12-26 03:00:00,0.0,0.0
2022-12-26 04:00:00,0.0,0.0
...,...,...
2023-01-08 19:00:00,0.0,0.0
2023-01-08 20:00:00,0.0,0.0
2023-01-08 21:00:00,0.0,0.0
2023-01-08 22:00:00,0.0,0.0


### Exlude today, use up until yesterday

Only use dates excluding the date we want to predict, so use all up until "yesterday"

In [12]:
def df_yesterday_date(self, df, date):
    import pandas as pd

    yesterday = (pd.to_datetime(date) - pd.Timedelta(days=1)).strftime("%Y-%m-%d")
    return df[:yesterday]
# add to Load agent
setattr(Load_Agent, 'df_yesterday_date', df_yesterday_date)
del df_yesterday_date 

In [13]:
Load_Agent_i = Load_Agent(df) 
df = Load_Agent_i.df_yesterday_date(df, date)
df

Unnamed: 0_level_0,sensor.shellyplug_s_4022d88961b4_power,sensor.shellyplug_s_4022d88984b8_power
last_updated,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-12-26 00:00:00,0.0,0.0
2022-12-26 01:00:00,0.0,0.0
2022-12-26 02:00:00,0.0,0.0
2022-12-26 03:00:00,0.0,0.0
2022-12-26 04:00:00,0.0,0.0
...,...,...
2023-01-07 19:00:00,0.0,0.0
2023-01-07 20:00:00,0.0,0.0
2023-01-07 21:00:00,0.0,0.0
2023-01-07 22:00:00,0.0,0.0


In [14]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(df['sensor.shellyplug_s_4022d88984b8_power'])

last_updated
2022-12-26 00:00:00       0.000000
2022-12-26 01:00:00       0.000000
2022-12-26 02:00:00       0.000000
2022-12-26 03:00:00       0.000000
2022-12-26 04:00:00       0.000000
2022-12-26 05:00:00       0.000000
2022-12-26 06:00:00       0.000000
2022-12-26 07:00:00       0.000000
2022-12-26 08:00:00       0.000000
2022-12-26 09:00:00       0.000000
2022-12-26 10:00:00       0.000000
2022-12-26 11:00:00     288.104918
2022-12-26 12:00:00     441.213672
2022-12-26 13:00:00      78.097281
2022-12-26 14:00:00      94.110562
2022-12-26 15:00:00       0.000000
2022-12-26 16:00:00       0.000000
2022-12-26 17:00:00       0.000000
2022-12-26 18:00:00       0.000000
2022-12-26 19:00:00       0.000000
2022-12-26 20:00:00       0.000000
2022-12-26 21:00:00       0.000000
2022-12-26 22:00:00       0.000000
2022-12-26 23:00:00       0.000000
2022-12-27 00:00:00       0.000000
2022-12-27 01:00:00       0.000000
2022-12-27 02:00:00       0.000000
2022-12-27 03:00:00       0.000000
2022-12

### Creating df_hours
Creating raw load profile for each hour of each day
Creating new data frame df_hours

In [15]:
def load_profile_raw(self, df, shiftable_devices):
    import pandas as pd

    hours = []
    for hour in range(1, 25):
        hours.append("h" + str(hour))
    df_hours = {}

    for idx, appliance in enumerate(
        shiftable_devices
    ):
        df_hours[appliance] = pd.DataFrame(index=None, columns=hours)
        column = df[appliance]

        for i in range(len(column)):

            if (i == 0) and (column[0] > 0):
                df_hours[appliance].loc[0, "h" + str(1)] = column[0]

            if (column[i - 1] == 0) and (column[i] > 0):
                for j in range(0, 24):
                    if (i + j) < len(column):
                        if column[i + j] > 0:
                            df_hours[appliance].loc[i, "h" + str(j + 1)] = column[
                                i + j
                            ]
    return df_hours

# add to Load agent
setattr(Load_Agent, 'load_profile_raw', load_profile_raw)
del load_profile_raw 

In [16]:
Load_Agent_i = Load_Agent(df) 
df_hours = Load_Agent_i.load_profile_raw(df, shiftable_devices)

In [17]:
df_hours['sensor.shellyplug_s_4022d88984b8_power']

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,...,h15,h16,h17,h18,h19,h20,h21,h22,h23,h24
11,288.104918,441.213672,78.097281,94.110562,,,,,,,...,,,,,,,,,,
35,1367.209816,172.397639,101.281841,96.976491,,,,,,,...,,,,,,,,,,
113,221.466839,309.251868,112.707377,,,,,,,,...,,,,,1360.677778,183.523295,77.974684,69.033941,,
131,1360.677778,183.523295,77.974684,69.033941,,,,,,,...,,,,,,,,,,
157,736.927223,81.303515,80.515755,103.77455,1381.232203,96.614206,5.157143,,,,...,,,,,,,,,,
281,5.25,309.161733,81.135623,106.920528,,,,,,,...,,,,,,,,,,


In [18]:
df_hours['sensor.shellyplug_s_4022d88961b4_power']

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,...,h15,h16,h17,h18,h19,h20,h21,h22,h23,h24
18,795.214932,541.985593,,,,,,,,,...,,319.984146,379.604,,,,,,,
33,319.984146,379.604,,,,,,,,,...,,,,,,,,,,
79,736.494286,984.130769,318.092188,,,,,,,,...,,,,,,,,,,
110,408.513615,45.988321,308.830833,1.119171,,,,,,,...,,,,,,,,,,
175,639.325,823.634615,,,,,,,,,...,,,,,,,,,,


### Cleaning df_hours
Cleaning the new data frame df_hours

In [19]:
def load_profile_cleaned(self, df_hours):
    import numpy as np

    for app in df_hours.keys():
        for i in df_hours[app].index:
            for j in df_hours[app].columns:
                if np.isnan(df_hours[app].loc[i, j]):
                    df_hours[app].loc[i, j:] = 0
    return df_hours

# add to Load agent
setattr(Load_Agent, 'load_profile_cleaned', load_profile_cleaned)
del load_profile_cleaned 

In [20]:
Load_Agent_i = Load_Agent(df) 
df_hours = Load_Agent_i.load_profile_cleaned(df_hours)

In [21]:
df_hours

{'sensor.shellyplug_s_4022d88961b4_power':              h1          h2          h3        h4 h5 h6 h7 h8 h9 h10  ... h15  \
 18   795.214932  541.985593           0         0  0  0  0  0  0   0  ...   0   
 33   319.984146     379.604           0         0  0  0  0  0  0   0  ...   0   
 79   736.494286  984.130769  318.092188         0  0  0  0  0  0   0  ...   0   
 110  408.513615   45.988321  308.830833  1.119171  0  0  0  0  0   0  ...   0   
 175     639.325  823.634615           0         0  0  0  0  0  0   0  ...   0   
 
     h16 h17 h18 h19 h20 h21 h22 h23 h24  
 18    0   0   0   0   0   0   0   0   0  
 33    0   0   0   0   0   0   0   0   0  
 79    0   0   0   0   0   0   0   0   0  
 110   0   0   0   0   0   0   0   0   0  
 175   0   0   0   0   0   0   0   0   0  
 
 [5 rows x 24 columns],
 'sensor.shellyplug_s_4022d88984b8_power':               h1          h2          h3          h4           h5         h6  \
 11    288.104918  441.213672   78.097281   94.110562    

In [22]:
df_hours['sensor.shellyplug_s_4022d88984b8_power']

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,...,h15,h16,h17,h18,h19,h20,h21,h22,h23,h24
11,288.104918,441.213672,78.097281,94.110562,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
35,1367.209816,172.397639,101.281841,96.976491,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
113,221.466839,309.251868,112.707377,0.0,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
131,1360.677778,183.523295,77.974684,69.033941,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
157,736.927223,81.303515,80.515755,103.77455,1381.232203,96.614206,5.157143,0,0,0,...,0,0,0,0,0,0,0,0,0,0
281,5.25,309.161733,81.135623,106.920528,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Load Profiles


In [23]:
def load_profile(self, df_hours, shiftable_devices):
    import pandas as pd

    hours = df_hours[shiftable_devices[0]].columns
    loads = pd.DataFrame(columns=hours)

    for app in df_hours.keys():
        app_mean = df_hours[app].apply(lambda x: x.mean(), axis=0)
        for hour in app_mean.index:
            loads.loc[app, hour] = app_mean[hour]

    loads = loads.fillna(0)
    return loads
# add to Load agent
setattr(Load_Agent, 'load_profile', load_profile)
del load_profile 

In [24]:
Load_Agent_i = Load_Agent(df) 
loads = Load_Agent_i.load_profile(df_hours, shiftable_devices)
loads

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,...,h15,h16,h17,h18,h19,h20,h21,h22,h23,h24
sensor.shellyplug_s_4022d88961b4_power,579.906396,555.06866,125.384604,0.223834,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sensor.shellyplug_s_4022d88984b8_power,663.272762,249.475287,88.61876,78.469345,230.205367,16.102368,0.859524,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Evaluation Load

### Get true loads

In [25]:
# evaluating the performance of the load agent
# -------------------------------------------------------------------------------------------
def get_true_loads(self, shiftable_devices):
    true_loads = self.load_profile_raw(self.input, shiftable_devices)
    true_loads = self.load_profile_cleaned(true_loads)
    for device, loads in true_loads.items():
        true_loads[device].rename(
            index=dict(enumerate(self.input.index)), inplace=True
        )
    return true_loads
# add to Load agent
setattr(Load_Agent, 'get_true_loads', get_true_loads)
del get_true_loads 

In [26]:
Load_Agent_i = Load_Agent(df) 
true_loads = Load_Agent_i.get_true_loads(shiftable_devices)
true_loads

{'sensor.shellyplug_s_4022d88961b4_power':                              h1          h2          h3        h4 h5 h6 h7 h8  \
 2022-12-26 18:00:00  795.214932  541.985593           0         0  0  0  0  0   
 2022-12-27 09:00:00  319.984146     379.604           0         0  0  0  0  0   
 2022-12-29 07:00:00  736.494286  984.130769  318.092188         0  0  0  0  0   
 2022-12-30 14:00:00  408.513615   45.988321  308.830833  1.119171  0  0  0  0   
 2023-01-02 07:00:00     639.325  823.634615           0         0  0  0  0  0   
 
                     h9 h10  ... h15 h16 h17 h18 h19 h20 h21 h22 h23 h24  
 2022-12-26 18:00:00  0   0  ...   0   0   0   0   0   0   0   0   0   0  
 2022-12-27 09:00:00  0   0  ...   0   0   0   0   0   0   0   0   0   0  
 2022-12-29 07:00:00  0   0  ...   0   0   0   0   0   0   0   0   0   0  
 2022-12-30 14:00:00  0   0  ...   0   0   0   0   0   0   0   0   0   0  
 2023-01-02 07:00:00  0   0  ...   0   0   0   0   0   0   0   0   0   0  
 
 [5 rows x 2

In [27]:
true_loads['sensor.shellyplug_s_4022d88984b8_power']

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,...,h15,h16,h17,h18,h19,h20,h21,h22,h23,h24
2022-12-26 11:00:00,288.104918,441.213672,78.097281,94.110562,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-12-27 11:00:00,1367.209816,172.397639,101.281841,96.976491,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-12-30 17:00:00,221.466839,309.251868,112.707377,0.0,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-12-31 11:00:00,1360.677778,183.523295,77.974684,69.033941,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-01-01 13:00:00,736.927223,81.303515,80.515755,103.77455,1381.232203,96.614206,5.157143,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-01-06 17:00:00,5.25,309.161733,81.135623,106.920528,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Create pipline

In [28]:
# pipeline function: creating typical load profiles
# -------------------------------------------------------------------------------------------
def pipeline(self, df, date, shiftable_devices):
    df = self.prove_start_end_date(df, date)
    df = self.df_yesterday_date(df, date)
    df_hours = self.load_profile_raw(df, shiftable_devices)
    df_hours = self.load_profile_cleaned(df_hours)
    loads = self.load_profile(df_hours, shiftable_devices)
    return loads
# add to Load agent
setattr(Load_Agent, 'pipeline', pipeline)
del pipeline 


In [30]:
date = '2023-01-08'

In [31]:
df = output

In [32]:
Load_Agent_i = Load_Agent(df) 
loads = Load_Agent_i.pipeline(df, date, shiftable_devices)
loads

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,...,h15,h16,h17,h18,h19,h20,h21,h22,h23,h24
sensor.shellyplug_s_4022d88961b4_power,579.906396,555.06866,125.384604,0.223834,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sensor.shellyplug_s_4022d88984b8_power,663.272762,249.475287,88.61876,78.469345,230.205367,16.102368,0.859524,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Evaluation

In [33]:
true_loads = Load_Agent_i.get_true_loads(shiftable_devices)
device = 'sensor.shellyplug_s_4022d88984b8_power'
true_loads

{'sensor.shellyplug_s_4022d88961b4_power':                              h1          h2          h3        h4 h5 h6 h7 h8  \
 2022-12-26 18:00:00  795.214932  541.985593           0         0  0  0  0  0   
 2022-12-27 09:00:00  319.984146     379.604           0         0  0  0  0  0   
 2022-12-29 07:00:00  736.494286  984.130769  318.092188         0  0  0  0  0   
 2022-12-30 14:00:00  408.513615   45.988321  308.830833  1.119171  0  0  0  0   
 2023-01-02 07:00:00     639.325  823.634615           0         0  0  0  0  0   
 2023-01-08 16:00:00  871.181735  529.936458           0         0  0  0  0  0   
 
                     h9 h10  ... h15 h16 h17 h18 h19 h20 h21 h22 h23 h24  
 2022-12-26 18:00:00  0   0  ...   0   0   0   0   0   0   0   0   0   0  
 2022-12-27 09:00:00  0   0  ...   0   0   0   0   0   0   0   0   0   0  
 2022-12-29 07:00:00  0   0  ...   0   0   0   0   0   0   0   0   0   0  
 2022-12-30 14:00:00  0   0  ...   0   0   0   0   0   0   0   0   0   0  
 2023-0

In [34]:
date = '2023-01-08'

In [35]:
true_loads[device] = Load_Agent_i.prove_start_end_date(true_loads[device], date)

In [36]:
true_loads[device]

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,...,h15,h16,h17,h18,h19,h20,h21,h22,h23,h24
2022-12-27 11:00:00,1367.209816,172.397639,101.281841,96.976491,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-12-30 17:00:00,221.466839,309.251868,112.707377,0.0,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-12-31 11:00:00,1360.677778,183.523295,77.974684,69.033941,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-01-01 13:00:00,736.927223,81.303515,80.515755,103.77455,1381.232203,96.614206,5.157143,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-01-06 17:00:00,5.25,309.161733,81.135623,106.920528,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
def evaluate(self, shiftable_devices, date, metric="mse", aggregate=True, evaluation=False):
    from tqdm import tqdm
    import pandas as pd
    import numpy as np
    tqdm.pandas()

    if metric == "mse":
        import sklearn.metrics

        metric = sklearn.metrics.mean_squared_error

    true_loads = self.get_true_loads(shiftable_devices)

    scores = {}
    if not evaluation:
        for device in shiftable_devices:
            true_loads[device] = self.prove_start_end_date(true_loads[device], date)
        scores[device] = true_loads[device].progress_apply(
            lambda row: metric(
                row.values,
                self.pipeline(
                    self.input, str(row.name)[:10], [device]
                ).values.reshape(
                    -1,
                ),
            ),
                axis=1,
            )
    else:
        for device in shiftable_devices:
            true_loads[device] = self.prove_start_end_date(true_loads[device], date)
            scores[device] = {}
            for idx in tqdm(true_loads[device].index):
                date = str(idx)[:10]
                y_true = true_loads[device].loc[idx, :].values
                try:
                    y_hat = (df.loc[date][device].values.reshape(-1,))
                except KeyError:
                    try:
                        y_hat = self.pipeline(
                            self.input, date, [device]
                        ).values.reshape(
                            -1,
                        )
                    except:
                        y_hat = np.full(24, 0)
                scores[device][idx] = metric(y_true, y_hat)
            scores[device] = pd.Series(scores[device], dtype='float64')

    if aggregate:
        scores = {device: scores_df.mean() for device, scores_df in scores.items()}
    return scores

# add to Load agent
setattr(Load_Agent, 'evaluate', evaluate)
del evaluate 

evaluation just runs if the data later on is available. So we run it on 01.01.2023, because for 02.01. we donot have data for each hour

In [38]:
date = '2023-01-08'

In [39]:
df = output
df

Unnamed: 0_level_0,sensor.shellyplug_s_4022d88961b4_power,sensor.shellyplug_s_4022d88984b8_power
last_updated,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-12-25 18:00:00,0.0,0.000000
2022-12-25 19:00:00,0.0,0.000000
2022-12-25 20:00:00,0.0,0.000000
2022-12-25 21:00:00,0.0,0.000000
2022-12-25 22:00:00,0.0,0.000000
...,...,...
2023-01-09 06:00:00,0.0,0.000000
2023-01-09 07:00:00,0.0,0.000000
2023-01-09 08:00:00,0.0,0.000000
2023-01-09 09:00:00,0.0,27.498667


In [40]:
Load_Agent_i = Load_Agent(df) 
scores = Load_Agent_i.evaluate(shiftable_devices, date, evaluation=False)
scores

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 60.76it/s]


{'sensor.shellyplug_s_4022d88984b8_power': 40327.73812175692}

In [41]:
Load_Agent_i = Load_Agent(df) 
scores = Load_Agent_i.evaluate(shiftable_devices, date, evaluation=True)
scores

100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 4214.53it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 473.69it/s]


{'sensor.shellyplug_s_4022d88961b4_power': 70831.42717745171,
 'sensor.shellyplug_s_4022d88984b8_power': 109495.24378810001}

# Complete Load Agent

In [42]:
# Load Agent
# ===============================================================================================
class Load_Agent:
    def __init__(self, load_input_df):
        self.input = load_input_df

    # selecting the correct data, identifying device runs, creating load profiles
    # -------------------------------------------------------------------------------------------
    def prove_start_end_date(self, df, date):
        import pandas as pd

        start_date = (df.index[0]).strftime("%Y-%m-%d")
        end_date = date

        if len(df.loc[start_date]) < 24:
            start_date = (pd.to_datetime(start_date) + pd.Timedelta(days=1)).strftime(
                "%Y-%m-%d"
            )
            df = df[start_date:end_date]
        else:
            df = df[:end_date]
        
        if end_date not in df:
            return df

        if len(df.loc[end_date]) < 24:
            end_new = (pd.to_datetime(end_date) - pd.Timedelta(days=1)).strftime(
                "%Y-%m-%d"
            )
            df = df[:end_new]
        else:
            df = df[:end_date]
        return df

    def df_yesterday_date(self, df, date):
        import pandas as pd

        yesterday = (pd.to_datetime(date) - pd.Timedelta(days=1)).strftime("%Y-%m-%d")
        return df[:yesterday]

    def load_profile_raw(self, df, shiftable_devices):
        import pandas as pd

        hours = []
        for hour in range(1, 25):
            hours.append("h" + str(hour))
        df_hours = {}

        for idx, appliance in enumerate(
            shiftable_devices
        ):
            df_hours[appliance] = pd.DataFrame(index=None, columns=hours)
            column = df[appliance]

            for i in range(len(column)):

                if (i == 0) and (column[0] > 0):
                    df_hours[appliance].loc[0, "h" + str(1)] = column[0]

                elif (column[i - 1] == 0) and (column[i] > 0):
                    for j in range(0, 24):
                        if (i + j) < len(column):
                            if column[i + j] > 0:
                                df_hours[appliance].loc[i, "h" + str(j + 1)] = column[
                                    i + j
                                ]
        return df_hours

    def load_profile_cleaned(self, df_hours):
        import numpy as np

        for app in df_hours.keys():
            for i in df_hours[app].index:
                for j in df_hours[app].columns:
                    if np.isnan(df_hours[app].loc[i, j]):
                        df_hours[app].loc[i, j:] = 0
        return df_hours

    def load_profile(self, df_hours, shiftable_devices):
        import pandas as pd

        hours = df_hours[shiftable_devices[0]].columns
        loads = pd.DataFrame(columns=hours)

        for app in df_hours.keys():
            app_mean = df_hours[app].apply(lambda x: x.mean(), axis=0)
            for hour in app_mean.index:
                loads.loc[app, hour] = app_mean[hour]

        loads = loads.fillna(0)
        return loads

    # evaluating the performance of the load agent
    # -------------------------------------------------------------------------------------------
    def get_true_loads(self, shiftable_devices):
        true_loads = self.load_profile_raw(self.input, shiftable_devices)
        true_loads = self.load_profile_cleaned(true_loads)
        for device, loads in true_loads.items():
            true_loads[device].rename(
                index=dict(enumerate(self.input.index)), inplace=True
            )
        return true_loads

    def evaluate(self, shiftable_devices, date, metric="mse", aggregate=True, evaluation=False):
        from tqdm import tqdm
        import pandas as pd
        import numpy as np
        tqdm.pandas()

        if metric == "mse":
            import sklearn.metrics

            metric = sklearn.metrics.mean_squared_error

        true_loads = self.get_true_loads(shiftable_devices)

        scores = {}
        if not evaluation:
            for device in shiftable_devices:
                true_loads[device] = self.prove_start_end_date(true_loads[device], date)
                scores[device] = true_loads[device].progress_apply(
                    lambda row: metric(
                        row.values,
                        self.pipeline(
                            self.input, str(row.name)[:10], [device]
                        ).values.reshape(
                            -1,
                        ),
                    ),
                    axis=1,
                )
        else:
            for device in shiftable_devices:
                true_loads[device] = self.prove_start_end_date(true_loads[device], date)
                scores[device] = {}
                for idx in tqdm(true_loads[device].index):
                    date = str(idx)[:10]
                    y_true = true_loads[device].loc[idx, :].values
                    try:
                        y_hat = (df.loc[date][device].values.reshape(-1,))
                    except KeyError:
                        try:
                            y_hat = self.pipeline(
                                self.input, date, [device]
                            ).values.reshape(
                                -1,
                            )
                        except:
                            y_hat = np.full(24, 0)
                    scores[device][idx] = metric(y_true, y_hat)
                scores[device] = pd.Series(scores[device], dtype='float64')

        if aggregate:
            scores = {device: scores_df.mean() for device, scores_df in scores.items()}
        return scores

    # pipeline function: creating typical load profiles
    # -------------------------------------------------------------------------------------------
    def pipeline(self, df, date, shiftable_devices):
        df = self.prove_start_end_date(df, date)
        df = self.df_yesterday_date(df, date)
        df_hours = self.load_profile_raw(df, shiftable_devices)
        df_hours = self.load_profile_cleaned(df_hours)
        loads = self.load_profile(df_hours, shiftable_devices)
        return loads

In [43]:
# calling the preparation pipelin
import pandas as pd
prep = Preparation_Agent(dbfile, shiftable_devices)
output, scaled, df = prep.pipeline_load(prep.input, load_pipe_params)
output
df = output

In [44]:
shiftable_devices = ["sensor.shellyplug_s_4022d88961b4_power", "sensor.shellyplug_s_4022d88984b8_power"]
load = Load_Agent(df)

date = '2023-01-08'

output = load.pipeline(df, date, shiftable_devices)
output

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,...,h15,h16,h17,h18,h19,h20,h21,h22,h23,h24
sensor.shellyplug_s_4022d88961b4_power,579.906396,555.06866,125.384604,0.223834,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sensor.shellyplug_s_4022d88984b8_power,663.272762,249.475287,88.61876,78.469345,230.205367,16.102368,0.859524,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
