# XBOS_HVAC_Predictions
This notebook prepares the data and models for predicting the change in HVAC power consumption that follows a single state change in the zones.

# Future
Planned extension: predict the HVAC power consumption when multiple state changes occur in the zones.

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from system_id_utils import *

# Import Data

In [12]:
# Folder holding the input CSVs and the site whose data is analysed.
# File paths in this notebook are built as '<data_folder>/<site>-<name>.csv'.
data_folder, site = 'data', 'avenal_animal_shelter'

In [3]:
# All three raw inputs live under '<data_folder>/<site>-<kind>_data.csv'.
# The first CSV column becomes the index and is parsed as dates.
csv_prefix = data_folder + '/' + site
read_opts = dict(index_col=0, parse_dates=True)

df_power = pd.read_csv(csv_prefix + '-meter_data.csv', **read_opts)   # meter (power) readings
df_tstat = pd.read_csv(csv_prefix + '-tstat_data.csv', **read_opts)   # per-zone tstat columns (states / temperatures)
df_weather = pd.read_csv(csv_prefix + '-oat_data.csv', **read_opts)   # OAT series

In [4]:
# The meter frame carries a single column; give it a stable, readable name.
df_power.columns = ['power']

# Clean the weather frame:
#   1. drop the unused series identified by its UUID column name,
#   2. fill gaps by linear interpolation down the rows,
#   3. name the remaining single column 'oat'.
# errors='ignore' keeps this cell re-runnable: on a second execution the UUID
# column is already gone and a plain drop() would raise KeyError. If the frame
# unexpectedly has more than one column left, the rename below still fails
# loudly with a length mismatch.
df_weather = (
    df_weather
    .drop(columns=['a6e9f305-d76b-3e1a-8b11-e3d312134f02'], errors='ignore')
    .interpolate(axis=0, method='linear')
)
df_weather.columns = ['oat']

In [5]:
# Join power, thermostat and weather data column-wise on the shared index and
# keep only the rows where every column has a value.
frames = [df_power, df_tstat, df_weather]
df = pd.concat(frames, axis=1).dropna()

# Names of the thermostat columns starting with 's' (the per-zone state
# columns). NOTE: despite its name, `num_zones` is a list of column names.
num_zones = list(filter(lambda name: name.startswith('s'), df_tstat.columns))

# Combine the zone states into one number: zone i contributes s<i> * 10**i.
# NOTE(review): this assumes the state columns are named s0..s<n-1> and that
# each state value fits in one decimal digit - confirm against the data.
zone_terms = []
for zone_idx in range(len(num_zones)):
    zone_terms.append(df['s%d' % zone_idx] * (10 ** zone_idx))
df['state'] = np.array(zone_terms).sum(axis=0)

# Prepare data for Modeling

In [None]:
# One-hot encode the zone state columns into a separate frame.
df_dummies = pd.get_dummies(df, columns=num_zones)
# df_dummies

# CHECK: Delete below later
# Manual one-hot encoding of the same state columns, written directly into
# `df` - using this for the "Stuff" cell.
#   new_cols   : {state column -> list of indicator columns created for it}
#   state_cols : sorted list of every indicator column name (e.g. 's0_0')
new_cols = {}
state_cols = []
for col in num_zones:
    new_cols[col] = []
    for val in df[col].unique():
        # '%d' truncates the state value to an integer for the column name.
        new_col = '%s_%d' % (col, val)
        new_cols[col].append(new_col)
        state_cols.append(new_col)
        # Vectorised indicator: 1 where the zone is in state `val`, else 0.
        # This replaces a zero-fill followed by a label-based .loc assignment,
        # which was slower and relied on index labels being unique.
        df[new_col] = (df[col] == val).astype('int64')
state_cols = sorted(state_cols)

In [None]:
# Preview the first rows of the one-hot encoded frame.
df_dummies.head()

In [None]:
# Build the modelling dataset.
#   X : one row per resampled interval of a state block, laid out as
#       [time of day in minutes, IAT of the zone that changed, OAT,
#        one 0/1 flag per entry of state_cols (1 only for the column that changed)]
#   y : resampled power minus `prev`, the reference power carried over from
#       the preceding state block.
# The get_* helpers are not defined in this notebook (presumably they come
# from the `system_id_utils` star import).

# Resampling interval applied to every state block.
RESAMPLE_INTERVAL = '5min'

# Independent and dependent variables
X, y = [], []

# Continuous blocks of data with 1-min interval (~500ms execution time)
blocks = get_continuous_blocks(orig_df=df)

# Each block has 1min interval data with no gaps
for block in blocks:

    # Reference power / state of the preceding state block; reset per block
    # because there is no valid "previous" across a gap in the data.
    prev, prev_state = None, None

    # block = (start_time, end_time)
    df_block = df[block[0]:block[1]]

    # Each state block has data with a single change in state only
    state_blocks = get_single_state_diff(orig_df=df_block)

    for state_block in state_blocks:
        st, et = state_block[0], state_block[1]
        df_state = df_block[st:et]

        # current_state = 0.0, 1.0...
        current_state = df_state['state'].unique()[0]

        # Per the original notes: `_first` is the average over the first five
        # minutes of this state (not used anymore) and `last` the average over
        # its last five minutes; `last` becomes `prev` for the next block.
        _first, last = get_first_last_block_power(df_state)

        # The first state block has no previous block to compare against.
        # Test identity with None: a legitimate average power of 0.0 is falsy
        # and must not be mistaken for "no previous block".
        if prev is None:
            prev_state = current_state
            prev = last
            continue

        state_col_change = get_action_from_state_diff(prev_state=prev_state, current_state=current_state)

        # CHECK: Will this case ever happen? Isn't the whole point of
        # for state_block in state_blocks that we're looping through
        # blocks when there's a state change?
        # NOTE(review): prev / prev_state are intentionally left untouched on
        # this path (original behaviour) - confirm whether they should advance.
        if not state_col_change:
            continue

        # Subtract the scalar reference directly; no need to broadcast it into
        # a temporary column on a slice (which triggered SettingWithCopyWarning).
        df_resampled = df_state.resample(RESAMPLE_INTERVAL).mean()
        power_diff = df_resampled['power'] - prev
        y.append(power_diff.values)

        # e.g. state_col_change = 's0_0' -> temperature column 't0'
        temp_col_change = 't' + state_col_change[1:].split('_')[0]

        # Time of day in minutes, IAT of the zone that changed, and OAT
        tod = df_resampled.index.hour * 60 + df_resampled.index.minute
        tin = df_resampled[temp_col_change]
        tout = df_resampled['oat']

        # The flags are identical for every row of this state block.
        flags = [int(state_col == state_col_change) for state_col in state_cols]

        for minute_of_day, zone_temp, outdoor_temp in zip(tod, tin, tout):
            X.append([minute_of_day, zone_temp, outdoor_temp] + flags)

        # This block becomes the reference for the next one.
        prev = last
        prev_state = current_state

np_X = np.array(X)
# np.concatenate raises on an empty list; fall back to an empty array.
np_y = np.concatenate(y) if y else np.array([])

# X = [ [1185, 70.29999999999998, 69.92474999999999, 1, 0, 0, 0], ...]

In [None]:
# Optional: persist the feature matrix and target to
# '<data_folder>/<site>-X.csv' and '<data_folder>/<site>-y.csv', the files the
# "Get Data" cell reads back. Left disabled; uncomment to (re)generate them.
# pd.DataFrame(np_X).to_csv(data_folder + '/' + site + '-X.csv')
# pd.DataFrame(np_y).to_csv(data_folder + '/' + site + '-y.csv')

# Modeling

## Get Data

In [None]:
# Reload the saved feature matrix; the first CSV column is the row index.
# NOTE: this rebinds X / y from the Python lists built earlier to DataFrames.
features_csv = '/'.join([data_folder, site + '-X.csv'])
X = pd.read_csv(features_csv, index_col=[0])
# Column layout: time of day, indoor / outdoor air temperature, state flags.
feature_names = ['tod', 'IAT', 'OAT']
state_flag_names = ['s0_0', 's0_1', 's0_2', 's0_5']
X.columns = feature_names + state_flag_names

# Reload the matching target (change in power).
target_csv = '/'.join([data_folder, site + '-y.csv'])
y = pd.read_csv(target_csv, index_col=[0])
y.columns = ['delta_power']

In [23]:
# Quick previews of the reloaded features / target (disabled).
# X.head()
# y.head()

## LSTM

## Random Forest

## ANN