In [None]:
import numpy as np
import pandas as pd

from Import_Data import Import_Data
from system_id_utils import *

In [None]:
# ---------------------------------------------------------------------------
# Notebook configuration
# ---------------------------------------------------------------------------

# Folder the per-site CSV exports are written into.
data_folder = "data"

# Query arguments passed to obj.get_data(): site list, time range and meter type.
site = ["ciee"]
start, end = "2018-01-01T00:00:00Z", "2018-02-01T00:00:00Z"

# Options listed by the original author: Green_Button_Meter, Building_Electric_Meter
point_type = "Building_Electric_Meter"

# Aggregation applied to each stream, keyed by stream name.
aggregate = dict(meter="MEAN", tstat="MAX")

# Aggregation window for each stream, keyed by stream name.
window = dict(meter="1m", tstat="1m")

# Data-access object used by the cells below.
obj = Import_Data()

In [None]:
# Build the regression inputs for every requested site.
#
# For each site this cell:
#   1. exports the raw frame to CSV and fills gaps in the 'oat' column,
#   2. encodes the per-zone state columns (s0, s1, ...) into a single 'state'
#      value and into one indicator column per (zone, value) pair,
#   3. walks the blocks returned by the system_id_utils helpers and, for each
#      state block after the first one in a continuous block, appends feature
#      rows to X and the matching power differences to y.
#
# Results are left in np_X / np_y (overwritten on every site iteration).

# Resampling rule used for the feature rows.
# NOTE(review): get_first_last_block_power() presumably uses the same 5-minute
# horizon internally - confirm before changing this value.
resample_rule = '5min'

# `site` is rebound by the loop below (list of names -> a single name), so a
# second run of this cell would otherwise hand a bare string to get_data().
# Normalising first keeps the cell re-runnable while `site` still holds the
# last site name once the loop has finished, exactly as before.
sites = [site] if isinstance(site, str) else list(site)

for site, df in obj.get_data(sites, start, end, point_type, aggregate, window):

    # Save df to csv if needed
    df.to_csv(data_folder + '/' + site + '.csv')

    df['oat'] = df['oat'].interpolate(axis=0, method='linear')

    # Zone state columns are named s0, s1, ... Requiring digits after the 's'
    # keeps unrelated columns (e.g. 'state' or the 's0_1' indicators created
    # below) out of the zone list.
    num_zones = [col for col in df.columns if col.startswith('s') and col[1:].isdigit()]

    # Pack all zone states into one number: zone i contributes s_i * 10**i.
    df['state'] = np.sum(np.array([df['s%d' % i] * (10 ** i) for i in range(len(num_zones))]), axis=0)

    # df_dummies = pd.get_dummies(df, columns=num_zones)

    # CHECK: Delete below later
    # Below code does the same thing as above - using this for the "Stuff" cell
    new_cols = {}    # zone column -> list of its indicator column names
    state_cols = []  # every indicator column name, across all zones
    for col in num_zones:
        new_cols[col] = []
        for val in df[col].unique():
            new_col = '%s_%d' % (col, val)
            new_cols[col].append(new_col)
            state_cols.append(new_col)
            # 1 where the zone is in this state, 0 elsewhere.
            df[new_col] = (df[col] == val).astype('int64')
    state_cols = sorted(state_cols)

    # Independent and dependent variables
    X, y = [], []

    # Continuous blocks of data with 1-min interval (~500ms execution time)
    blocks = get_continuous_blocks(orig_df=df)

    # Each block has 1min interval data with no gaps
    for block in blocks:

        # Baseline power / state carried over from the preceding state block.
        # None means "no preceding state block seen yet in this block".
        prev, prev_state = None, None

        # df1 = df[start_time:end_time]
        df1 = df[block[0]: block[1]]

        # Each block has data with a single change in state only
        state_blocks = get_single_state_diff(orig_df=df1)

        for state_block in state_blocks:
            st = state_block[0]
            et = state_block[1]
            df2 = df1[st:et]

            # current_state = 0.0, 1.0...
            current_state = df2['state'].unique()[0]

            # first - first five minutes of new state (Not used anymore) (average)
            # last - last five minutes of the previous state (average)
            first, last = get_first_last_block_power(df2)

            # The first block will not have a previous.
            # Compare against None explicitly: a baseline of exactly 0.0 is a
            # valid value and must not be mistaken for "no baseline yet".
            if prev is None:
                prev_state = current_state
                prev = last
                continue

            # assign() returns a new frame, so the baseline column is never
            # written into a slice of df1 (avoids SettingWithCopyWarning).
            df3 = df2.assign(prev=prev).resample(resample_rule).mean()
            df3['power_diff'] = df3['power'] - df3['prev']

            state_col_change = get_action_from_state_diff(prev_state=prev_state, current_state=current_state)

            # CHECK: Will this case ever happen? Isn't the whole point of
            # for state_block in state_blocks that we're looping through
            # blocks when there's a state change?
            # NOTE(review): on this path prev / prev_state are intentionally
            # left untouched (original behaviour); the next state block is
            # therefore compared against the older baseline - confirm that
            # this is what is wanted.
            if not state_col_change:
                continue

            y.append(df3['power_diff'].values)

            # e.g. state_col_change = 's0_0' -> temperature column 't0'
            temp_col_change = 't' + state_col_change[1:].split('_')[0]

            tod = list(df3.index.hour * 60 + df3.index.minute)
            num_rows = len(tod)  # number of rows in current state

            # IAT of the zone that changed
            tin = list(df3[temp_col_change])

            # OAT
            tout = list(df3['oat'])

            # Indicator vector over state_cols: 1 for the column that changed,
            # 0 for the rest. It is identical for every row of this block.
            flags = [1 if state_col == state_col_change else 0 for state_col in state_cols]

            for row_idx in range(num_rows):
                X.append([tod[row_idx], tin[row_idx], tout[row_idx]] + flags)

            # This state block becomes the baseline for the next one.
            prev = last
            prev_state = current_state

    np_X = np.array(X)
    # np.concatenate() raises on an empty list; fall back to an empty array
    # when no state transition produced any samples.
    np_y = np.concatenate(y) if y else np.array([])

    # # Save np arrays if needed
    # pd.DataFrame(np_X).to_csv(data_folder + '/' + site + '-X.csv')
    # pd.DataFrame(np_y).to_csv(data_folder + '/' + site + '-y.csv')