# IMPORTS

In [1]:
import numpy as np
import pandas as pd

from hvac_control.data import load_data, save_img, save_data
from hvac_control.preprocessing import *
from hvac_control.decision_plots import *

# Load the preprocessed dataset from its CSV file.
# header_names=None and index=True are forwarded unchanged to load_data.
# NOTE(review): index=True presumably makes the datetime column the frame's
# index (later cells read df_index.index with the .dt accessor) — confirm
# against hvac_control.data.load_data.
data_to_load = "gaia_data_1.csv"
df_index = load_data(
    data_to_load,
    header_names=None,
    index=True,
)

# DATASET CREATION

We apply a non-causal filter to the measured signal columns of the dataset.

In [3]:
# Signal columns handed to the non-causal filter helper.
columns_to_filter = [
    'T_ext', 'Solar_irrad', 'T_imp',
    'BC1_power', 'BC2_power', 'T_ret',
]

# NOTE(review): the trailing 5 is the helper's third positional argument —
# presumably a window size or filter order; confirm in hvac_control.preprocessing.
df_index = filter_signal_non_causal(df_index, columns_to_filter, 5)

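We add new columns to the dataset: the difference between `T_imp` and `T_ret`, the day of the week, the hour of the day, and the prediction target `T_ret_in_1h` (the value of `T_ret` one hour ahead).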

In [4]:
# Prediction horizon expressed in rows. Consecutive rows are 2 minutes apart,
# so one hour ahead corresponds to 60 // 2 = 30 rows. Deriving the value once
# keeps the shift and the trailing trim below in sync.
# NOTE(review): the row-based shift assumes uniformly sampled data with no
# missing timestamps — confirm against the preprocessing step.
SAMPLING_INTERVAL_MIN = 2
PREDICTION_HORIZON_MIN = 60
HORIZON_ROWS = PREDICTION_HORIZON_MIN // SAMPLING_INTERVAL_MIN

# Timestamps of the index as a Series, so the .dt accessor can be used.
timestamps = df_index.index.to_series()

df_index['Diff_temp'] = df_index['T_imp'] - df_index['T_ret']
df_index['Day_week'] = timestamps.dt.dayofweek
df_index['Hours'] = timestamps.dt.hour

# Target column: the value of T_ret HORIZON_ROWS rows (1 hour) later.
df_index['T_ret_in_1h'] = df_index['T_ret'].shift(-HORIZON_ROWS)

# The last HORIZON_ROWS rows have no future value to predict, so drop them.
# .copy() makes the result an independent frame, so the column assignments in
# later cells do not write into a slice of the original frame.
df_index = df_index.iloc[:-HORIZON_ROWS].copy()

Encoding cyclic / periodic features

In [5]:
# Encode the periodic features as (sin, cos) pairs: the hour of the day is
# scaled by 24 and the day of the week by 7 before being mapped to an angle.
hour_angle = 2 * np.pi * df_index['Hours'] / 24.0
weekday_angle = 2 * np.pi * df_index['Day_week'] / 7

df_index['Hours_sin'] = np.sin(hour_angle)
df_index['Hours_cos'] = np.cos(hour_angle)
df_index['Day_week_sin'] = np.sin(weekday_angle)
df_index['Day_week_cos'] = np.cos(weekday_angle)

# The raw integer columns are replaced by their encoded counterparts.
df_index = df_index.drop(columns=['Day_week', 'Hours'])

We remove 25 May (2022-05-25) because it was observed to behave differently from the other days. We also set the ending hour to 17:30 instead of 18:30: since we want to predict `T_ret` one hour into the future, the prediction made at 17:30 targets `T_ret` at 18:30 (the last working hour).

In [6]:
# Exclude 2022-05-25 from the dataset via the project helper.
df_index = remove_specific_day(df_index, '2022-05-25')

# Restrict the data to the 05:00-17:30 window; the result is iterated day by
# day in the following cell.
# NOTE(review): the keyword 'strating_hour' is kept exactly as spelled in the
# original call — presumably it matches the helper's signature; verify before
# renaming.
dfs_day_working_hours = remove_non_working_hours(
    df_index,
    strating_hour='05:00',
    ending_hour='17:30',
)

Remove the datetime index and add a day-number column. This added column will be useful for the train/test split of the dataset.

In [17]:
# For each per-day frame: move the index back into a column, discard the
# resulting 'datetime' column, and tag every row with the day's position in
# the list (0, 1, 2, ...) in a new 'Day' column.
dfs_for_prediction = [
    day_frame.reset_index().drop(columns='datetime').assign(Day=day_number)
    for day_number, day_frame in enumerate(dfs_day_working_hours)
]

In [18]:
# Input features (plus the 'Day' tag) and the target column.
x_columns = [
    'Day',
    'T_ext', 'Solar_irrad', 'T_imp',
    'BC1_power', 'BC2_power', 'Diff_temp',
    'Hours_sin', 'Hours_cos',
    'T_ret',
]
y_column = ['T_ret_in_1h']
all_columns = [*x_columns, *y_column]

# Stack the per-day frames into one frame with a fresh 0..n-1 index and keep
# only the selected columns, in the order listed above.
df_for_prediction = pd.concat(dfs_for_prediction, ignore_index=True).loc[:, all_columns]

print(df_for_prediction.shape)
df_for_prediction.head()

(5640, 11)


Unnamed: 0,Day,T_ext,Solar_irrad,T_imp,BC1_power,BC2_power,Diff_temp,Hours_sin,Hours_cos,T_ret,T_ret_in_1h
0,0,19.778001,25.0,9.132007,3.09,15.77,-0.825995,0.965926,0.258819,9.958002,11.320013
1,0,19.800001,26.8,9.132007,3.09,15.76,-0.829993,0.965926,0.258819,9.962,11.320013
2,0,19.810001,28.8,9.14201,3.09,15.74,-0.835992,0.965926,0.258819,9.978003,11.280011
3,0,19.82,32.6,9.120007,3.09,15.75,-0.811993,0.965926,0.258819,9.932001,11.21001
4,0,19.83,36.8,9.102008,3.1,15.74,-0.801996,0.965926,0.258819,9.904004,11.134009
