# **Import Libraries**

In [1]:
from datetime import datetime as dt
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import get_x_y

# **Load Energy**

In [2]:
# Energy for house_id=2, loaded per month group ([6, 7, 8, 9] and [11, 12, 1])
# at two `intervals` values. The value is presumably in seconds
# (3600 <-> the `_60m` suffix, 1800 <-> `_30m`) -- confirm in get_x_y.
# The call order is the same as before: summer then winter, per interval.
energy_6_9_60m, energy_11_1_60m = (
    get_x_y.get_target_energy(house_id=2, target_months=months, intervals=3600)
    for months in ([6, 7, 8, 9], [11, 12, 1])
)

energy_6_9_30m, energy_11_1_30m = (
    get_x_y.get_target_energy(house_id=2, target_months=months, intervals=1800)
    for months in ([6, 7, 8, 9], [11, 12, 1])
)

2012-11-28:FileNotFoundError
skip this date
2012-11-28:FileNotFoundError
skip this date


# **Load Occupancy**

In [3]:
# Read the two occupancy tables; the CSV column named 'Unnamed: 0' becomes
# the row index of each frame.
occupancy_sum = (
    pd.read_csv('./ecodataset/Occupancy/02_summer.csv')
    .set_index('Unnamed: 0')
)
occupancy_win = (
    pd.read_csv('./ecodataset/Occupancy/02_winter.csv')
    .set_index('Unnamed: 0')
)

In [4]:
# Convert both occupancy tables with intervals=3600, the same value that is
# used when loading the `_60m` energy series.
occupancy_sum = get_x_y.to_intervals_occupancy(occupancy_sum, intervals=3600)
occupancy_win = get_x_y.to_intervals_occupancy(
    occupancies=occupancy_win, intervals=3600)

In [5]:
# Drop the first 15 columns of each occupancy table (transpose, slice rows,
# transpose back). Per the original note, the sliced-off periods don't have
# data for the 2 weeks before them.
occupancy_win, occupancy_sum = (
    table.T[15:].T for table in (occupancy_win, occupancy_sum)
)

# **Create Ratio**

In [6]:
# Build one ratio table per season from the remaining occupancy columns and
# the `_60m` energy series.
ratio_sum = get_x_y.build_ratio(occupancy_sum.columns,
                                energies=energy_6_9_60m)
ratio_win = get_x_y.build_ratio(date_columns=occupancy_win.columns,
                                energies=energy_11_1_60m)

In [7]:
# One-column frames holding the column labels of each ratio table; they are
# passed to get_x_y.create_features as `col` further down.
col_sum, col_win = (
    pd.DataFrame(ratio.columns.tolist()) for ratio in (ratio_sum, ratio_win)
)

# **Create Features**

In [8]:
# Build the summer feature table (features_sum) and the matching labels
# (occupancy_sum, a single "Occupancy" column).
#
# NOTE: this cell rebinds `ratio_sum` (table -> flat array) and
# `occupancy_sum` (table -> one-column frame). It therefore cannot be re-run
# on its own: re-run the cells that create those two variables first.
energy_sum = get_x_y.get_corresponding_energy(
    occupancy_columns=occupancy_sum.columns,
    energies=energy_6_9_30m)
# Flatten column by column, so all values of the first column come first.
ratio_sum = ratio_sum.values.T.reshape(-1)

means, maxs, mins, stds, ranges, temps = get_x_y.create_features(
    energy=energy_sum, col=col_sum)
# Time cycles 0..23 once per occupancy column.
times = list(range(24)) * len(occupancy_sum.columns)
# Season is the constant 1 for every row of this table.
seasons = [1] * len(ratio_sum)
is_sundays = get_x_y.get_issunday(target_days=occupancy_sum.columns)

features_sum = pd.DataFrame({
    "Ratio": ratio_sum, "Mean": means, "Max": maxs,
    "Min": mins, "Std": stds, "Range": ranges, "Temp": temps,
    "Time": times, "Season": seasons, "Sunday": is_sundays
    })
# Previous / next Ratio value; the first and last rows have no neighbour and
# are padded with 0.5. Computed before the Time filter below, so the
# neighbours of the first and last kept hour are still used.
features_sum["Lag-1_Ratio"] = np.append(0.5, features_sum["Ratio"].values[:-1])
features_sum["Lag+1_Ratio"] = np.append(features_sum["Ratio"].values[1:], 0.5)

# Keep Time 6..21 inclusive. This window must stay in sync with the
# `.iloc[6:22]` label slice at the end of this cell.
features_sum = features_sum[features_sum["Time"].between(6, 21)]
am_pm = get_x_y.get_am_pm(times=features_sum["Time"])
features_sum = features_sum.drop(columns="Time")
features_sum = features_sum.reset_index(drop=True)
# Assumes am_pm has a 0..n-1 row index and columns 0, 1, 2 -- TODO confirm
# against get_x_y.get_am_pm.
features_sum = pd.concat([features_sum, am_pm], axis=1)
features_sum = features_sum.rename(columns={0: "Am", 1: "Lunch", 2: "Pm"})

# Fix the final column order.
features_sum = features_sum.reindex(columns=[
    'Ratio', 'Mean', 'Max', 'Min',
    'Std', 'Range', "Temp", 'Lag-1_Ratio', 'Lag+1_Ratio',
    'Season', 'Sunday', 'Am', 'Lunch', 'Pm'
    ])

# features_sum.to_csv("2_Sum_X_train.csv", index=False)
# Labels: rows 6..21 by position, flattened column by column like ratio_sum.
# Assumes the occupancy table has one row per Time value 0..23 -- TODO confirm.
occupancy_sum = occupancy_sum.iloc[6:22].values.T.reshape(-1)
occupancy_sum = pd.DataFrame(occupancy_sum, columns=["Occupancy"])
# occupancy_sum.to_csv("2_Sum_Y_train.csv", index=False)

In [9]:
# Build the winter feature table (features_win) and the matching labels
# (occupancy_win, a single "Occupancy" column).
#
# NOTE: this cell rebinds `ratio_win` (table -> flat array) and
# `occupancy_win` (table -> one-column frame). It therefore cannot be re-run
# on its own: re-run the cells that create those two variables first.
energy_win = get_x_y.get_corresponding_energy(occupancy_win.columns,
                                              energy_11_1_30m)
# Flatten column by column, so all values of the first column come first.
ratio_win = ratio_win.values.T.reshape(-1)

means, maxs, mins, stds, ranges, temps = get_x_y.create_features(
    energy_win, col_win)
# Time cycles 0..23 once per occupancy column.
times = list(range(24)) * len(occupancy_win.columns)
# Season is the constant 0 for every row of this table.
seasons = [0] * len(ratio_win)
is_sundays = get_x_y.get_issunday(occupancy_win.columns)

features_win = pd.DataFrame({
    "Ratio": ratio_win, "Mean": means, "Max": maxs,
    "Min": mins, "Std": stds, "Range": ranges, "Temp": temps,
    "Time": times, "Season": seasons, "Sunday": is_sundays
    })
# Previous / next Ratio value; the first and last rows have no neighbour and
# are padded with 0.5. Computed before the Time filter below, so the
# neighbours of the first and last kept hour are still used.
features_win["Lag-1_Ratio"] = np.append(0.5, features_win["Ratio"].values[:-1])
features_win["Lag+1_Ratio"] = np.append(features_win["Ratio"].values[1:], 0.5)

# Keep Time 6..21 inclusive. This window must stay in sync with the
# `.iloc[6:22]` label slice at the end of this cell.
features_win = features_win[features_win["Time"].between(6, 21)]
am_pm = get_x_y.get_am_pm(features_win["Time"])
features_win = features_win.drop(columns="Time")
features_win = features_win.reset_index(drop=True)
# Assumes am_pm has a 0..n-1 row index and columns 0, 1, 2 -- TODO confirm
# against get_x_y.get_am_pm.
features_win = pd.concat([features_win, am_pm], axis=1)
features_win = features_win.rename(columns={0: "Am", 1: "Lunch", 2: "Pm"})

# Fix the final column order.
features_win = features_win.reindex(columns=[
    'Ratio', 'Mean', 'Max', 'Min',
    'Std', 'Range', 'Temp', 'Lag-1_Ratio', 'Lag+1_Ratio',
    'Season', 'Sunday', 'Am', 'Lunch', 'Pm'
    ])

# features_win.to_csv("2_Win_X_train.csv", index=False)
# Labels: rows 6..21 by position, flattened column by column like ratio_win.
# Assumes the occupancy table has one row per Time value 0..23 -- TODO confirm.
occupancy_win = occupancy_win.iloc[6:22].values.T.reshape(-1)
occupancy_win = pd.DataFrame(occupancy_win, columns=["Occupancy"])
# occupancy_win.to_csv("2_Win_Y_train.csv", index=False)

In [10]:
# Stack the summer rows on top of the winter rows for both features (X) and
# labels (Y), then write each to CSV without the row index.
X = pd.concat(objs=[features_sum, features_win], axis="index")
Y = pd.concat(objs=[occupancy_sum, occupancy_win], axis="index")
X.to_csv(path_or_buf="2_X_train.csv", index=False)
Y.to_csv(path_or_buf="2_Y_train.csv", index=False)