In [1]:
import os
import pickle

import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
hotel_hourly_pwr_df = pd.read_feather("../data/feature_engineered/hotel__hourly_shift_rolling.feather")

hotel_hourly_pwr_df.set_index("timestamp", inplace=True)

display(hotel_hourly_pwr_df.head())

Unnamed: 0_level_0,hotel,shift_1,shift_2,rolling_1_mean,rolling_2_mean,rolling_3_mean,rolling_2_min,rolling_2_max
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-08-01 00:00:00,69.667207,,,69.667207,,,,
2019-08-01 01:00:00,69.632524,69.667207,,69.632524,69.649865,,69.632524,69.667207
2019-08-01 02:00:00,67.874325,69.632524,69.667207,67.874325,68.753424,69.058018,67.874325,69.632524
2019-08-01 03:00:00,67.816213,67.874325,69.632524,67.816213,67.845269,68.441021,67.816213,67.874325
2019-08-01 04:00:00,65.888407,67.816213,67.874325,65.888407,66.85231,67.192982,65.888407,67.816213


### Modeling Step

Since the data is available from Aug 1, 2019 - Sep 30, 2019. Will try to **forecast** the _power consumed_ by the hotel for the next **7 days on hourly granularity**.

#### Evaluation Metrics

* MAE (Mean Absolute Error)


* MSE (Mean Squared Error)

### Dividing training and testing data

In [3]:
df_train = hotel_hourly_pwr_df[:"2019-09-23"]
df_test = hotel_hourly_pwr_df["2019-09-24": "2019-09-30"]

In [4]:
X_train = df_train.iloc[2:, 1:].values
y_train = df_train.iloc[2:, 0].values

X_test = df_test.iloc[2:, 1:].values
y_test = df_test.iloc[2:, 0].values

### Performing Standard Scaling

In [5]:
sc = StandardScaler()

X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

### Fitting a simple linear regression model

In [6]:
lin_reg = LinearRegression()

In [7]:
lin_reg.fit(X_train, y_train)

LinearRegression()

In [8]:
y_pred = lin_reg.predict(X_test)

### Model Evaluation

In [9]:
mean_absolute_error(y_test, y_pred)

9.93047678893634e-15

In [10]:
mean_squared_error(y_test, y_pred)

1.4309746733744041e-28

### Saving pickle files

In [11]:
os.makedirs("../pickles/", exist_ok=True)

In [12]:
with open("../pickles/standard_scaler.pkl", "wb") as fp:
    pickle.dump(sc, fp)
    
with open("../pickles/lin_reg.pkl", "wb") as fp:
    pickle.dump(lin_reg, fp)