In [64]:
import numpy as np
import pandas as pd
import os
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import mean_squared_error
import math

%matplotlib inline

In [70]:
def getTrainTestdata(data_dir, appliance="refrigerator", house_idx=1):
    """Read the train and test CSV files for one appliance of one house.

    The files are looked up at
    ``<data_dir>/<appliance>/h<house_idx>_train.csv`` and
    ``<data_dir>/<appliance>/h<house_idx>_test.csv``. The first column of
    each CSV becomes the index of the resulting frame.

    Parameters
    ----------
    data_dir : str
        Directory that contains one sub-directory per appliance.
    appliance : str
        Name of the appliance sub-directory.
    house_idx : int
        House number used in the ``h<house_idx>_`` file-name prefix.

    Returns
    -------
    tuple
        ``(train_frame, test_frame)`` as pandas DataFrames.
    """
    def _read_split(split_name):
        # Both splits share the same naming scheme; only the suffix differs.
        file_name = f"h{house_idx!s}_{split_name}.csv"
        return pd.read_csv(os.path.join(data_dir, appliance, file_name), index_col=0)

    # Train is read before test, matching the order of the returned pair.
    return _read_split("train"), _read_split("test")

In [71]:
def computeMetrics(df_pred, horizons=(1, 2, 3)):
    """Print and return the RMSE of every forecast horizon.

    Parameters
    ----------
    df_pred : pandas.DataFrame (or any mapping of column name -> sequence)
        Must provide a ``gt_<h>`` (ground truth) and a ``pred_<h>``
        (prediction) column for every ``h`` in ``horizons``.
    horizons : iterable of int, optional
        Horizon numbers to evaluate. Defaults to ``(1, 2, 3)``.

    Returns
    -------
    dict
        ``{horizon: rmse}`` so callers can use the metrics programmatically
        instead of parsing the printed text.

    Raises
    ------
    KeyError
        If a required ``gt_<h>`` / ``pred_<h>`` column is missing.
    """
    # Compute every metric before printing anything, so a missing column
    # fails without leaving partial output behind.
    rmse_by_horizon = {
        h: math.sqrt(mean_squared_error(df_pred[f"gt_{h}"], df_pred[f"pred_{h}"]))
        for h in horizons
    }

    for h, rmse in rmse_by_horizon.items():
        print(f"{h}-hr RMSE = ", rmse)

    return rmse_by_horizon

In [78]:
def _forecast_one_step(series, order):
    """Fit an ARIMA model of ``order`` on ``series`` and return its one-step-ahead forecast."""
    return ARIMA(series, order=order).fit().forecast()[0]


def trainARIMA(df_train, df_test, order=(10, 1, 0)):
    """Walk-forward ARIMA evaluation producing 1-, 2- and 3-step-ahead forecasts.

    For every position ``idx`` of the test series (except the last two, which
    lack a full 3-step ground truth) the model is fitted on all values seen so
    far and asked for the next value. That forecast is appended to a *copy* of
    the history and the model is refitted to obtain the 2-step forecast, and
    once more for the 3-step forecast. Afterwards the true value at ``idx`` is
    added to the history and the window moves on.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training data; only the ``"output"`` column is read.
    df_test : pandas.DataFrame
        Test data; the ``"output"`` column supplies the ground truth and the
        index supplies the ``time_stamp`` column of the result.
    order : tuple, optional
        ``(p, d, q)`` order handed to ``ARIMA``. Defaults to ``(10, 1, 0)``.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated position with the columns ``time_stamp``,
        ``gt_1..gt_3`` and ``pred_1..pred_3``. The same frame is passed to
        ``computeMetrics``, which prints the per-horizon RMSE.

    Raises
    ------
    ValueError
        If ``df_test`` has fewer than three rows, i.e. not a single complete
        3-step evaluation window.
    """
    n_horizons = 3

    history = list(df_train["output"])  # list() copies, so df_train is never modified
    test_gt_data = list(df_test["output"])
    test_ts = list(df_test.index)

    # Fail early with a clear message instead of letting the metric
    # computation choke on an empty prediction frame.
    if len(test_gt_data) < n_horizons:
        raise ValueError(
            "df_test needs at least %d rows to evaluate %d-step forecasts, got %d"
            % (n_horizons, n_horizons, len(test_gt_data))
        )

    horizon_ids = range(1, n_horizons + 1)
    y_pred_cols = (
        ["time_stamp"]
        + ["gt_%d" % h for h in horizon_ids]
        + ["pred_%d" % h for h in horizon_ids]
    )

    y_pred_data = []
    for idx in tqdm(range(len(test_gt_data) - (n_horizons - 1))):
        # NOTE(review): each further horizon refits the model on the history
        # extended by the model's own previous forecasts. A single fit followed
        # by ``forecast(steps=3)`` would presumably be cheaper, but may give
        # slightly different numbers - kept as-is for result parity.
        preds = []
        for _ in horizon_ids:
            # ``history + preds`` builds a new list; ``history`` itself only
            # ever receives true observations.
            preds.append(_forecast_one_step(history + preds, order))

        y_pred_data.append(
            [test_ts[idx]] + test_gt_data[idx:idx + n_horizons] + preds
        )
        history.append(test_gt_data[idx])

    y_pred_df = pd.DataFrame(data=y_pred_data, columns=y_pred_cols)

    # Compute Metrics:
    computeMetrics(y_pred_df)

    return y_pred_df

In [79]:
# Root directory handed to getTrainTestdata. It is a relative path, so it
# resolves against the kernel's current working directory.
base_data_dir = "../data/redd_forecast_processed/"

In [80]:
# Run the walk-forward ARIMA evaluation for each appliance of house 1.
# NOTE(review): "dishwaser" looks like a misspelling of "dishwasher", but the
# string is used as a directory name by getTrainTestdata and the recorded run
# completed with it - confirm the on-disk folder name before changing it.
appliance_list = ["refrigerator", "dishwaser"]
for cur_app in appliance_list:
    print("Appliance = ", cur_app)
    df_train, df_test = getTrainTestdata(base_data_dir, appliance=cur_app, house_idx=1)
    # Slow step: the recorded run shows close to six minutes per appliance.
    trainARIMA(df_train, df_test)
    print("\n")

  0%|          | 0/260 [00:00<?, ?it/s]

Appliance =  refrigerator


100%|██████████| 260/260 [05:46<00:00,  1.33s/it]
  0%|          | 0/260 [00:00<?, ?it/s]

1-hr RMSE =  36.80262891761127
2-hr RMSE =  37.68169036112851
3-hr RMSE =  37.308303691832194


Appliance =  dishwaser


100%|██████████| 260/260 [05:47<00:00,  1.34s/it]

1-hr RMSE =  117.89277636330759
2-hr RMSE =  141.7360759276701
3-hr RMSE =  144.6487435744681





