This notebook is used for the first analysis.
It tests all 8 algorithms on 6 days chosen at random from the pool of data.

In [None]:
import os
import pandas as pd
import numpy as np
import time
from datetime import timedelta
from pmdarima.arima import ARIMA
from pmdarima.arima import auto_arima
from statsmodels.tsa.ar_model import AR
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6

In [None]:
# Directory that holds the test recordings.
path = "dataset/test"

# The six recordings evaluated by this notebook, in processing order.
files = [
    "2019-05-13 00-00-01 scd30.csv",
    "2019-05-28 01-00-01 scd30.csv",
    "2019-06-13 01-00-02 scd30.csv",
    "2019-05-20 00-00-01 scd30.csv",
    "2019-06-10 01-00-02 scd30.csv",
    "2019-05-29 01-00-02 scd30.csv",
]

In [None]:
def custom_resampler(array_like):
    """Return the first element of a 1-D ``array_like``, by position.

    The element is always selected by position. For pandas objects this goes
    through ``.iloc`` because plain ``obj[0]`` is a *label* lookup on an
    integer-indexed Series and only falls back to positional access (a
    deprecated behaviour) for other index types.

    Args:
        array_like: A list, NumPy array, pandas Series or similar 1-D
            container.

    Returns:
        The element at position 0, or NaN when ``array_like`` is empty.
    """
    if len(array_like) == 0:
        # An empty input has no first sample; report it as missing.
        return float("nan")
    positional = getattr(array_like, "iloc", None)
    if positional is not None:
        return positional[0]
    return array_like[0]

In [None]:
###Autoregressive
import numpy as np
import time
from statsmodels.tsa.ar_model import AR
from datetime import timedelta

class Test_AR():
    """Fit an autoregressive model and score its forecast against observations.

    Each call to :meth:`pred` appends two comma-separated fields to the given
    file object: the elapsed time in seconds and the fraction of forecast
    points whose observed value lies within ``+/- confint`` of the forecast.
    The fractions are also accumulated so that the caller can average them.

    NOTE(review): ``statsmodels.tsa.ar_model.AR`` has been removed from recent
    statsmodels releases in favour of ``AutoReg`` -- confirm the pinned
    version before running.
    """

    def __init__(self,bufferSize,nbPred,nbPreMin,isLog,confint=30):
        """Store the test configuration.

        Args:
            bufferSize: Stored as given; not read by this class.
            nbPred: Forecast horizon, in minutes past the last training sample.
            nbPreMin: Stored as given; not read by this class.
            isLog: True when ``ts`` is passed to :meth:`pred` in log scale, so
                the forecast must be exponentiated before it is compared.
            confint: Half-width of the acceptance band around each forecast.
        """
        self.avgAR = 0  # running sum of the per-call fractions
        self.bufferSize = bufferSize
        self.nbPred = nbPred
        self.isLog = isLog
        self.nbPreMin = nbPreMin
        self.confint = confint
        self.lastError = None  # last exception swallowed by pred(), if any

    def pred(self,ts,valid,f):
        """Fit on ``ts``, forecast, score against ``valid`` and log to ``f``.

        Args:
            ts: Training series; its index must support
                ``ts.index[-1] + timedelta``.
            valid: Observed values, aligned by position with the forecast.
            f: Writable text file object receiving ``"<seconds>,<fraction>"``.

        Any exception raised while forecasting or scoring is recorded in
        ``self.lastError`` and the call is scored as 0. Exceptions raised by
        the model fit itself propagate to the caller.
        """
        start = time.time()
        modelAR = AR(ts).fit()
        timeWritten = False
        try:
            horizonEnd = ts.index[-1] + timedelta(minutes=self.nbPred)
            forecast = modelAR.predict(ts.index[-1], horizonEnd)
            f.write(str(time.time() - start) + ",")
            timeWritten = True
            if self.isLog:
                forecast = np.exp(forecast)

            # Compare by position; plain arrays avoid the label-vs-position
            # ambiguity of integer indexing on a pandas Series.
            forecast = np.asarray(forecast)
            observed = np.asarray(valid)
            inPred = 0
            for k in range(len(forecast)):
                # Raises IndexError (scored as 0) when valid is shorter than
                # the forecast.
                if forecast[k] - self.confint <= observed[k] <= forecast[k] + self.confint:
                    inPred += 1
            percent = inPred / len(forecast)
        except Exception as e:
            self.lastError = e
            if not timeWritten:
                # Keep the row at a fixed number of columns even when the
                # forecast itself failed.
                f.write(str(time.time() - start) + ",")
            percent = 0
        f.write(str(percent))
        self.avgAR += percent

    def getAvg(self):
        """Return the sum of all fractions so far (the caller divides by the count)."""
        return self.avgAR

In [None]:
###ARIMA
from pmdarima.arima import ARIMA
from pmdarima.arima import auto_arima
import numpy as np
import time

class Test_ARIMA():
    """Seasonal ARIMA test that reuses the orders found by the first search.

    The first call to :meth:`pred` runs ``auto_arima``; later calls refit an
    ``ARIMA`` with the orders of the previous model and fall back to a fresh
    ``auto_arima`` search if that refit raises.

    Each call appends three comma-separated fields to the given file object:
    elapsed seconds, fraction of observations inside the forecast confidence
    interval, and the width of the interval at the last forecast step.

    NOTE: the refit path resolves the module-level name ``ARIMA`` at call
    time. If that name is rebound to something else, the refit raises and
    every call silently falls back to ``auto_arima``.
    """

    def __init__(self,bufferSize,nbPred,isLog):
        """Store the test configuration.

        Args:
            bufferSize: Stored as given; not read by this class.
            nbPred: Number of periods to forecast.
            isLog: True when ``ts`` is passed to :meth:`pred` in log scale, so
                the confidence bounds must be exponentiated before comparison.
        """
        # Orders of the most recent model, reused for the next refit.
        self.p = 0
        self.d = 0
        self.q = 0
        self.seasonalOrder = []
        self.avgArima = 0  # running sum of the per-call fractions
        self.newModel = True  # True until the first auto_arima search succeeds
        self.bufferSize = bufferSize
        self.nbPred = nbPred
        self.isLog = isLog
        self.model = None
        self.lastError = None  # last exception that triggered the fallback

    def _autoFit(self,ts):
        """Run the stepwise seasonal ``auto_arima`` search on ``ts``."""
        return auto_arima(ts, start_p=1, start_q=1, max_p=3, max_q=3, m=12,
                          start_P=0, seasonal=True, d=1, D=1, trace=False,
                          error_action='ignore',  # don't want to know if an order does not work
                          suppress_warnings=True,  # don't want convergence warnings
                          stepwise=True)  # set to stepwise

    def pred(self,ts,valid,f):
        """Fit on ``ts``, forecast ``nbPred`` periods, score and log to ``f``.

        Args:
            ts: Training series.
            valid: Observed values, aligned by position with the forecast.
            f: Writable text file object receiving
                ``"<seconds>,<fraction>,<last interval width>"``.
        """
        start = time.time()
        try:
            if self.newModel:
                self.model = self._autoFit(ts)
                self.newModel = False
            else:
                so = self.seasonalOrder
                self.model = ARIMA(order=(self.p, self.d, self.q),
                                   seasonal_order=(so[0], so[1], so[2], so[3])).fit(ts)
        except Exception as e:
            # Best effort: remember why the attempt failed, then search again.
            self.lastError = e
            self.model = self._autoFit(ts)
        pred, conf_int = self.model.predict(n_periods=self.nbPred, return_conf_int=True)
        f.write(str(time.time() - start) + ",")

        # Remember the fitted orders for the next refit.
        params = self.model.get_params()
        order = params["order"]
        self.seasonalOrder = params["seasonal_order"]
        self.p = order[0]
        self.d = order[1]
        self.q = order[2]

        if self.isLog:
            conf_int = np.exp(conf_int)
        # Compare by position; a plain array avoids the label-vs-position
        # ambiguity of integer indexing on a pandas Series.
        observed = np.asarray(valid)
        inPred = sum(1 for k in range(len(pred))
                     if conf_int[k][0] <= observed[k] <= conf_int[k][1])
        percent = inPred / len(pred)
        f.write(str(percent) + ",")
        f.write(str(conf_int[-1][1] - conf_int[-1][0]))
        self.avgArima += percent

    def getAvg(self):
        """Return the sum of all fractions so far (the caller divides by the count)."""
        return self.avgArima

In [None]:
###ARIMA with new model
from pmdarima.arima import ARIMA
from pmdarima.arima import auto_arima
import numpy as np
import time

class Test_ARIMA_NML():
    """Seasonal ARIMA test that runs a new ``auto_arima`` search on every call.

    Each call to :meth:`pred` appends three comma-separated fields to the
    given file object: elapsed seconds, fraction of observations inside the
    forecast confidence interval, and the width of the interval at the last
    forecast step.
    """

    def __init__(self,bufferSize,nbPred,isLog):
        """Store the test configuration.

        Args:
            bufferSize: Stored as given; not read by this class.
            nbPred: Number of periods to forecast and to score.
            isLog: True when ``ts`` is passed to :meth:`pred` in log scale, so
                the confidence bounds must be exponentiated before comparison.
        """
        self.avgArima = 0  # running sum of the per-call fractions
        self.bufferSize = bufferSize
        self.nbPred = nbPred
        self.isLog = isLog

    def pred(self,ts,valid,f):
        """Search a model on ``ts``, forecast, score and log to ``f``.

        Args:
            ts: Training series.
            valid: Observed values, aligned by position with the forecast;
                needs at least ``nbPred`` elements.
            f: Writable text file object receiving
                ``"<seconds>,<fraction>,<last interval width>"``.
        """
        start = time.time()
        self.model = auto_arima(ts, start_p=1, start_q=1, max_p=3, max_q=3, m=12,
                                start_P=0, seasonal=True, d=1, D=1, trace=False,
                                error_action='ignore',  # don't want to know if an order does not work
                                suppress_warnings=True,  # don't want convergence warnings
                                stepwise=True)  # set to stepwise
        _, conf_int = self.model.predict(n_periods=self.nbPred, return_conf_int=True)
        f.write(str(time.time() - start) + ",")

        if self.isLog:
            conf_int = np.exp(conf_int)
        # Compare by position; a plain array avoids the label-vs-position
        # ambiguity of integer indexing on a pandas Series.
        observed = np.asarray(valid)
        inPred = sum(1 for k in range(self.nbPred)
                     if conf_int[k][0] <= observed[k] <= conf_int[k][1])
        percent = inPred / self.nbPred
        # The separator keeps the fraction and the interval width in separate
        # CSV columns.
        f.write(str(percent) + ",")
        f.write(str(conf_int[-1][1] - conf_int[-1][0]))
        self.avgArima += percent

    def getAvg(self):
        """Return the sum of all fractions so far (the caller divides by the count)."""
        return self.avgArima

In [None]:
from pmdarima.arima import ARIMA
from pmdarima.arima import auto_arima
import numpy as np
import time

class Test_ARIMA_Update():
    """Seasonal ARIMA test that updates the model between periodic searches.

    Every tenth call to :meth:`pred` (starting with the first) runs a new
    ``auto_arima`` search; the calls in between feed the newest observations
    to the existing model through ``model.update``.

    Each call appends three comma-separated fields to the given file object:
    elapsed seconds, fraction of observations inside the forecast confidence
    interval, and the width of the interval at the last forecast step.
    """

    def __init__(self,bufferSize,nbPred,isLog):
        """Store the test configuration.

        Args:
            bufferSize: Stored as given; not read by this class.
            nbPred: Number of periods to forecast and to score.
            isLog: True when the series are passed to :meth:`pred` in log
                scale, so the confidence bounds must be exponentiated before
                comparison.
        """
        # p, d, q, seasonalOrder and newModel are not read by this class; they
        # are kept so that the attributes remain available to existing code.
        self.p = 0
        self.d = 0
        self.q = 0
        self.seasonalOrder = []
        self.avgArima = 0  # running sum of the per-call fractions
        self.newModel = True
        self.bufferSize = bufferSize
        self.nbPred = nbPred
        self.isLog = isLog
        self.i = 0  # number of completed pred() calls
        self.model = None

    def pred(self,ts,valid,ts_Up,f):
        """Refresh the model, forecast ``nbPred`` periods, score and log to ``f``.

        Args:
            ts: Full training series, used when a new model is searched.
            valid: Observed values, aligned by position with the forecast;
                needs at least ``nbPred`` elements.
            ts_Up: Newest observations, passed to ``model.update`` otherwise.
            f: Writable text file object receiving
                ``"<seconds>,<fraction>,<last interval width>"``.
        """
        start = time.time()
        if self.i % 10 == 0 or self.model is None:
            self.model = auto_arima(ts, start_p=1, start_q=1, max_p=3, max_q=3, m=12,
                                    start_P=0, seasonal=True, d=1, D=1, trace=False,
                                    error_action='ignore',  # don't want to know if an order does not work
                                    suppress_warnings=True,  # don't want convergence warnings
                                    stepwise=True)  # set to stepwise
        else:
            self.model.update(ts_Up)
        _, conf_int = self.model.predict(n_periods=self.nbPred, return_conf_int=True)
        f.write(str(time.time() - start) + ",")

        if self.isLog:
            conf_int = np.exp(conf_int)
        # Compare by position; a plain array avoids the label-vs-position
        # ambiguity of integer indexing on a pandas Series.
        observed = np.asarray(valid)
        inPred = sum(1 for k in range(self.nbPred)
                     if conf_int[k][0] <= observed[k] <= conf_int[k][1])
        percent = inPred / self.nbPred
        # The separator keeps the fraction and the interval width in separate
        # CSV columns.
        f.write(str(percent) + ",")
        f.write(str(conf_int[-1][1] - conf_int[-1][0]))
        self.avgArima += percent
        self.i = self.i + 1

    def getAvg(self):
        """Return the sum of all fractions so far (the caller divides by the count)."""
        return self.avgArima

In [None]:
# Run every algorithm over a sliding window on each selected recording and
# append one CSV row per window to results/allAlgo/results<i>-buff<size>.csv.
rangeDown = 0
rangeUp = 5
for i in range(rangeDown, rangeUp+1):
    # Parse the 'Time' column explicitly: the timestamps use '-' between the
    # time fields, and pd.datetime is no longer available in current pandas.
    data = pd.read_csv(os.path.join(path, files[i]), index_col='Time')
    data.index = pd.to_datetime(data.index, format='%Y-%m-%d %H-%M-%S')

    # Put the recording on a regular one-second grid and fill the gaps.
    idx = pd.date_range(start=data.index[0],end=data.index[-1],freq="S")
    dataFix = data.reindex(idx, fill_value=np.nan)
    dataFix = dataFix.interpolate()
    # Window boundaries, five minutes apart.
    date_rng = pd.date_range(start=dataFix.index[0], end=dataFix.index[-1], freq='5T')

    nbIt = len(date_rng) - 4
    nbPredMin = 12
    buffersize = 10
    nbDate = int(buffersize/5)  # number of 5-minute steps in the training buffer
    print(nbDate)

    ###Test declaration
    # The plain ARIMA test must not be bound to the name `ARIMA`: that would
    # shadow pmdarima's ARIMA class, which Test_ARIMA.pred looks up at call
    # time for its refit path.
    ARIMA_base = Test_ARIMA(buffersize,5*nbPredMin,False)
    ARIMA_log = Test_ARIMA(buffersize,5*nbPredMin,True)
    ARIMA_new = Test_ARIMA_NML(buffersize,5*nbPredMin,False)
    ARIMA_new_log = Test_ARIMA_NML(buffersize,5*nbPredMin,True)
    ARIMA_up = Test_ARIMA_Update(buffersize,5*nbPredMin,False)
    ARIMA_up_log = Test_ARIMA_Update(buffersize,5*nbPredMin,True)

    # NOTE(review): Test_AR forecasts nbPred=30 minutes ahead while `valid`
    # below spans a single 5-minute step; presumably the forecast is then
    # longer than `valid` and the call is scored as 0 -- confirm that 30 is
    # the intended horizon.
    AR30 = Test_AR(buffersize,30,nbPredMin,False)
    AR30_log = Test_AR(buffersize,30,nbPredMin,True)

    print(nbIt)

    resultFile = "results/allAlgo/results"+str(i)+"-buff"+str(buffersize)+".csv"
    for j in range(0,nbIt):
        if(not j%10):
            print(str(j)+"/"+str(nbIt))

        # The window slides with j, the iteration index (not with i, the file
        # index): training buffer, its last step for the update-style test,
        # and the following step as validation data.
        bufferStart = date_rng[j]
        bufferEnd = date_rng[j+nbDate]
        tsFix = dataFix[bufferStart:bufferEnd]["#CO2"].resample("5S").mean()
        tsFix_Up = dataFix[date_rng[j+nbDate-1]:bufferEnd]["#CO2"].resample("5S").mean()
        ts_log = np.log(tsFix)
        ts_log_Up = np.log(tsFix_Up)
        valid = dataFix[bufferEnd:date_rng[j+nbDate+1]]["#CO2"].resample("5S").mean()

        # Re-open in append mode for every row so completed rows are on disk
        # even if a later window fails; `with` closes the file on error too.
        with open(resultFile,"a") as f:
            f.write(str(j)+",")

            ARIMA_base.pred(tsFix,valid,f)
            f.write(",")
            ARIMA_log.pred(ts_log,valid,f)
            f.write(",")
            ARIMA_new.pred(tsFix,valid,f)
            f.write(",")
            ARIMA_new_log.pred(ts_log,valid,f)
            f.write(",")
            ARIMA_up.pred(tsFix,valid,tsFix_Up,f)
            f.write(",")
            ARIMA_up_log.pred(ts_log,valid,ts_log_Up,f)
            f.write(",")

            AR30.pred(tsFix,valid,f)
            f.write(",")
            AR30_log.pred(ts_log,valid,f)

            f.write("\n")

    # getAvg() returns the sum of the per-window scores; divide by the count.
    print("Iteration: "+str(i))
    print("AVG ARIMA "+str(ARIMA_base.getAvg()/nbIt))
    print("AVG ARIMA log "+str(ARIMA_log.getAvg()/nbIt))
    print("AVG ARIMA new model "+str(ARIMA_new.getAvg()/nbIt))
    print("AVG ARIMA new model log "+str(ARIMA_new_log.getAvg()/nbIt))
    print("AVG ARIMA update "+str(ARIMA_up.getAvg()/nbIt))
    print("AVG ARIMA update log "+str(ARIMA_up_log.getAvg()/nbIt))
    print("AVG AR "+str(AR30.getAvg()/nbIt))
    print("AVG AR log "+str(AR30_log.getAvg()/nbIt))