This notebook is used to improve our models.
The tests vary the buffer size and the time interval of the predictions.

In [None]:
import os
import pandas as pd
import numpy as np
import time
from datetime import timedelta
from pmdarima.arima import ARIMA
from pmdarima.arima import auto_arima
from statsmodels.tsa.ar_model import AR
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6

In [None]:
# Directory that holds the sensor CSV exports used for testing.
path = "dataset/test"
# CSV files to evaluate, one per recording session (order is significant:
# the driver loop addresses them by position).
files = [
    "2019-05-13 00-00-01 scd30.csv",
    "2019-05-28 01-00-01 scd30.csv",
    "2019-06-13 01-00-02 scd30.csv",
    "2019-05-20 00-00-01 scd30.csv",
    "2019-06-10 01-00-02 scd30.csv",
    "2019-05-29 01-00-02 scd30.csv",
]

In [None]:
def custom_resampler(array_like):
    """Return the element at index 0 of ``array_like``.

    NOTE(review): not referenced elsewhere in the visible notebook;
    presumably meant as a "take the first sample" resampling aggregator.
    """
    head = array_like[0]
    return head

In [None]:
###Autoregressive
import numpy as np
import time
from statsmodels.tsa.ar_model import AR
from datetime import timedelta

class Test_AR():
    """Accumulate the hit rate of an autoregressive (AR) forecaster.

    Each call to :meth:`pred` fits an AR model on a buffer, forecasts
    ``nbPred`` minutes past the last buffer timestamp and records the share
    of validation points lying within ``+/- confint`` of the forecast.

    NOTE(review): statsmodels deprecated ``AR`` in favour of ``AutoReg``;
    confirm the installed version still provides it.
    """

    def __init__(self, bufferSize, nbPred, nbPreMin, isLog, confint=30):
        """Store the test configuration.

        bufferSize -- buffer length label (stored only, not used here).
        nbPred     -- prediction horizon, in minutes.
        nbPreMin   -- samples per minute (stored only, not used here).
        isLog      -- True when the model is fitted on log values; the
                      forecast is then exponentiated before scoring.
        confint    -- half-width of the tolerance band around the forecast.
        """
        self.avgAR = 0
        self.bufferSize = bufferSize
        self.nbPred = nbPred
        self.isLog = isLog
        self.nbPreMin = nbPreMin
        self.confint = confint

    def _score(self, pred, valid):
        """Return the fraction of forecast points matched by ``valid``.

        Values are compared by position. Both inputs are converted to plain
        arrays first, because integer keys on a datetime-indexed Series are
        not reliably positional across pandas versions.
        Returns 0 when the forecast is empty or ``valid`` is too short.
        """
        predicted = np.asarray(pred, dtype=float)
        observed = np.asarray(valid, dtype=float)
        if self.isLog:
            predicted = np.exp(predicted)

        count = len(predicted)
        if count == 0 or len(observed) < count:
            return 0

        observed = observed[:count]
        inside = ((observed >= predicted - self.confint)
                  & (observed <= predicted + self.confint))
        return int(np.count_nonzero(inside)) / count

    def pred(self, ts, valid, f):
        """Fit on ``ts``, forecast, score against ``valid`` and log to ``f``.

        Writes exactly two CSV fields to ``f``: the elapsed fit+predict time
        followed by the hit rate. The hit rate is also added to the running
        total returned by :meth:`getAvg`.
        """
        start = time.time()
        modelAR = AR(ts).fit()
        try:
            forecast = modelAR.predict(
                ts.index[-1], ts.index[-1] + timedelta(minutes=self.nbPred))
        except Exception:
            # Best effort: a failed forecast counts as a 0% hit rate.
            forecast = None
        # Always emit the timing field so every CSV row keeps the same
        # number of columns, even when the forecast failed.
        f.write(str(time.time() - start) + ",")

        percent = 0 if forecast is None else self._score(forecast, valid)
        f.write(str(percent))
        self.avgAR += percent

    def getAvg(self):
        """Return the sum of the hit rates recorded so far.

        The caller divides by the number of iterations to get the mean.
        """
        return self.avgAR

In [None]:
###ARIMA
from pmdarima.arima import ARIMA
from pmdarima.arima import auto_arima
import numpy as np
import time

class Test_ARIMA():
    """Accumulate the hit rate of a seasonal ARIMA forecaster.

    The first call to :meth:`pred` searches for model orders with
    ``auto_arima``; later calls reuse the orders found by the previous call
    and only fall back to a new search when fitting with them fails.
    """

    def __init__(self, bufferSize, nbPred, isLog):
        """Store the test configuration.

        bufferSize -- buffer length label (stored only, not used here).
        nbPred     -- number of periods to forecast.
        isLog      -- True when the model is fitted on log values; the
                      confidence bounds are then exponentiated.
        """
        self.p = 0
        self.d = 0
        self.q = 0
        self.seasonalOrder = []
        self.avgArima = 0
        self.newModel = True
        self.bufferSize = bufferSize
        self.nbPred = nbPred
        self.isLog = isLog

    @staticmethod
    def _autoFit(ts):
        """Run the stepwise seasonal ``auto_arima`` search on ``ts``."""
        return auto_arima(ts, start_p=1, start_q=1, max_p=3, max_q=3, m=12,
                          start_P=0, seasonal=True, d=1, D=1, trace=False,
                          error_action='ignore',  # don't want to know if an order does not work
                          suppress_warnings=True,  # don't want convergence warnings
                          stepwise=True)  # set to stepwise

    def _fit(self, ts):
        """Return a model fitted on ``ts``, reusing known orders if possible."""
        try:
            if self.newModel:
                model = self._autoFit(ts)
                self.newModel = False
            else:
                so = self.seasonalOrder
                model = ARIMA(order=(self.p, self.d, self.q),
                              seasonal_order=(so[0], so[1], so[2], so[3])).fit(ts)
        except Exception:
            # The remembered orders did not fit this buffer: search again.
            model = self._autoFit(ts)
        return model

    def _coverage(self, valid, conf_int, nbSteps):
        """Return the share of the first ``nbSteps`` points inside the bounds.

        ``valid`` is compared by position with the rows of ``conf_int``
        (columns: lower bound, upper bound). It is converted to a plain
        array first, because integer keys on a datetime-indexed Series are
        not reliably positional across pandas versions.
        Raises IndexError when ``valid`` has fewer than ``nbSteps`` points.
        """
        observed = np.asarray(valid, dtype=float)
        if len(observed) < nbSteps:
            raise IndexError("validation window is shorter than the forecast")
        observed = observed[:nbSteps]
        bounds = np.asarray(conf_int, dtype=float)[:nbSteps]
        inside = (observed >= bounds[:, 0]) & (observed <= bounds[:, 1])
        return int(np.count_nonzero(inside)) / nbSteps

    def pred(self, ts, valid, f):
        """Fit on ``ts``, forecast, score against ``valid`` and log to ``f``.

        Writes three CSV fields to ``f``: elapsed fit+predict time, hit rate,
        and the width of the last confidence interval. The hit rate is also
        added to the running total returned by :meth:`getAvg`.
        """
        start = time.time()
        self.model = self._fit(ts)
        pred, conf_int = self.model.predict(n_periods=self.nbPred, return_conf_int=True)
        end = time.time()
        f.write(str(end - start) + ",")

        # Remember the orders so the next call can skip the search.
        params = self.model.get_params()
        order = params["order"]
        self.seasonalOrder = params["seasonal_order"]
        self.p = order[0]
        self.d = order[1]
        self.q = order[2]

        if self.isLog:
            conf_int = np.exp(conf_int)
        percent = self._coverage(valid, conf_int, len(pred))
        f.write(str(percent) + ",")
        f.write(str(conf_int[-1][1] - conf_int[-1][0]))
        self.avgArima += percent

    def getAvg(self):
        """Return the sum of the hit rates recorded so far.

        The caller divides by the number of iterations to get the mean.
        """
        return self.avgArima

In [None]:
# Inclusive range of positions in `files` to evaluate.
rangeDown = 0
rangeUp = 5

# Number of 5-second samples per minute (series are resampled to 5 s below).
nbPredPMin = 12
# Buffer lengths and prediction horizons under test, both in minutes.
bufferSizes = (10, 20)
horizons = (5, 10, 15)
# Every (buffer, horizon) pair, in the column order of the result files.
configs = [(buff, horizon) for buff in bufferSizes for horizon in horizons]
# Minutes needed after the start of an iteration for the largest test.
maxSpan = max(bufferSizes) + max(horizons)

for i in range(rangeDown, rangeUp+1):
    # Parse the timestamps explicitly: `pd.datetime` and the `date_parser`
    # argument of read_csv are not available in current pandas releases.
    data = pd.read_csv(os.path.join(path, files[i]), index_col='Time')
    data.index = pd.to_datetime(data.index, format='%Y-%m-%d %H-%M-%S')

    # Rebuild a gap-free 1-second series, interpolating the missing samples.
    idx = pd.date_range(start=data.index[0], end=data.index[-1], freq="s")
    dataFix = data.reindex(idx, fill_value=np.nan)
    dataFix = dataFix.interpolate()
    date_rng = pd.date_range(start=dataFix.index[0], end=dataFix.index[-1], freq='min')
    co2 = dataFix["#CO2"]

    nbIt = len(date_rng) - maxSpan
    buffersize = 10
    nbDate = int(buffersize/5)
    print(nbDate)  # diagnostic output kept from the original notebook

    print(nbIt)
    if nbIt <= 0:
        # Recording too short for a single window; the averages below
        # would divide by zero.
        print("Skipping "+files[i]+": not enough data")
        continue

    ###Test declaration
    arimaTests = {}
    arTests = {}
    for buff, horizon in configs:
        arimaTests[(buff, horizon)] = Test_ARIMA(buff, horizon*nbPredPMin, False)
        arTests[(buff, horizon)] = Test_AR(buff, horizon, nbPredPMin, True)

    # One result row per start minute j; the file is closed even if a model
    # raises part-way through.
    with open("results/buffpred/results-buff-pred"+str(i)+".csv", "a") as f:
        for j in range(0, nbIt):
            if(not j%10):
                print(str(j)+"/"+str(nbIt))
            f.write(str(j)+",")

            firstField = True
            for buff in bufferSizes:
                # Training buffer: `buff` minutes starting at minute j.
                tsFix = co2.loc[date_rng[j]:date_rng[j+buff]].resample("5s").mean()
                ts_log = np.log(tsFix)

                for horizon in horizons:
                    # Validation window: `horizon` minutes after the buffer.
                    # NOTE(review): this window starts on the last buffer
                    # timestamp, while the ARIMA forecast presumably starts
                    # one step later; confirm the intended alignment.
                    valid = co2.loc[date_rng[j+buff]:date_rng[j+buff+horizon]]
                    valid = valid.resample("5s").mean()

                    if not firstField:
                        f.write(",")
                    firstField = False
                    arimaTests[(buff, horizon)].pred(tsFix, valid, f)
                    f.write(",")
                    arTests[(buff, horizon)].pred(ts_log, valid, f)

            f.write("\n")
            f.flush()  # keep partial results visible during long runs

    # Per-file summary: mean hit rate of every configuration.
    summary = ["Iteration: "+str(i)]
    for buff, horizon in configs:
        summary.append("AVG ARIMA "+str(buff)+"-"+str(horizon)+" "
                       + str(arimaTests[(buff, horizon)].getAvg()/nbIt))
    for buff, horizon in configs:
        summary.append("AVG AR "+str(buff)+"-"+str(horizon)+" "
                       + str(arTests[(buff, horizon)].getAvg()/nbIt))

    with open("results/buffpred/results-buff-pred.csv", "a") as f:
        f.write("\n".join(summary)+"\n")
        f.write("\n")

    for line in summary:
        print(line)