This notebook is used to improve our models.
It tests whether the trend of the predicted CO2 values is above or below a threshold.

In [None]:
import os
import pandas as pd
import numpy as np
import time
from datetime import timedelta
from pmdarima.arima import ARIMA
from pmdarima.arima import auto_arima
from statsmodels.tsa.ar_model import AR
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6

In [None]:
# Set to True to run the models on every recorded day instead of the
# hand-picked test days listed below.
testAllDays = False

if testAllDays:
    # Every entry of the directory whose name contains ".csv".
    path = "dataset/alldays"
    files = [entry for entry in os.listdir(path) if ".csv" in entry]
else:
    path = "dataset/test"
    files = [
        "2019-05-13 00-00-01 scd30.csv",
        "2019-05-28 01-00-01 scd30.csv",
        "2019-06-13 01-00-02 scd30.csv",
        "2019-05-20 00-00-01 scd30.csv",
        "2019-06-10 01-00-02 scd30.csv",
        "2019-05-29 01-00-02 scd30.csv",
    ]

In [None]:
def custom_resampler(array_like):
    """Return the first element (by position) of a resampling bin.

    Args:
        array_like: The values of one bin; a pandas Series or any sized,
            integer-indexable sequence.

    Returns:
        The element at position 0, or NaN when the bin is empty.
    """
    if len(array_like) == 0:
        # An empty bin has no first sample; report it as missing instead of
        # raising.
        return np.nan
    # Prefer explicit positional access: on a Series whose index is not made
    # of integers, `series[0]` depends on a positional fallback that pandas
    # has deprecated.
    positional = getattr(array_like, "iloc", None)
    if positional is not None:
        return positional[0]
    return array_like[0]

In [None]:
###Autoregressive with threshold
import numpy as np
import time
from statsmodels.tsa.ar_model import AR
from datetime import timedelta

class Test_AR_Threshold():
    """Score an autoregressive forecast on an "above the threshold?" question.

    For each window, an ``AR`` model is fitted on the history, a forecast is
    produced, and the forecast is reduced to one boolean: are at least half of
    the predicted values >= ``threshold``? The same reduction is applied to the
    validation values; the window counts as a hit when both booleans agree.
    Hits and a confusion matrix are accumulated across calls to ``pred``.

    NOTE(review): ``AR`` comes from ``statsmodels.tsa.ar_model``; newer
    statsmodels releases replace it with ``AutoReg`` -- confirm the installed
    version still provides it.
    """

    def __init__(self,bufferSize,nbPred,isLog,confint=30,threshold=300):
        """Store the configuration and reset all counters.

        Args:
            bufferSize: Stored on the instance; not read by this class.
            nbPred: Forecast horizon, in minutes past the last history index.
            isLog: When true, the forecast is passed through ``np.exp`` before
                it is compared with the threshold.
            confint: Stored on the instance; not read by this class.
            threshold: Value the predicted and validation samples are compared
                against.
        """
        self.avgAR = 0
        self.bufferSize = bufferSize
        self.nbPred = nbPred
        self.isLog = isLog
        self.confint = confint
        self.threshold = threshold
        self.truepos = 0
        self.trueneg = 0
        self.falsepos = 0
        self.falseneg = 0

    @staticmethod
    def _mostly_above(values, threshold):
        """Return True when at least half of *values* are >= *threshold*."""
        above = sum(1 for v in values if v >= threshold)
        return (above / len(values)) >= 0.5

    def pred(self,ts,valid,f):
        """Fit on *ts*, forecast, score one window and log it to *f*.

        Writes ``"<seconds>,<hit>"`` to *f*, where ``<seconds>`` is the time
        spent fitting and predicting and ``<hit>`` is 1 or 0. When the
        prediction itself fails, ``<seconds>`` is left empty so every row keeps
        the same number of fields.

        Args:
            ts: History series with a datetime index; expected to be
                log-transformed when ``isLog`` is true.
            valid: Observed values for the forecast horizon, compared with the
                threshold exactly as given.
            f: Writable text file object.

        Raises:
            Whatever ``AR(ts).fit()`` raises; fitting is not guarded.
        """
        import warnings

        start = time.time()
        modelAR = AR(ts).fit()
        elapsed = ""
        percent = 0
        try:
            forecast = modelAR.predict(ts.index[-1],ts.index[-1] + timedelta(minutes=self.nbPred))
            elapsed = str(time.time()-start)
            if(self.isLog):
                forecast = np.exp(forecast)
            predAbove = self._mostly_above(forecast, self.threshold)
            valAbove = self._mostly_above(valid, self.threshold)
        except Exception as e:
            # Best effort: a window that cannot be predicted or evaluated is
            # scored as a miss and left out of the confusion matrix, but the
            # reason is no longer hidden.
            warnings.warn("Test_AR_Threshold.pred: window scored as a miss: "+repr(e))
        else:
            percent = 1 if predAbove == valAbove else 0
            if(predAbove and valAbove):
                self.truepos += 1
            elif(predAbove):
                self.falsepos += 1
            elif(valAbove):
                self.falseneg += 1
            else:
                self.trueneg += 1
        f.write(elapsed+","+str(percent))
        self.avgAR += percent

    def getAvg(self):
        """Return the number of hits so far (the caller divides by the window count)."""
        return self.avgAR

    def getConfMat(self):
        """Return ``[[true pos, false pos], [false neg, true neg]]``."""
        return [[self.truepos,self.falsepos],[self.falseneg,self.trueneg]]

In [None]:
# Timestamps use dashes in the clock part ("2019-05-13 00-00-01").
# pd.to_datetime replaces pd.datetime.strptime: `pd.datetime` is a deprecated
# alias that later pandas releases removed.
dateparse = lambda dates: pd.to_datetime(dates, format='%Y-%m-%d %H-%M-%S')

BUFFER_MINUTES = 20    # history fed to the model
HORIZON_MINUTES = 15   # span used for validation
WINDOW_MINUTES = BUFFER_MINUTES + HORIZON_MINUTES
SUMMARY_PATH = "results/threshold/results-threshold.csv"

for i, fileName in enumerate(files):

    # Load the raw file, then parse the index explicitly so the conversion
    # does not depend on read_csv's date-parsing options.
    data = pd.read_csv(path+'/'+fileName, index_col='Time')
    data.index = dateparse(data.index)

    # Put the samples on a regular one-second grid and fill gaps linearly.
    idx = pd.date_range(start=data.index[0],end=data.index[-1],freq="S")
    dataFix = data.reindex(idx, fill_value=np.nan)
    dataFix = dataFix.interpolate()
    date_rng = pd.date_range(start=dataFix.index[0], end=dataFix.index[-1], freq='1T')

    # Number of sliding windows that fit in this recording.
    j = len(date_rng) - WINDOW_MINUTES
    if j <= 0:
        # Too short for a single window; averaging over j would divide by
        # zero or produce a meaningless value.
        print(fileName+": shorter than "+str(WINDOW_MINUTES)+" minutes, skipped\n")
        continue

    # Not read in this cell; kept in case other cells rely on them.
    nbPredPMin = 12
    confint = 20

    ART = Test_AR_Threshold(BUFFER_MINUTES,HORIZON_MINUTES,True,15,545)

    with open(SUMMARY_PATH,"a") as f:
        f.write(fileName+"\n")
    print(fileName+"\n")

    ts = dataFix["#CO2"]
    print("TS max: "+str(ts.max()))
    print("TS mean: "+str(ts.mean()))
    print("TS median: "+str(ts.median()))

    # One row per window: "<k>,<seconds>,<hit>". The file is opened once per
    # recording and closed even if a window raises.
    with open("results/threshold/results-threshold"+str(i)+".csv","a") as f:
        for k in range(j):
            f.write(str(k)+",")

            historyEnd = date_rng[k+BUFFER_MINUTES]
            tsFix = dataFix[date_rng[k]:historyEnd]["#CO2"]
            valid = dataFix[historyEnd:date_rng[k+WINDOW_MINUTES]]["#CO2"]

            # History: 5-second means, log-transformed for the model.
            # Validation: first sample of each 5-second bin, left in raw units.
            tsFix = tsFix.resample("5S").mean()
            ts_log = np.log(tsFix)
            valid = valid.resample("5S").apply(custom_resampler)

            ART.pred(ts_log,valid,f)

            f.write("\n")

    avgAR = ART.getAvg()/j
    confMat = ART.getConfMat()

    with open(SUMMARY_PATH,"a") as f:
        f.write("Iteration: "+str(i)+"\n")
        f.write("AVG AR "+str(avgAR)+"\n")
        f.write(str(confMat)+"\n")

    print("AVG AR "+str(avgAR))
    print(confMat)
    print("\n")