In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt

In [None]:
# Read the seven error-log CSV exports and stack them row-wise into a single frame.
# The per-file frames keep their df1..df7 names in case other cells refer to them.
log_paths = [
    'ErrorLog_Ver3/errorlog0601_0614_edit.csv',
    'ErrorLog_Ver3/errorlog0615_0630_edit.csv',
    'ErrorLog_Ver3/errorlog0701_0715_edit.csv',
    'ErrorLog_Ver3/errorlog0716_0730_edit.csv',
    'ErrorLog_Ver3/errorlog0731_0814_edit.csv',
    'ErrorLog_Ver3/errorlog0815_0828_edit.csv',
    'ErrorLog_Ver3/errorlog0829_0831_edit.csv',
]
df1, df2, df3, df4, df5, df6, df7 = (pd.read_csv(path) for path in log_paths)

# ignore_index=True renumbers the rows 0..n-1 across all files.
df = pd.concat((df1, df2, df3, df4, df5, df6, df7), axis=0, ignore_index=True)
print(df.shape)
df.head(n=5)

In [None]:
# Aggregate the raw error events into an evenly spaced hourly count series.
#
# Produces:
#   error_date  - sorted list of hour-bucket labels, formatted "MMDD-HH~HH"
#   error_count - list of event counts, aligned index-for-index with error_date
month = [6, 7, 8]  # calendar months kept in the series

EventTime = df.EventTime


def _hour_label(dt):
    """Return the "MMDD-HH~HH" label of the one-hour bucket that contains `dt`."""
    return f"{dt.month:02d}{dt.day:02d}-{dt.hour:02d}~{(dt.hour+1):02d}"


date_dict = {}
first_ts = None  # earliest / latest timestamps among the events that were kept
last_ts = None
for _time in EventTime:
    # NOTE(review): fromtimestamp() interprets the value as epoch seconds and
    # converts it to the *local* timezone of the machine running the notebook,
    # so bucket boundaries depend on where this runs - confirm that is intended.
    dt = datetime.fromtimestamp(_time)

    if dt.month not in month:
        continue

    label = _hour_label(dt)
    date_dict[label] = date_dict.get(label, 0) + 1

    if first_ts is None or _time < first_ts:
        first_ts = _time
    if last_ts is None or _time > last_ts:
        last_ts = _time

# Hours in which no error occurred never show up in the loop above, which would
# silently drop them from the series. Consecutive samples would then no longer
# be one hour apart, which breaks both the lag structure assumed by the AR
# models and the "one tick every 24 samples" daily stride used when plotting.
# Walk from the first to the last kept event in one-hour steps and register any
# missing bucket with a count of zero (existing buckets are left untouched).
if first_ts is not None:
    ts = first_ts
    while ts <= last_ts:
        dt = datetime.fromtimestamp(ts)
        if dt.month in month:
            date_dict.setdefault(_hour_label(dt), 0)
        ts += 3600

# The zero-padded "MMDD-HH" prefix makes lexicographic order chronological
# (assuming all events fall within a single calendar year).
error_date = sorted(date_dict)
error_count = [date_dict[k] for k in error_date]

print(len(error_date))

In [None]:
%matplotlib inline

plt.rcParams['figure.figsize'] = [25, 6]
fig = plt.figure()
plt.plot(error_date, error_count)
plt.xticks(error_date[:][::24], fontsize=12, rotation=60)
plt.yticks(fontsize=18)
plt.show()

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from tqdm import tqdm

# Walk-forward (one-step-ahead) evaluation of pure autoregressive models AR(p)
# on the hourly error counts. For every order p the model is refitted on a
# fixed-length rolling window before each forecast, and the figure shows ground
# truth, prediction and absolute error over the test period.
#
# NOTE: the model is refitted len(test) times for each p, so this cell is slow.

SPLIT = 1300  # number of leading samples forming the initial training window
ANOMALY_THRESHOLD = 5000  # |forecast - observation| at or above this counts as an anomaly

p_list = [24, 48, 72, 96, 120, 144, 168]  # AR orders to compare (multiples of 24 lags)
train = error_count[0:SPLIT]
test = error_count[SPLIT:]

for p in p_list:
    # Give every order its own copy of the initial training window. The rolling
    # window is modified in place below; reusing one list across orders would
    # make every order after the first start from a window that has already
    # been slid over the whole test period (test leakage, and the orders would
    # no longer be comparable).
    history = list(train)
    predict = []
    test_mse = []  # per-step absolute errors (not squared); name kept for compatibility

    print(f"------------p | {p}-----------")
    for t in tqdm(range(len(test))):
        model = SARIMAX(history, trend='c', order=(p, 0, 0), enforce_stationarity=False, enforce_invertibility=False)
        # disp=False silences the optimizer's convergence report, which would
        # otherwise be printed on every refit and bury the progress bar.
        model_fit = model.fit(disp=False)
        output = model_fit.forecast()  # one-step-ahead forecast

        yhat = output[0]
        obs = test[t]
        abs_err = abs(yhat - obs)
        predict.append(yhat)
        test_mse.append(abs_err)

        # Slide the window forward by one sample. An observation that deviates
        # too far from the forecast is treated as an anomaly and replaced by
        # the forecast so that it does not influence later fits.
        del history[0]
        if abs_err < ANOMALY_THRESHOLD:
            history.append(obs)
        else:
            history.append(yhat)

    plt.plot(error_date[SPLIT:], test, label='Ground truth')
    plt.plot(error_date[SPLIT:], predict, color='red', label='Prediction')
    plt.plot(error_date[SPLIT:], test_mse, color='black', label='Error')
    plt.xticks(error_date[SPLIT:][::24], fontsize=12, rotation = 60)
    plt.legend(loc='best')
    plt.title(f'AR (p)=({p}) | Error={np.asarray(test_mse).mean():.02f}',fontsize=18)
    plt.show()
#     plt.savefig(f"./fig/w_drop_anomaly/AR/AR:p_{p}")
#     plt.close()

In [None]:
# plt.plot(error_date[SPLIT:], test, label='Ground truth')
# plt.plot(error_date[SPLIT:], predict, color='red', label='Prediction')
# plt.plot(error_date[SPLIT:], test_mse, color='black', label='Error')
# plt.xticks(error_date[SPLIT:][::24], fontsize=12, rotation = 60)
# plt.legend(loc='best')
# plt.title(f'AR (p)=({p}) | Error={np.asarray(test_mse).mean():.02f}',fontsize=18)
# plt.show()