In [None]:
!pip3 install pmdarima
!pip3 install seaborn

In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.graphics.api import qqplot
from pmdarima.arima import auto_arima
import multiprocessing as mp

sns.set(rc={'figure.figsize':(11.7,8.27)})

## Reading the datasets

df1 = pd.read_csv("data/BATADAL_dataset03.csv")
df2 = pd.read_csv("data/BATADAL_dataset04.csv")
df2.columns = df2.columns.str.replace(" ", "")
dftest = pd.read_csv("data/BATADAL_test_dataset.csv")

In [None]:
## Trying to fit ARMA models on the training data

target_sensors = df1.columns.tolist()
target_sensors.remove("DATETIME")
target_sensors.remove("ATT_FLAG")
target_sensors = target_sensors[0:4]
models = []

def get_arima(sensor, series):
    print("-"*20)
    print("Currently trying to fit " + sensor)
    print("-"*20)
    stepwise_fit = auto_arima(df1[sensor], start_p=1, start_q=1, max_p=50, max_q=50,
                          error_action='ignore',  # don't want to know if an order does not work
                          suppress_warnings=True,  # don't want convergence warnings
                          stepwise=True)  # set to stepwise
    return stepwise_fit

pool = mp.Pool(processes=4)
models = [pool.apply_async(get_arima, args=(sensor, df1[sensor].values.copy(),)) for sensor in target_sensors]
pool.close()  # do not accept any more tasks
pool.join()  # wait for the completion of all scheduled jobs
print(models)


--------------------
--------------------
--------------------
--------------------
Currently trying to fit L_T2
Currently trying to fit L_T3
Currently trying to fit L_T4
--------------------
--------------------
Currently trying to fit L_T1
--------------------
--------------------


In [None]:
## Saving these models
import pickle 

filename = "ARMA_models.obj"
file_to_write = open(filename, 'w') 
pickle.dump(models, file_to_write)

In [19]:
## Get the best fits



array([0.50972992, 0.41258049, 0.32011184, ..., 3.11468458, 2.93160057,
       2.74699712])

In [66]:
## Show example predictions

predictions = stepwise_fit.predict_in_sample()
true_data = df2[' L_T1']

fig, ax = plt.subplots()
sns.lineplot(data=predictions[0:1000], ax=ax, color='b')
sns.lineplot(data=true_data[0:1000], ax=ax, color='r')

KeyError: ' L_T1'

In [None]:
## Set a threshold for anomalies and see if there are any

df2.loc[abs(predictions - true_data) > 0.5, [' DATETIME', ' ATT_FLAG']]