## Anomaly detection on time series dataset with Facebook Prophet
#### The data comes from a field bus of a building automation system. In addition to heating, ventilation and air conditioning, as well as lighting and shading, critical services such as fire alarm or access control systems are also part of building automation.

### Import all necessary libraries

In [None]:
import json
import os
from datetime import datetime

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from prophet import Prophet
from prophet.serialize import model_to_json, model_from_json

# Notebook-wide matplotlib defaults: 10x8 figures, no grid lines.
mpl.rcParams.update({
    'figure.figsize': (10, 8),
    'axes.grid': False,
})

# Base name of the dataset; it is combined with ".csv" to load the data and with
# ".json" to name the saved model.
dataset_name = "3.4.26"




### The tolerance level (`tolerence_level`) can be changed depending on how sensitive the anomaly detection needs to be. Generally the number should be between 1 and 3.5. It can also be higher, but needing a higher value means that the model is bad at predicting and should be changed. The higher the number, the less sensitive the anomaly detection is.

In [None]:
# Multiplier applied to the forecast uncertainty width when flagging anomalies:
# a point is flagged when |error| > tolerence_level * uncertainty.
tolerence_level = 1

In [None]:
# Load the raw data from "<dataset_name>.csv" in the current working directory.
csv_path = dataset_name + ".csv"
df = pd.read_csv(csv_path)

In [None]:
# Display the freshly loaded DataFrame.
df

In [None]:

# Parse the 'datetime' column into pandas datetimes so it can be compared
# against dates and used as a time axis.
parsed_timestamps = pd.to_datetime(df['datetime'])
df['datetime'] = parsed_timestamps

In [None]:

# Print the column dtypes and non-null counts (checks the 'datetime' conversion).
df.info()

In [None]:
# Preview the first rows of the data.
df.head()

In [None]:
# Display the DataFrame again, now with the converted 'datetime' column.
df

### Plotting the whole dataset to see what we are working with

In [None]:
# Interactive line chart of the raw 'source_addr' series over time.
fig = px.line(
    df.reset_index(),
    x='datetime',
    y='source_addr',
    title='Whole Dataset',
)

# Add a range slider below the x-axis so a time window can be selected.
fig.update_xaxes(rangeslider_visible=True)
fig.show()

### Facebook Prophet expects two columns: "ds", which is the time, and "y", which is the value we are trying to predict.

In [None]:
# Keep only the timestamp and target columns and rename them to the
# 'ds' / 'y' names that Prophet expects.
prophet_columns = {'datetime': 'ds', 'source_addr': 'y'}
df_final = (
    df.reset_index()
    .loc[:, ['datetime', 'source_addr']]
    .rename(columns=prophet_columns)
)


In [None]:
# Restrict the data to the analysis window. Both bounds are inclusive, and a bare
# date string compares as midnight of that day.
in_window = (df_final['ds'] >= '2019-07-19') & (df_final['ds'] <= '2021-02-23')

# .copy() makes df_final an independent frame, so the later in-place edit of 'y'
# (the custom anomaly) does not trigger pandas' SettingWithCopyWarning.
df_final = df_final[in_window].copy()



### Splitting the dataset into a training part and a test part. We will train the model on the training split only, but we will plot the predictions against the whole dataset.

In [None]:
# Chronological train/test split around a single cutoff.
# A bare date string compares as midnight of that day, so the previous bounds
# (train <= '2020-12-14', test >= '2020-12-15') left every row after
# 2020-12-14 00:00:00 and before 2020-12-15 00:00:00 in neither split.
# Using one cutoff with '<' and '>=' makes the two splits contiguous and disjoint.
split_date = '2020-12-15'
train = df_final[(df_final['ds'] >= '2019-07-19') & (df_final['ds'] < split_date)]
test = df_final[(df_final['ds'] >= split_date) & (df_final['ds'] <= '2021-02-23')]

In [None]:
# NOTE: a notebook cell only renders its last expression, so the shape below is
# evaluated but not shown; the training frame itself is what gets displayed.
train.shape
train

In [None]:
# (rows, columns) of the test split.
test.shape

In [None]:
# Display the test split.
test

### Initializing Prophet

In [None]:
# Create the model; interval_width=0.95 asks for 95% uncertainty intervals,
# which are reported as yhat_lower / yhat_upper in the forecast.
m = Prophet(interval_width=0.95)
# Add the built-in holidays for country code 'DE' so holiday effects are modelled.
m.add_country_holidays(country_name='DE')

In [None]:

# Fit the model on the training split only.
m.fit(train)

### Making predictions for 5000 hours beyond the end of the training data

In [None]:
# Build the timestamps to predict: the training history plus 5000 further
# hourly steps.
# NOTE(review): newer pandas releases deprecate the 'H' alias in favour of 'h';
# confirm against the pinned pandas version before changing it.
future = m.make_future_dataframe(
    periods=5000,
    freq='H',
    include_history=True,
)
future

### The predict method gives us three values. "yhat" is the predicted value. Because the prediction is not perfect and carries uncertainty, two more values are given: "yhat_upper" and "yhat_lower", the upper and lower bounds of the uncertainty interval.

In [None]:
# Predict for every timestamp in `future`, then preview the last rows of the
# point forecast together with its lower and upper bounds.
forecast = m.predict(future)
forecast_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast[forecast_columns].tail()

In [None]:

# Align observed values and forecast columns side by side on the 'ds' timestamp.
# Timestamps present on only one side get NaN in the other side's columns.
observed = df_final.set_index('ds')['y']
predicted = forecast.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']]
results = pd.concat([observed, predicted], axis=1)



In [None]:
# Quick line plot of every column in `results` (y, yhat, yhat_lower, yhat_upper).
results.plot()

In [None]:

# Prophet's built-in forecast plot; the returned figure is kept in fig1.
fig1 = m.plot(forecast)

### The model also decomposes the forecast into its components: the overall trend, the holiday effects, and the seasonal patterns it fitted (such as daily, weekly and yearly).

In [None]:
# Plot the individual components of the forecast; the returned figure is kept in comp.
comp=m.plot_components(forecast)

In [None]:
# Display observed values next to the forecast columns.
results

### First we compute the error as error = y - yhat, and the uncertainty as uncertainty = yhat_upper - yhat_lower. A point is then labelled as an anomaly with: anomaly = "Yes" if |error| > tolerence_level * uncertainty, else "No".

In [None]:
# Signed forecast error: observed value minus predicted value.
results['error'] = results['y'].sub(results['yhat'])

In [None]:
# Width of the forecast interval: upper bound minus lower bound.
results["uncertainty"] = results['yhat_upper'].sub(results['yhat_lower'])

In [None]:
# Display the results including the new 'error' and 'uncertainty' columns.
results

In [None]:
# Preview the rows whose absolute error exceeds the scaled uncertainty width.
exceeds_tolerance = results['error'].abs() > tolerence_level * results['uncertainty']
results[exceeds_tolerance]

In [None]:
# Label every row: 'Yes' when the absolute error exceeds tolerence_level times the
# uncertainty width, otherwise 'No'. Rows with a missing observation or forecast
# compare as False and are therefore labelled 'No'.
# The original row-wise apply called np.abs although numpy was never imported
# (NameError); the vectorised form below relies on `import numpy as np` at the
# top of the notebook.
is_anomaly = results['error'].abs() > tolerence_level * results['uncertainty']
results['anomaly'] = np.where(is_anomaly, 'Yes', 'No')

results

In [None]:
# Scatter plot of the observed values, coloured by the 'anomaly' label.
fig = px.scatter(
    results.reset_index(),
    x='ds',
    y='y',
    color='anomaly',
    title='Anomaly Detection',
)

# Add a range slider below the x-axis so a time window can be selected.
fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [None]:
# Plot the forecast components again (same call as earlier in the notebook).
comp=m.plot_components(forecast)

In [None]:
# List only the rows labelled as anomalies.
results.query("anomaly == 'Yes'" )

## Saving and Loading Model

In [None]:



# Location of the serialized model; the directory must already exist.
model_path = "Dataset/source addresses/Saved Model/" + dataset_name + ".json"

# Save the fitted model. The serialized model is passed through json.dump, so it
# has to be read back with json.load (as done below) before deserializing.
with open(model_path, 'w') as fout:
    json.dump(model_to_json(m), fout)

# Load the model back into a separate object to check the round trip.
with open(model_path, 'r') as fin:
    new_m = model_from_json(json.load(fin))
    

### Introducing custom anomaly

In [None]:
# Overwrite the observed value at 2021-01-27 00:00:00 with 30 to simulate an anomaly.
# Only df_final changes; the model was already fitted before this edit.
df_final.loc[df_final["ds"] == "2021-01-27 00:00:00", "y"] = 30


In [None]:
# Re-run the forecast with the reloaded model and align it with the modified
# observations on the 'ds' timestamp.
future = new_m.make_future_dataframe(
    periods=5000,
    freq='H',
    include_history=True,
)
forecast = new_m.predict(future)

observed = df_final.set_index('ds')['y']
predicted = forecast.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']]
results = pd.concat([observed, predicted], axis=1)


In [None]:
# Recompute the error, the uncertainty width and the anomaly label for the
# modified data.
results['error'] = results['y'] - results['yhat']
results["uncertainty"] = results['yhat_upper'] - results['yhat_lower']

# 'Yes' when |error| > tolerence_level * uncertainty, otherwise 'No'; rows with a
# missing observation or forecast compare as False and are labelled 'No'.
# The original row-wise apply called np.abs although numpy was never imported
# (NameError); the vectorised form relies on `import numpy as np` at the top of
# the notebook. The bare filter expression that used to sit here was removed
# because its result was discarded.
is_anomaly = results['error'].abs() > tolerence_level * results['uncertainty']
results['anomaly'] = np.where(is_anomaly, 'Yes', 'No')



### Plotting Anomaly 

In [None]:
# Scatter plot of the observed values (including the injected one), coloured by
# the 'anomaly' label.
fig = px.scatter(
    results.reset_index(),
    x='ds',
    y='y',
    color='anomaly',
    title='Anomaly Detection',
)

# Add a range slider below the x-axis so a time window can be selected.
fig.update_xaxes(rangeslider_visible=True)
fig.show()