<a href="https://colab.research.google.com/github/ms-shyamkumar/jupyter-notebooks/blob/main/fbprophet_timeseries_anomaly_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from fbprophet import Prophet

In [15]:
# Input CSV with exactly two columns: a datetime and a numeric value.
FILE_NAME = 'daily-min-temperatures.csv'

# Anomaly-detection sensitivity: multiplier applied to the forecast interval
# half-widths when the anomaly margins are computed.
sensitivity = 3

# Sampling frequency of the input data, passed to Prophet as `freq` ('D' = daily).
frequency = 'D'

In [16]:
# Load the raw series and rename its two columns to 'ds' (timestamp) and
# 'y' (observed value), the names every later cell relies on.
df = pd.read_csv(FILE_NAME)
df.columns = ['ds', 'y']

# Parse the timestamp column. `infer_datetime_format` is deprecated in pandas 2.x
# (format inference is now the default behaviour), so it is not passed.
df['ds'] = pd.to_datetime(df['ds'])

# Sanity check: row/column count and the first two parsed rows.
print(df.shape)
print(df.head(2))

(3650, 2)
          ds     y
0 1981-01-01  20.7
1 1981-01-02  17.9


In [17]:
# Plot the raw series as a single line for a visual check before modelling.
raw_trace = go.Scatter(
    x=df['ds'],
    y=df['y'],
    mode='lines',
    name='lines',
)
fig = go.Figure()
# add_trace returns the figure, so leaving it as the last expression renders the plot.
fig.add_trace(raw_trace)

In [18]:
# Fit a Prophet model on the full history and predict over that same history,
# so every observation gets a reconstructed value (yhat) and an uncertainty
# band (yhat_lower / yhat_upper).
m = Prophet()
m.fit(df)

# periods=1 appends one step beyond the data; that extra row has no matching
# observation and is dropped by the inner merge below.
future = m.make_future_dataframe(periods=1, include_history=True, freq=frequency)
forecast = m.predict(future)

# Join observations to predictions on the timestamp column (the key is listed
# once; repeating it adds nothing).
anomaly = pd.merge(df, forecast, on='ds')
anomaly.head(3)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Unnamed: 0,ds,y,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,1981-01-01,20.7,11.857441,11.657101,18.660353,11.857441,11.857441,3.328264,3.328264,3.328264,0.010955,0.010955,0.010955,3.317309,3.317309,3.317309,0.0,0.0,0.0,15.185705
1,1981-01-02,17.9,11.855608,11.810925,18.872177,11.855608,11.855608,3.375073,3.375073,3.375073,-0.019841,-0.019841,-0.019841,3.394914,3.394914,3.394914,0.0,0.0,0.0,15.23068
2,1981-01-03,18.8,11.853774,12.003511,18.950096,11.853774,11.853774,3.411907,3.411907,3.411907,-0.060133,-0.060133,-0.060133,3.47204,3.47204,3.47204,0.0,0.0,0.0,15.265682


In [19]:
# Build anomaly margins by scaling the forecast interval around yhat with the
# sensitivity factor, then flag every observation that falls outside them.

# Half-widths of the forecast interval above and below the point forecast.
upper_gap = anomaly['yhat_upper'] - anomaly['yhat']
lower_gap = anomaly['yhat'] - anomaly['yhat_lower']

anomaly['upper_margin'] = anomaly['yhat'] + sensitivity * upper_gap
anomaly['lower_margin'] = anomaly['yhat'] - sensitivity * lower_gap

# 1 where the observed value lies strictly outside the margins, 0 otherwise.
outside_margins = (anomaly['y'] > anomaly['upper_margin']) | (anomaly['y'] < anomaly['lower_margin'])
anomaly['is_anomaly'] = np.where(outside_margins, 1, 0)

In [20]:
# Subset of rows flagged as anomalous, used for the marker overlay.
adf = anomaly[anomaly['is_anomaly'] == 1]

# Overlay the observed series, the Prophet reconstruction, and the flagged points.
fig = go.Figure()
fig.add_trace(go.Scatter(x=anomaly['ds'], y=anomaly['y'],
                    mode='lines',
                    name='actual',
                    line = dict(color='blue')))
fig.add_trace(go.Scatter(x=anomaly['ds'], y=anomaly['yhat'],
                    mode='lines',
                    name='reconstructed',
                    line = dict(color='green')))
# Markers-only trace: set the colour on `marker` explicitly instead of relying
# on it being picked up from a `line` style that is never drawn.
fig.add_trace(go.Scatter(x=adf['ds'], y=adf['y'],
                  mode='markers', name='anomalies',
                  marker = dict(color='red')))