In [None]:
import pandas as pd
import numpy as np
import itertools

from sklearn.metrics import mean_absolute_error

from prophet import Prophet
from prophet.plot import add_changepoints_to_plot
from prophet.plot import plot_plotly, plot_components_plotly
from prophet.plot import plot_cross_validation_metric

from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics

from matplotlib import pyplot

import altair as alt

from additional_holidays import holidays
#from additional_holidays_fix import holidays


In [None]:
# Read the raw sales export; row 0 of the CSV supplies the column names.
df = pd.read_csv(
    'COMP_SALES_2013-Jan102022.csv',
    header=0,
)
df

In [None]:
# Shape the frame the way Prophet expects it: a `ds` date column and a `y`
# value column, with incomplete rows dropped and `ds` parsed to datetimes.
df = (
    df.rename(columns={'SALES_DATE': 'ds', 'NET_SALE_AMOUNT': 'y'})
      .loc[:, ['ds', 'y']]
      .dropna()
      .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

In [None]:
# Display the two-column (`ds`, `y`) frame produced by the previous cell.
df

In [None]:
# Keep only the modelling window, 2016-01-31 through 2021-11-30 (both inclusive).
# An earlier variant of this cell applied the lower bound only.
df = df.loc[df['ds'].between('2016-1-31', '2021-11-30')]
df

In [None]:
# Overview of the daily series on a wide, short canvas
pyplot.figure(figsize=(16, 4))

# blue line of net sales against date, plus a dash-dot grid
pyplot.plot(df['ds'], df['y'], 'b')
pyplot.grid(linestyle='-.')

# axis labels and title
pyplot.xlabel('Time (year)', fontsize=13)
pyplot.ylabel('Net Sales Amount ($)', fontsize=13)
pyplot.title('Daily Net Sales Amount', fontsize=18)

# render the figure
pyplot.show()

In [None]:
# Baseline model: weekly + yearly seasonality, multiplicative mode, and the
# imported holiday table. Prior scales are left at their defaults here; values
# noted previously as candidates were holidays_prior_scale=0.5,
# changepoint_prior_scale=0.01 and seasonality_prior_scale=1.0 / 5.0.
m = Prophet(
    weekly_seasonality=True,
    yearly_seasonality=True,
    holidays=holidays,
    seasonality_mode='multiplicative',
)
# Original author's note: the last day of the dataset is not complete.
m.fit(df)

# Extend the date axis 180 days past the end of the history ...
future = m.make_future_dataframe(periods=180)

# ... and predict over history + horizon.
forecast = m.predict(future)

In [None]:
# https://facebook.github.io/prophet/docs/trend_changepoints.html#automatic-changepoint-detection-in-prophet
# Plot the baseline forecast, then overlay the model's changepoint locations on
# the same axes. The return value is bound to `a` and not used again in this
# notebook (presumably only to keep it from being echoed as cell output).
fig = m.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), m, forecast)

In [None]:
# Interactive (Plotly) figure of the baseline model's forecast
plot_plotly(m, forecast)

In [None]:
# Interactive (Plotly) figure of the baseline model's forecast components
plot_components_plotly(m, forecast)

In [None]:
# Sum of the baseline point forecasts (yhat) over December 2021, bounds inclusive.
mask = forecast['ds'].between('2021-12-01', '2021-12-31')
final = forecast.loc[mask, ['ds', 'yhat']]
final['yhat'].sum()

In [None]:
def detect_anomalies(f):
    """Flag observations that fall outside the forecast's uncertainty interval.

    Parameters
    ----------
    f : pandas.DataFrame
        Must contain the columns 'ds', 'trend', 'yhat', 'yhat_lower',
        'yhat_upper' and 'fact' (the observed value). The frame is not
        modified; a copy of those columns is returned.

    Returns
    -------
    pandas.DataFrame
        The six input columns plus:
        - 'anomaly': 1 where fact > yhat_upper, -1 where fact < yhat_lower,
          0 otherwise (rows whose 'fact' is NaN compare False and stay 0).
        - 'importance': distance of the observation from the violated bound
          divided by the observation itself; 0.0 for non-anomalous rows.
          It is infinite when an anomalous observation is exactly 0.
    """
    forecasted = f[['ds', 'trend', 'yhat', 'yhat_lower', 'yhat_upper', 'fact']].copy()

    forecasted['anomaly'] = 0
    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'anomaly'] = 1
    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'anomaly'] = -1

    # Anomaly importances. Start from a float column so the fractional values
    # assigned below do not force a dtype change on an integer column.
    forecasted['importance'] = 0.0

    # Divide by the LOCAL frame's observations. The previous version divided by
    # the global `forecast`, which only worked when that exact frame was passed.
    overshoot = (forecasted['fact'] - forecasted['yhat_upper']) / forecasted['fact']
    undershoot = (forecasted['yhat_lower'] - forecasted['fact']) / forecasted['fact']
    forecasted.loc[forecasted['anomaly'] == 1, 'importance'] = overshoot
    forecasted.loc[forecasted['anomaly'] == -1, 'importance'] = undershoot

    return forecasted

def plot_anomalies(forecasted):
    """Return a layered Altair chart of the forecast band, regular points and anomalies.

    `forecasted` is expected to carry the columns 'ds', 'fact', 'yhat_lower',
    'yhat_upper', 'anomaly' and 'importance'. Three layers are drawn:
    a green area between yhat_lower and yhat_upper, small black circles for
    rows with anomaly == 0, and red circles sized by 'importance' for the rest.
    """
    hover_fields = ['ds', 'fact', 'yhat_lower', 'yhat_upper']

    # Uncertainty band across every row.
    band_layer = (
        alt.Chart(forecasted)
        .mark_area(interpolate="basis", color='#7FC97F')
        .encode(
            x=alt.X('ds:T', title='date'),
            y='yhat_upper',
            y2='yhat_lower',
            tooltip=list(hover_fields),
        )
        .interactive()
        .properties(title='Anomaly Detection')
    )

    # Observations that stayed inside the band.
    regular_rows = forecasted[forecasted.anomaly == 0]
    regular_layer = (
        alt.Chart(regular_rows)
        .mark_circle(size=15, opacity=0.7, color='Black')
        .encode(
            x='ds:T',
            y=alt.Y('fact', title='sales'),
            tooltip=list(hover_fields),
        )
        .interactive()
    )

    # Observations outside the band; marker size encodes the importance.
    flagged_rows = forecasted[forecasted.anomaly != 0]
    flagged_layer = (
        alt.Chart(flagged_rows)
        .mark_circle(size=30, color='Red')
        .encode(
            x='ds:T',
            y=alt.Y('fact', title='sales'),
            tooltip=hover_fields + ['importance'],
            size=alt.Size('importance', legend=None),
        )
        .interactive()
    )

    layered = alt.layer(band_layer, regular_layer, flagged_layer)
    return layered.properties(width=870, height=450).configure_title(fontSize=20)



In [None]:
# Attach the observed sales to the forecast by matching on the date column
# instead of by row position, so each prediction is paired with the observation
# for the same day even if `df` is unsorted or its rows do not line up
# one-to-one with the forecast rows. Forecast dates without an observation
# (such as the future horizon) get NaN. Dates in `df` must be unique for the
# lookup; a duplicated date raises rather than silently misaligning.
forecast['fact'] = forecast['ds'].map(df.set_index('ds')['y'])
pred = detect_anomalies(forecast)
plot_anomalies(pred)

In [None]:
# Turn every flagged point whose importance is finite into a single-day
# "holiday" row (importance = relative distance outside the model's bounds).
anomaly_df = (
    pred.loc[(pred['anomaly'] != 0) & ~np.isinf(pred['importance']), ['ds']]
        .assign(holiday='anomaly', lower_window=0, upper_window=0)
)
print("TOTAL ANOMALIES:", len(anomaly_df['ds']))

In [None]:
# Combined holiday table: the imported holidays followed by the anomaly rows,
# rebuilt from their row records so the result gets a fresh 0..n-1 index.
anomalies = pd.DataFrame([
    *holidays.to_dict('records'),
    *anomaly_df.to_dict('records'),
])

In [None]:
# Dates to drop from the training data: flagged points with a finite
# importance of at least 0.25.
remove_anomalies = pred.loc[
    (pred['anomaly'] != 0)
    & (pred['importance'] >= 0.25)
    & ~np.isinf(pred['importance']),
    'ds',
].tolist()
print("TOTAL ANOMALIES TO REMOVE:", len(remove_anomalies))

In [None]:
# Disabled alternative that also drops most of 2020:
#modified_data = df.loc[(~df['ds'].isin(remove_anomalies)) & (df['ds']<='2020-2-1') | (df['ds']>='2020-12-31')] #remove anomalies and 2020 data
# NOTE(review): in that expression `&` binds tighter than `|`, so rows on or after
# 2020-12-31 would be kept even when flagged — add parentheses before re-enabling.
# Training data for the refit: every row of `df` whose date is not in `remove_anomalies`.
modified_data = df.loc[(~df['ds'].isin(remove_anomalies)) ] #remove anomalies

In [None]:
# Redo the forecast with the large anomalies removed from the training data and
# all finite anomalies registered as extra one-day holidays.
# Same configuration as the baseline model, plus holidays_prior_scale=0.5.
# Other values noted previously as candidates: changepoint_prior_scale=0.01,
# seasonality_prior_scale=1.0 / 5.0.
anomaly_model = Prophet(
    weekly_seasonality=True,
    yearly_seasonality=True,
    holidays=anomalies,
    seasonality_mode='multiplicative',
    holidays_prior_scale=0.5,
)
anomaly_model.fit(modified_data)

# Create days for future prediction
anomaly_future = anomaly_model.make_future_dataframe(periods=180)

# Forecast
anomaly_forecast = anomaly_model.predict(anomaly_future)

# https://facebook.github.io/prophet/docs/trend_changepoints.html#automatic-changepoint-detection-in-prophet
# Visualize the refit model's own forecast with its changepoints. The previous
# version passed the baseline `forecast` here, so the figure mixed the new
# model's history with the old model's predictions.
fig = anomaly_model.plot(anomaly_forecast)
a = add_changepoints_to_plot(fig.gca(), anomaly_model, anomaly_forecast)

In [None]:
# Interactive (Plotly) figure of the refit (anomaly-adjusted) model's forecast
plot_plotly(anomaly_model, anomaly_forecast)

In [None]:
# Interactive (Plotly) figure of the refit (anomaly-adjusted) model's forecast components
plot_components_plotly(anomaly_model, anomaly_forecast)

In [None]:
# Daily point forecasts from the refit model for 2021-11-15 through 2021-12-01 (inclusive).
mask = anomaly_forecast['ds'].between('2021-11-15', '2021-12-1')
final = anomaly_forecast.loc[mask, ['ds', 'yhat']]
final

In [None]:
# Sum of the refit model's point forecasts (yhat) over December 2021, bounds inclusive.
mask = anomaly_forecast['ds'].between('2021-12-01', '2021-12-31')
final = anomaly_forecast.loc[mask, ['ds', 'yhat']]
final['yhat'].sum()