##  Seasonal Pattern Removal

In [23]:
import warnings
import itertools
import numpy as np
import matplotlib.pyplot as plt
# Silence every warning for the rest of the session. This runs before the
# remaining imports below, so warnings raised while importing them are hidden too.
warnings.filterwarnings("ignore")
# Apply matplotlib's built-in 'fivethirtyeight' style sheet to all figures.
plt.style.use('fivethirtyeight')
import pandas as pd
import statsmodels.api as sm
import matplotlib
from datetime import datetime

# Notebook-wide matplotlib defaults: axis-label / tick-label font sizes and text colour.
matplotlib.rcParams['axes.labelsize'] = 14
matplotlib.rcParams['xtick.labelsize'] = 12
matplotlib.rcParams['ytick.labelsize'] = 12
# Single-letter colour codes must be lower case: upper-case 'G' was deprecated in
# matplotlib 3.1 and is rejected by later releases, while 'g' (green) works everywhere.
matplotlib.rcParams['text.color'] = 'g'
## Facebook Prophet
from fbprophet import Prophet

## Clean Sales

In [29]:
def clean_sales_data(input_file_path, output_file_path,
                     holidays_file_path='helper_table_holidays.csv'):
    """Subtract the Prophet-modelled patterns from a daily sales series and save the result.

    The input CSV is fitted with a Prophet model (custom holidays plus US country
    holidays). Every component column of the resulting forecast -- i.e. everything
    except ``ds``, ``yhat``, the aggregate ``additive_terms`` /
    ``multiplicative_terms`` / ``holidays`` columns and the ``*_lower`` /
    ``*_upper`` interval bounds -- is summed row-wise into a "Pattern Sum", which
    is then subtracted from ``sales_retail``.

    Parameters
    ----------
    input_file_path : str or path-like
        CSV containing at least a ``date`` column (``YYYY-MM-DD`` strings) and a
        ``sales_retail`` column.
    output_file_path : str or path-like
        Destination CSV. Written without the index, with the columns ``date``,
        ``sales_original``, ``sales_cleaned``, ``trend``, ``holidays``,
        ``weekly`` and ``yearly``.
    holidays_file_path : str or path-like, optional
        CSV passed to ``Prophet(holidays=...)``. Defaults to the previously
        hard-coded ``'helper_table_holidays.csv'``.
        NOTE(review): presumably in Prophet's holidays layout (``holiday``,
        ``ds``, optional windows) -- confirm against the helper table.

    Returns
    -------
    None
        The result is only written to ``output_file_path``.

    Raises
    ------
    KeyError
        If ``date`` or ``sales_retail`` is missing from the input, or the
        forecast lacks one of the exported component columns.
    ValueError
        If a date string does not match ``%Y-%m-%d``.
    """
    # Load the raw series and the additional holiday definitions.
    df = pd.read_csv(input_file_path)
    df_additional_holidays = pd.read_csv(holidays_file_path)

    # Vectorised parse; the explicit format keeps the strict YYYY-MM-DD check.
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

    # Prophet is trained on a two-column frame named "ds" (date) and "y" (value).
    # Build it separately so the input frame is not polluted with helper columns.
    df_sub = df[['date', 'sales_retail']].rename(
        columns={'date': 'ds', 'sales_retail': 'y'})

    # Build and train the Prophet model.
    m = Prophet(holidays=df_additional_holidays)
    m.add_country_holidays(country_name='US')
    m.fit(df_sub)

    # Predict over the history plus one extra day. The extra day has no matching
    # row in the input, so the left join below discards it.
    future = m.make_future_dataframe(periods=1)
    forecast = m.predict(future)

    # Columns that must not contribute to the pattern sum: the date, the final
    # prediction and the aggregate columns (their parts are summed individually).
    # Uncertainty bounds ("_lower" / "_upper") are skipped as well.
    not_a_pattern = {'ds', 'multiplicative_terms', 'yhat', 'additive_terms', 'holidays'}
    pattern_cols = [
        col for col in forecast.columns
        if '_upper' not in col and '_lower' not in col and col not in not_a_pattern
    ]

    # Copy before adding a column so the forecast frame itself is never written to.
    components = forecast[['ds', 'trend', 'holidays', 'weekly', 'yearly']].copy()
    components['Pattern Sum'] = forecast[pattern_cols].sum(axis=1)

    # Join on the date. Only the two input columns that are needed are carried
    # over, which avoids name clashes with the forecast component columns.
    df_merge = pd.merge(df[['date', 'sales_retail']], components,
                        left_on='date', right_on='ds', how='left')
    df_merge['sales_cleaned'] = df_merge['sales_retail'] - df_merge['Pattern Sum']

    df_merge = df_merge[
        ['date', 'sales_retail', 'sales_cleaned', 'trend', 'holidays', 'weekly', 'yearly']
    ].rename(columns={'sales_retail': 'sales_original'})
    df_merge.to_csv(path_or_buf=output_file_path, index=False)



    
    

## Sales_cleaned Example

In [30]:
# Build the output location from the current user's home directory instead of a
# hard-coded /Users/<name>/ path, so the cell also runs on other machines.
from pathlib import Path

# NOTE(review): "salea_..." looks like a typo for "sales_..."; left unchanged
# because it has to match the directory name on disk -- confirm.
input_file_path = "salea_traffic_cleaned_for_pattern_removal/1208_Orlando_FOA_sales_traffic_data.csv"
# NOTE(review): the file name repeats "FOA_FOA", unlike the traffic output;
# kept as-is so existing consumers of this file are not broken -- confirm.
output_file_path = str(Path.home() / 'Desktop' / '1208_Orlando_FOA_FOA_sales_cleaned.csv')
clean_sales_data(input_file_path, output_file_path)


INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


## Clean Traffic

In [34]:
def clean_traffic_data(input_file_path, output_file_path,
                       holidays_file_path='helper_table_holidays.csv'):
    """Subtract the Prophet-modelled patterns from a daily traffic series and save the result.

    The input CSV is fitted with a Prophet model (custom holidays plus US country
    holidays). Every component column of the resulting forecast -- i.e. everything
    except ``ds``, ``yhat``, the aggregate ``additive_terms`` /
    ``multiplicative_terms`` / ``holidays`` columns and the ``*_lower`` /
    ``*_upper`` interval bounds -- is summed row-wise into a "Pattern Sum", which
    is then subtracted from ``traffic``.

    Parameters
    ----------
    input_file_path : str or path-like
        CSV containing at least a ``date`` column (``YYYY-MM-DD`` strings) and a
        ``traffic`` column.
    output_file_path : str or path-like
        Destination CSV. Written without the index, with the columns ``date``,
        ``traffic_original``, ``traffic_cleaned``, ``trend``, ``holidays``,
        ``weekly`` and ``yearly``.
    holidays_file_path : str or path-like, optional
        CSV passed to ``Prophet(holidays=...)``. Defaults to the previously
        hard-coded ``'helper_table_holidays.csv'``.
        NOTE(review): presumably in Prophet's holidays layout (``holiday``,
        ``ds``, optional windows) -- confirm against the helper table.

    Returns
    -------
    None
        The result is only written to ``output_file_path``.

    Raises
    ------
    KeyError
        If ``date`` or ``traffic`` is missing from the input, or the forecast
        lacks one of the exported component columns.
    ValueError
        If a date string does not match ``%Y-%m-%d``.
    """
    # Load the raw series and the additional holiday definitions.
    df = pd.read_csv(input_file_path)
    df_additional_holidays = pd.read_csv(holidays_file_path)

    # Vectorised parse; the explicit format keeps the strict YYYY-MM-DD check.
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

    # Prophet is trained on a two-column frame named "ds" (date) and "y" (value).
    # Build it separately so the input frame is not polluted with helper columns.
    df_sub = df[['date', 'traffic']].rename(
        columns={'date': 'ds', 'traffic': 'y'})

    # Build and train the Prophet model.
    m = Prophet(holidays=df_additional_holidays)
    m.add_country_holidays(country_name='US')
    m.fit(df_sub)

    # Predict over the history plus one extra day. The extra day has no matching
    # row in the input, so the left join below discards it.
    future = m.make_future_dataframe(periods=1)
    forecast = m.predict(future)

    # Columns that must not contribute to the pattern sum: the date, the final
    # prediction and the aggregate columns (their parts are summed individually).
    # Uncertainty bounds ("_lower" / "_upper") are skipped as well.
    not_a_pattern = {'ds', 'multiplicative_terms', 'yhat', 'additive_terms', 'holidays'}
    pattern_cols = [
        col for col in forecast.columns
        if '_upper' not in col and '_lower' not in col and col not in not_a_pattern
    ]

    # Copy before adding a column so the forecast frame itself is never written to.
    components = forecast[['ds', 'trend', 'holidays', 'weekly', 'yearly']].copy()
    components['Pattern Sum'] = forecast[pattern_cols].sum(axis=1)

    # Join on the date. Only the two input columns that are needed are carried
    # over, which avoids name clashes with the forecast component columns.
    df_merge = pd.merge(df[['date', 'traffic']], components,
                        left_on='date', right_on='ds', how='left')
    df_merge['traffic_cleaned'] = df_merge['traffic'] - df_merge['Pattern Sum']

    df_merge = df_merge[
        ['date', 'traffic', 'traffic_cleaned', 'trend', 'holidays', 'weekly', 'yearly']
    ].rename(columns={'traffic': 'traffic_original'})
    df_merge.to_csv(path_or_buf=output_file_path, index=False)



    
    

## Traffic_cleaned Example

In [35]:
# Build the output location from the current user's home directory instead of a
# hard-coded /Users/<name>/ path, so the cell also runs on other machines.
from pathlib import Path

# NOTE(review): "salea_..." looks like a typo for "sales_..."; left unchanged
# because it has to match the directory name on disk -- confirm.
input_file_path = "salea_traffic_cleaned_for_pattern_removal/1208_Orlando_FOA_sales_traffic_data.csv"
output_file_path = str(Path.home() / 'Desktop' / '1208_Orlando_FOA_traffic_cleaned.csv')
clean_traffic_data(input_file_path, output_file_path)


INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
