Daniel Rocha Ruiz, MSc in Data Science and Business Analytics

Source:
- https://towardsdatascience.com/an-end-to-end-project-on-time-series-analysis-and-forecasting-with-python-4835e6bf050b

# Set-up
## Import packages

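Installing Prophet can be quite tricky. If the import below fails, run the `pip install` cell that follows it and then re-run the import; for Windows-specific problems see: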
- https://stackoverflow.com/questions/53178281/installing-fbprophet-python-on-windows-10?msclkid=c2c7badcd03b11ecbfda2a7e235e542f

In [None]:
from fbprophet import Prophet

In [None]:
import sys
!{sys.executable} -m pip install fbprophet

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

## Load data

In [None]:
# Read the Superstore workbook into a DataFrame and list the available columns
superstore_path = "../data/Superstore.xls"
df = pd.read_excel(superstore_path)
print(df.columns)

## Prepare data

In [None]:
def prep_data(df: pd.DataFrame, col: str) -> pd.Series:
    """Build the monthly average-daily-sales series for one product category.

    Rows whose ``Category`` equals ``col`` are selected, sales are summed per
    ``Order Date``, and those daily totals are averaged within each calendar
    month (resample rule ``'MS'``, labelled by month start).

    Args:
        df: Table with at least ``Category``, ``Order Date`` and ``Sales``
            columns. ``Order Date`` must hold datetimes for the resample step.
        col: Category value to keep, e.g. ``"Furniture"``.

    Returns:
        The ``Sales`` series indexed by month-start ``Order Date``.

    Raises:
        ValueError: If the selected rows contain a missing date or sales value.
    """
    # keep only the requested category and the two columns that are used
    selected = df.loc[df["Category"] == col, ["Order Date", "Sales"]]

    # Fail loudly on missing values. An explicit raise (instead of `assert`)
    # is still enforced when Python runs with -O.
    null_counts = selected.isnull().sum()
    if null_counts.any():
        raise ValueError(
            f"Missing values for category {col!r}: "
            f"{null_counts[null_counts > 0].to_dict()}"
        )

    # total sales per order date; groupby already returns the dates in sorted
    # order and as the index, so no separate sort / set_index is needed
    daily_sales = selected.groupby("Order Date")["Sales"].sum()

    # resample converts the frequency of the data (day -> month);
    # 'MS' labels each bucket by its month start, and the value is the mean
    # of that month's daily totals
    return daily_sales.resample("MS").mean()

# Build the monthly series for each category; the printed shape is a quick
# check of how many months each series covers.
furniture = prep_data(df, col="Furniture")
print(furniture.shape)
office = prep_data(df, col="Office Supplies")
print(office.shape)

# Modelling
## Furniture

In [None]:
# get data
# Prophet expects a DataFrame with a 'ds' (date) and a 'y' (value) column.
# prep_data returns a Series indexed by 'Order Date', so the index is turned
# into a column first; the isinstance guard keeps the cell safe to re-run.
if isinstance(furniture, pd.Series):
    furniture = furniture.reset_index()
furniture = furniture.rename(columns={'Order Date': 'ds', 'Sales': 'y'})

# train model
furniture_model = Prophet(interval_width=0.95)
furniture_model.fit(furniture)

# predict: extend the training dates by 36 month-start periods, then forecast
furniture_future = furniture_model.make_future_dataframe(periods=36, freq='MS')
furniture_forecast = furniture_model.predict(furniture_future)

# plot components (needs the forecast, so it has to come after predict)
furniture_model.plot_components(furniture_forecast)

# plot: pass our own axes so Prophet draws on the 18x6 figure instead of
# creating a second figure and leaving this one blank
furniture_fig, furniture_ax = plt.subplots(figsize=(18, 6))
furniture_model.plot(furniture_forecast, ax=furniture_ax, xlabel='Date', ylabel='Sales')
furniture_ax.set_title('Furniture Sales')
plt.show()

## Office-Supplies

In [None]:
# get data
# Prophet expects a DataFrame with a 'ds' (date) and a 'y' (value) column.
# prep_data returns a Series indexed by 'Order Date', so the index is turned
# into a column first; the isinstance guard keeps the cell safe to re-run.
if isinstance(office, pd.Series):
    office = office.reset_index()
office = office.rename(columns={'Order Date': 'ds', 'Sales': 'y'})

# train model
office_model = Prophet(interval_width=0.95)
office_model.fit(office)

# predict: extend the training dates by 36 month-start periods, then forecast
office_future = office_model.make_future_dataframe(periods=36, freq='MS')
office_forecast = office_model.predict(office_future)

# plot components (needs the forecast, so it has to come after predict)
office_model.plot_components(office_forecast)

# plot: pass our own axes so Prophet draws on the 18x6 figure instead of
# creating a second figure and leaving this one blank
office_fig, office_ax = plt.subplots(figsize=(18, 6))
office_model.plot(office_forecast, ax=office_ax, xlabel='Date', ylabel='Sales')
office_ax.set_title('Office Supplies Sales')
plt.show()

# furniture components again, kept from the original cell
furniture_model.plot_components(furniture_forecast)

## Compare series
### Merge data

In [None]:
# prepare
furniture_names = ['furniture_%s' % column for column in furniture_forecast.columns]
office_names = ['office_%s' % column for column in office_forecast.columns]

# merge
merge_furniture_forecast = furniture_forecast.copy()
merge_office_forecast = office_forecast.copy()
merge_furniture_forecast.columns = furniture_names
merge_office_forecast.columns = office_names

# forecast
forecast = pd.merge(merge_furniture_forecast, merge_office_forecast, how = 'inner', left_on = 'furniture_ds', right_on = 'office_ds')
forecast = forecast.rename(columns={'furniture_ds': 'Date'}).drop('office_ds', axis=1)
forecast.head()

### Plot

In [None]:
# create image
plt.figure(figsize=(10, 7))
# plot series
plt.plot(forecast['Date'], forecast['furniture_trend'], 'b-')
plt.plot(forecast['Date'], forecast['office_trend'], 'r-')
# other
plt.legend()
plt.xlabel('Date')
plt.ylabel('Sales')
plt.title('Furniture vs. Office Supplies Sales Trend')
# plot
plt.plot()