In [65]:
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX
import statsmodels.api as sm
import sys
sys.path.insert(0, './utils/')
import functions
import importlib
importlib.reload(functions)
import numpy as np
import warnings
import pickle

In [45]:
# Load the monthly sales table, turn its first column into a DatetimeIndex,
# and conform the index to month-end ('ME') frequency.
data = (
    pd.read_csv('./data/monthly_sales.csv', index_col=0)
    .pipe(lambda frame: frame.set_axis(pd.to_datetime(frame.index), axis=0))
    .asfreq('ME')
)

In [47]:
# Preview the first rows of the loaded frame (pandas' head() shows 5 by default).
data.head()

Unnamed: 0,Furniture,Office Supplies,Technology,Furniture_log,Office Supplies_log,Technology_log
2011-01-31,5951.859,4851.08,3143.29,8.691627,8.487163,8.053343
2011-02-28,2130.324,1071.724,1608.51,7.664499,6.977956,7.383685
2011-03-31,14573.956,8605.879,32511.174,9.58706,9.060317,10.38937
2011-04-30,7944.837,11155.074,9195.434,8.980403,9.319739,9.126571
2011-05-31,6912.787,7135.624,9599.876,8.841273,8.872995,9.16961


In [69]:
# Pull one sales series per product category out of the monthly frame.
sales_furniture, sales_office, sales_technology = (
    data[column] for column in ('Furniture', 'Office Supplies', 'Technology')
)

## Models

Para cada categoría [Furniture, Office Supplies, Technology] voy a estimar un modelo de series temporales distinto. Es decir, pronosticaré las ventas futuras de cada categoría por separado.

El primer modelo es un SARIMA con las siguientes características, basadas en los estudios realizados en el notebook 'modeling_arima' (Grid Search + CV):

- Furniture: SARIMA (0, 1, 1), (1, 0, 1, 12)
- Office Supplies: SARIMA (1, 1, 1), (1, 0, 1, 12)
- Technology: SARIMA (1, 1, 1), (1, 0, 0, 12)

In [66]:

class SARIMAForecaster:
    """Wrapper around a statsmodels SARIMAX fit with an optional log transform.

    When ``log_transform`` is true the model is fitted on ``log(series)`` and
    forecasts are mapped back to the original scale with ``exp``.

    Attributes:
        order: Non-seasonal order passed to SARIMAX as ``order``.
        seasonal_order: Seasonal order passed to SARIMAX as ``seasonal_order``.
        log_transform: Whether the series is modelled on the log scale.
        model_fit: Fitted results object returned by ``SARIMAX(...).fit()``,
            or ``None`` until :meth:`fit` has been called.
        original_series: The untransformed series given to :meth:`fit`.
    """

    def __init__(self, order, seasonal_order, log_transform=True):
        """Store the model configuration; no fitting happens here."""
        self.order = order
        self.seasonal_order = seasonal_order
        self.log_transform = log_transform
        self.model_fit = None
        self.original_series = None

    def fit(self, series):
        """Fit the SARIMAX model to ``series`` and return ``self`` for chaining.

        Args:
            series: Observations to model (the notebook passes a pandas Series).

        Raises:
            ValueError: If ``log_transform`` is enabled and ``series`` contains
                zero or negative values, whose logarithm is not finite.
        """
        if self.log_transform:
            # NaN compares as False here, so missing observations are not
            # rejected by this check; only genuine non-positive values are.
            if np.any(np.asarray(series, dtype=float) <= 0):
                raise ValueError(
                    "log_transform=True requires strictly positive values; "
                    "the series contains zero or negative observations"
                )
            model_input = np.log(series)
        else:
            model_input = series
        self.original_series = series
        self.model_fit = SARIMAX(
            model_input, order=self.order, seasonal_order=self.seasonal_order
        ).fit()
        return self

    def forecast(self, steps=6, bias_correction=False):
        """Forecast ``steps`` periods ahead on the original scale.

        Args:
            steps: Number of out-of-sample periods to forecast.
            bias_correction: Only used when ``log_transform`` is enabled. If
                true, returns ``exp(mean + 0.5 * variance)`` using the
                per-horizon forecast variance on the log scale, instead of
                ``exp(mean)``.

        Returns:
            The forecast values, back-transformed when ``log_transform`` is set.

        Raises:
            RuntimeError: If called before :meth:`fit`.
        """
        if self.model_fit is None:
            raise RuntimeError("SARIMAForecaster.forecast() called before fit()")

        if not self.log_transform:
            return self.model_fit.forecast(steps=steps)

        if not bias_correction:
            return np.exp(self.model_fit.forecast(steps=steps))

        # Use the h-step forecast variance rather than the in-sample residual
        # variance: for a differenced model the first residual(s) come from the
        # filter start-up and inflate ``resid.var()``, and a single constant
        # ignores that forecast uncertainty changes with the horizon.
        prediction = self.model_fit.get_forecast(steps=steps)
        mean_log = prediction.predicted_mean
        var_log = np.asarray(prediction.var_pred_mean, dtype=float).reshape(-1)
        return np.exp(mean_log + 0.5 * var_log)

    def save(self, path):
        """Pickle this forecaster (including any fitted model) to ``path``."""
        with open(path, 'wb') as f:
            pickle.dump(self, f)

    @staticmethod
    def load(path):
        """Unpickle and return the object stored at ``path``.

        NOTE(security): ``pickle.load`` can execute arbitrary code embedded in
        the file. Only load model files that come from a trusted source.
        """
        with open(path, 'rb') as f:
            return pickle.load(f)

In [70]:
# Build and fit one forecaster per category, with the orders listed in the Models section.
furn_model = SARIMAForecaster(order=(0, 1, 1), seasonal_order=(1, 0, 1, 12))
furn_model.fit(sales_furniture)

office_model = SARIMAForecaster(order=(1, 1, 1), seasonal_order=(1, 0, 1, 12))
office_model.fit(sales_office)

tech_model = SARIMAForecaster(order=(1, 1, 1), seasonal_order=(1, 0, 0, 12))
tech_model.fit(sales_technology)

# Persist each fitted forecaster as a pickle in the working directory.
furn_model.save('model_furniture.pkl')
office_model.save('model_office.pkl')
tech_model.save('model_technology.pkl')

# Round-trip check: reload the furniture forecaster and forecast six periods ahead.
loaded = SARIMAForecaster.load('model_furniture.pkl')
forecast = loaded.forecast(steps=6)