**Optimización de parámetros del modelo Facebook Prophet**

In [3]:
import numpy as np
import pandas as pd
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn import metrics
%matplotlib inline
from matplotlib.pylab import rcParams
import seaborn as sns
rcParams['figure.figsize']=10,8

  import pandas.util.testing as tm


In [4]:
import warnings
import itertools
# Install a global "ignore" filter: every Python warning raised after this
# cell is suppressed, which keeps cell output short but also hides
# deprecation notices.
warnings.filterwarnings("ignore") # specify to ignore warning messages

In [5]:
# Mount Google Drive under /content/gdrive; the dataset is read from
# '/content/gdrive/My Drive/...' in a later cell. Requires interactive
# authorisation when run in Colab.
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [6]:
# Load the cleaned dataset: the first row is the header and the 'Datetime'
# column is parsed as timestamps and used as the index.
df = pd.read_csv(
    '/content/gdrive/My Drive/df_clean.csv',
    header=0,
    index_col='Datetime',
    parse_dates=['Datetime'],
)


In [7]:
import logging
# Raise the 'fbprophet' logger threshold to ERROR so that lower-severity
# messages from that logger are not emitted.
logging.getLogger('fbprophet').setLevel(logging.ERROR)


In [8]:
import fbprophet
import holidays
# Shorthand alias so later cells can instantiate the model as Prophet(...).
Prophet = fbprophet.Prophet

In [9]:
# Put the series on a daily grid and fill the gaps by linear interpolation.
upsampled = df.resample('D')
# NOTE(review): shift(6) moves every value six rows later and leaves the
# first six rows NaN — confirm this offset is intentional.
interpolated = upsampled.interpolate(method='linear').shift(6)
df_o3_resample = interpolated

# Prophet expects the timestamp column to be called 'ds' and the target 'y'.
df_prophet = (
    df_o3_resample
    .reset_index()
    .rename(columns={'Datetime': 'ds', 'O3': 'y'})
)

In [10]:
# UK public holidays for 2008-2019 as (date, name) pairs in chronological order.
uk_holiday_items = sorted(holidays.UK(years=np.arange(2008, 2019 + 1)).items())
ldates = [holiday_date for holiday_date, _ in uk_holiday_items]
lnames = [holiday_name for _, holiday_name in uk_holiday_items]

# Two-column frame ('ds', 'holiday'), later passed to Prophet(holidays=...).
holidays_df = pd.DataFrame({'ds': ldates, 'holiday': lnames}, columns=['ds', 'holiday'])
# Strip the ' (Observed)' suffix so observed and actual dates share one label.
holidays_df['holiday'] = holidays_df['holiday'].apply(
    lambda holiday_name: holiday_name.replace(' (Observed)', '')
)
holidays_df.holiday.unique()

array(["New Year's Day", 'New Year Holiday [Scotland]',
       "St. Patrick's Day [Northern Ireland]", 'Good Friday',
       'Easter Monday [England, Wales, Northern Ireland]', 'May Day',
       'Spring Bank Holiday', 'Battle of the Boyne [Northern Ireland]',
       'Summer Bank Holiday [Scotland]',
       'Late Summer Bank Holiday [England, Wales, Northern Ireland]',
       "St. Andrew's Day [Scotland]", 'Christmas Day', 'Boxing Day',
       'Wedding of William and Catherine',
       "New Year Holiday [Scotland], New Year's Day",
       'Diamond Jubilee of Elizabeth II'], dtype=object)

In [11]:
# Number of trailing rows held out for evaluation.
TEST_SIZE = 365
# Chronological split: everything except the last TEST_SIZE rows is used for
# fitting, the last TEST_SIZE rows are the hold-out.
train = df_prophet.iloc[:-TEST_SIZE]
test = df_prophet.iloc[-TEST_SIZE:]

In [12]:
from sklearn.model_selection import ParameterGrid

# Search space: every combination of these values is fitted and scored.
params_grid = {
    'seasonality_mode': ('multiplicative', 'additive'),
    'changepoint_prior_scale': [0.05, 0.1, 1, 10],
    'n_changepoints': [100, 150, 200],
}
grid = ParameterGrid(params_grid)

# ParameterGrid knows its own size, so no counting loop is needed.
cnt = len(grid)

print('Total Possible Models', cnt)

Total Possible Models 24


In [13]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Observations whose actual value is exactly zero are left out of the
    average, because the percentage error is undefined there and a single
    zero would otherwise turn the whole score into ``inf`` or ``nan``.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the non-zero
        actuals, or ``nan`` when there is no non-zero actual to score.
    """
    # Convert to plain float arrays; this also discards any pandas index, so
    # values are paired by position.
    y_true, y_pred = np.broadcast_arrays(
        np.asarray(y_true, dtype=float), np.asarray(y_pred, dtype=float)
    )
    scorable = y_true != 0
    if not scorable.any():
        return np.nan
    relative_errors = (y_true[scorable] - y_pred[scorable]) / y_true[scorable]
    return np.mean(np.abs(relative_errors)) * 100

In [14]:
import random

# Grid search WITH the UK holiday calendar: fit one Prophet model per
# hyper-parameter combination on `train`, forecast the hold-out horizon and
# score the forecast with MAPE.
grid_records = []  # one {'MAPE', 'Parameters'} record per combination
for p in grid:
  print(p)
  # Re-seed before every fit so each combination starts from the same RNG state.
  random.seed(0)
  # NOTE(review): Prophet's uncertainty sampling presumably draws from NumPy's
  # global RNG — seeding it as well; confirm against the fbprophet version in use.
  np.random.seed(0)
  train_model = Prophet(holidays=holidays_df,
                        changepoint_prior_scale=p['changepoint_prior_scale'],
                        n_changepoints=p['n_changepoints'],
                        seasonality_mode=p['seasonality_mode'],
                        weekly_seasonality=True,
                        daily_seasonality=True,
                        yearly_seasonality=True,
                        interval_width=0.95)
  train_model.fit(train)
  # Forecast exactly as many days as were held out of `train`.
  future_dates = train_model.make_future_dataframe(periods=TEST_SIZE, freq='D', include_history=False)
  train_forecast = train_model.predict(future_dates)
  # Pair every forecast with the observation for the SAME date instead of
  # relying on a hard-coded date window and positional alignment.
  scored = (
      train_forecast[['ds', 'yhat']]
      .merge(df_prophet[['ds', 'y']], on='ds', how='inner')
      .dropna(subset=['y'])
  )
  if scored.empty:
    raise ValueError('No observations overlap the forecast horizon; cannot compute MAPE.')
  # The absolute value of the forecast is scored, as in the original analysis.
  MAPE = mean_absolute_percentage_error(scored['y'], scored['yhat'].abs())
  print('Mean Absolute Percentage Error(MAPE)------------------------------------',MAPE)
  grid_records.append({'MAPE': MAPE, 'Parameters': p})

# Build the results table once instead of appending row by row.
model_parameters = pd.DataFrame(grid_records, columns=['MAPE', 'Parameters'])

{'changepoint_prior_scale': 0.05, 'n_changepoints': 100, 'seasonality_mode': 'multiplicative'}


INFO:numexpr.utils:NumExpr defaulting to 4 threads.


Mean Absolute Percentage Error(MAPE)------------------------------------ 17.445464570930223
{'changepoint_prior_scale': 0.05, 'n_changepoints': 100, 'seasonality_mode': 'additive'}
Mean Absolute Percentage Error(MAPE)------------------------------------ 17.43488920904063
{'changepoint_prior_scale': 0.05, 'n_changepoints': 150, 'seasonality_mode': 'additive'}
Mean Absolute Percentage Error(MAPE)------------------------------------ 14.773180135879274
{'changepoint_prior_scale': 0.05, 'n_changepoints': 200, 'seasonality_mode': 'multiplicative'}
Mean Absolute Percentage Error(MAPE)------------------------------------ 18.14291306660053
{'changepoint_prior_scale': 0.05, 'n_changepoints': 200, 'seasonality_mode': 'additive'}
Mean Absolute Percentage Error(MAPE)------------------------------------ 14.164963803416109
{'changepoint_prior_scale': 0.1, 'n_changepoints': 100, 'seasonality_mode': 'multiplicative'}
Mean Absolute Percentage Error(MAPE)------------------------------------ 17.7876458429

In [15]:
# Rank the combinations from lowest to highest MAPE, renumber the rows,
# save the ranking to CSV and preview the five best.
parameters_fb = (
    model_parameters
    .sort_values(by=['MAPE'])
    .reset_index(drop=True)
)
parameters_fb.to_csv('parameters_fb_prophet.csv')
parameters_fb.head()

Unnamed: 0,MAPE,Parameters
0,14.164964,"{'changepoint_prior_scale': 0.05, 'n_changepoi..."
1,14.54155,"{'changepoint_prior_scale': 0.05, 'n_changepoi..."
2,14.77318,"{'changepoint_prior_scale': 0.05, 'n_changepoi..."
3,17.434889,"{'changepoint_prior_scale': 0.05, 'n_changepoi..."
4,17.445465,"{'changepoint_prior_scale': 0.05, 'n_changepoi..."


In [16]:
# Best combination: row 0 after sorting by MAPE.
parameters_fb.loc[0, 'Parameters']

{'changepoint_prior_scale': 0.05,
 'n_changepoints': 200,
 'seasonality_mode': 'additive'}

**Búsqueda de parámetros sin festivos**

In [17]:
import random

# Grid search WITHOUT a holiday calendar: fit one Prophet model per
# hyper-parameter combination on `train`, forecast the hold-out horizon and
# score the forecast with MAPE.
grid_records_no_holidays = []  # one {'MAPE', 'Parameters'} record per combination
for p in grid:
  print(p)
  # Re-seed before every fit so each combination starts from the same RNG state.
  random.seed(0)
  # NOTE(review): Prophet's uncertainty sampling presumably draws from NumPy's
  # global RNG — seeding it as well; confirm against the fbprophet version in use.
  np.random.seed(0)
  train_model = Prophet(changepoint_prior_scale=p['changepoint_prior_scale'],
                        n_changepoints=p['n_changepoints'],
                        seasonality_mode=p['seasonality_mode'],
                        weekly_seasonality=True,
                        daily_seasonality=True,
                        yearly_seasonality=True,
                        interval_width=0.95)
  train_model.fit(train)
  # Forecast exactly as many days as were held out of `train`.
  future_dates = train_model.make_future_dataframe(periods=TEST_SIZE, freq='D', include_history=False)
  train_forecast = train_model.predict(future_dates)
  # Pair every forecast with the observation for the SAME date instead of
  # relying on a hard-coded date window and positional alignment.
  scored = (
      train_forecast[['ds', 'yhat']]
      .merge(df_prophet[['ds', 'y']], on='ds', how='inner')
      .dropna(subset=['y'])
  )
  if scored.empty:
    raise ValueError('No observations overlap the forecast horizon; cannot compute MAPE.')
  # The absolute value of the forecast is scored, as in the original analysis.
  MAPE = mean_absolute_percentage_error(scored['y'], scored['yhat'].abs())
  print('Mean Absolute Percentage Error(MAPE)------------------------------------',MAPE)
  grid_records_no_holidays.append({'MAPE': MAPE, 'Parameters': p})

# Build the results table once instead of appending row by row.
model_parameters = pd.DataFrame(grid_records_no_holidays, columns=['MAPE', 'Parameters'])

{'changepoint_prior_scale': 0.05, 'n_changepoints': 100, 'seasonality_mode': 'multiplicative'}
Mean Absolute Percentage Error(MAPE)------------------------------------ 17.782711393712248
{'changepoint_prior_scale': 0.05, 'n_changepoints': 100, 'seasonality_mode': 'additive'}
Mean Absolute Percentage Error(MAPE)------------------------------------ 14.681564065436753
{'changepoint_prior_scale': 0.05, 'n_changepoints': 150, 'seasonality_mode': 'multiplicative'}
Mean Absolute Percentage Error(MAPE)------------------------------------ 17.775031896441877
{'changepoint_prior_scale': 0.05, 'n_changepoints': 150, 'seasonality_mode': 'additive'}
Mean Absolute Percentage Error(MAPE)------------------------------------ 14.469017075385434
{'changepoint_prior_scale': 0.05, 'n_changepoints': 200, 'seasonality_mode': 'multiplicative'}
Mean Absolute Percentage Error(MAPE)------------------------------------ 17.521383856556003
{'changepoint_prior_scale': 0.05, 'n_changepoints': 200, 'seasonality_mode': 

In [18]:
# Rank the no-holiday combinations from lowest to highest MAPE, renumber the
# rows, save the ranking to CSV and preview the five best.
parameters_fb2 = (
    model_parameters
    .sort_values(by=['MAPE'])
    .reset_index(drop=True)
)
parameters_fb2.to_csv('parameters_fb_prophet2.csv')
parameters_fb2.head()

Unnamed: 0,MAPE,Parameters
0,14.404092,"{'changepoint_prior_scale': 0.05, 'n_changepoi..."
1,14.469017,"{'changepoint_prior_scale': 0.05, 'n_changepoi..."
2,14.681564,"{'changepoint_prior_scale': 0.05, 'n_changepoi..."
3,17.374019,"{'changepoint_prior_scale': 0.1, 'n_changepoin..."
4,17.41959,"{'changepoint_prior_scale': 1, 'n_changepoints..."


In [19]:
# Best no-holiday combination: row 0 after sorting by MAPE.
parameters_fb2.loc[0, 'Parameters']

{'changepoint_prior_scale': 0.05,
 'n_changepoints': 200,
 'seasonality_mode': 'additive'}

**Referencias**

https://www.kaggle.com/manovirat/timeseries-using-prophet-hyperparameter-tuning