# Forecasting com a biblioteca Prophet
Artigo sobre o Prophet: https://peerj.com/preprints/3190/<br>
Documentação: https://facebook.github.io/prophet/docs/quick_start.html#python-api

In [11]:
import MySQLdb
import decouple
import pandas as pd
import gspread
import gspread_dataframe as gd
import time

from tqdm.auto import tqdm
from decouple import Config, RepositoryEnv
from datetime import datetime
from fbprophet import Prophet #as novas versoes utlizam apenas from prophet import Prophet

In [12]:
## Configurando credenciais: dw e google sheets
config = decouple.Config(RepositoryEnv('/home/marlon/Documents/prophet/.env'))
gc = gspread.service_account(filename = 'key.json')
sh = gc.open_by_key(config('CODE_SHEET'))

## Extraindo os dados do Data Warehouse (DW)

In [13]:
def query_dw(query):
    con = MySQLdb.connect(host=config('DW_HOST'),
                         user=config('DW_USER'),
                         passwd=config('DW_PASSWORD'),
                         db='dw')
    dw = con.cursor()

    dw.execute(query)

    colunas = []
    for i in dw.description:
        colunas.append(i[0])

    return pd.DataFrame(list(dw.fetchall()), columns=colunas)

In [14]:
def gerar_dados_completos():
    print('Generating data...')
    inicio = time.time()
    dados = query_dw(
    """
        select 
            date,
            business_model,
            sum(gmv) as gmv,
            sum(tickets) as ticket
        from ft_results
        where year(date) >= 2017
        group by 1, 2
        order by 1
    """
    )
    dados['ano'] = pd.DatetimeIndex(dados['date']).year
    dados['mes'] = pd.DatetimeIndex(dados['date']).month
    
    gd.set_with_dataframe(worksheet=sh.worksheet('Dados'), dataframe=dados, include_index=False, \
                          include_column_header=True, resize=True)
    
    total = dados.groupby('date', as_index=False)[['gmv', 'ticket']].sum()
    total.columns = ['date', 'gmv_total', 'ticket_total']

    pivot = pd.pivot_table(data=dados, index='date', columns=['business_model'], values=['gmv', 'ticket'])
    pivot = pivot.reset_index()
    pivot = pivot.dropna(axis=1)
#     print(pivot)
    pivot.columns = ['date', 'gmv_ota', 'gmv_outras_otas', 'gmv_parc', 'gmv_wl', 'ticket_ota', 'ticket_outras_otas','ticket_parc', 'ticket_wl']
    pivot = pivot.drop(columns=['gmv_outras_otas','ticket_outras_otas'])
    
    join = pd.merge(pivot, total, on='date')
    join.to_csv('data/gmv_full.csv', sep=',', index=False)
    fim = time.time()
    
    print(f'Data generated! Time: {fim - inicio} s')

## Aplicando Prophet

In [15]:
def high_season(ds):
    date = pd.to_datetime(ds)
    return date.month == 12 or date.month == 7 or date.month == 2

In [16]:
def modelo(dados, metrica, data_corte, holidays):
    dados['ds'] = pd.to_datetime(dados['ds'])
    
    # Removendo dos dados o período da queda de vendas na pandemia
    dados.loc[(dados['ds'] > '2020-03-14') & (dados['ds'] < '2020-07-01'), 'y'] = None

    # Aplicando o regressor da high season
    dados['high_season'] = dados['ds'].apply(high_season)
    
    # Modelo preditivo 
    modelo = Prophet(holidays                = holidays,
                     daily_seasonality       = False, 
                     weekly_seasonality      = True, 
                     yearly_seasonality      = True,
                     interval_width          = 0.8,
                     n_changepoints          = 25,
                     changepoint_range       = 0.85,
                     changepoint_prior_scale = 0.05,
                     holidays_prior_scale    = 10.0,
                     seasonality_prior_scale = 5.0, 
                     seasonality_mode        = 'multiplicative')
    modelo.add_regressor('high_season')
    
    modelo.fit(dados[dados['ds'] <= data_corte])
    
    return modelo

In [17]:
def previsao(modelo, data_corte, data_previsao):
    # Definindo o período para previsão
    data_corte = datetime.strptime(data_corte, "%Y-%m-%d")
    data_previsao = datetime.strptime(data_previsao, "%Y-%m-%d")
    period = data_previsao - data_corte
    
    data_futuro = modelo.make_future_dataframe(periods=period.days)
    data_futuro['high_season'] = data_futuro['ds'].apply(high_season)
    previsao = modelo.predict(data_futuro)
    
#     previsao['yhat'] = previsao['yhat'] * 1.05 #apply(lambda x: x*1.05) 
    
    return previsao

In [18]:
# Variaveis a serem modificadas #
data_inicio_previsao = '2023-10-01'
data_fim_previsao = '2023-11-30'
gerar_dados = 1 # caso o dataset esteja desatualizado ou não exista, 1 para gerar ou atualizar e 0 caso contrário

In [19]:
metricas = ['gmv_total', 'gmv_ota', 'gmv_parc', 'gmv_wl', 'ticket_total', 'ticket_ota', 'ticket_parc', 'ticket_wl']

calendar = pd.read_table('data/calendar.tsv')
holidays = calendar[(calendar['ds'] >= '2017-01-01') & (calendar['ds'] <= '2023-12-31')]

if gerar_dados:
    gerar_dados_completos()

dataset = pd.read_csv('data/gmv_full.csv')
dt_cal = dataset['date'].iloc[-1]    # data até onde tem dados reais; ou pode ser definida uma outra data especifica

for metrica in tqdm(metricas):
    dados = dataset[['date', metrica]]
    dados.columns = ['ds', 'y']
    
    model = modelo(dados, metrica, dt_cal, holidays) 
    forecast = previsao(model, dt_cal, data_fim_previsao)
    
    forecast = forecast[['ds', 'yhat_lower', 'yhat_upper', 'yhat']]
    forecast = forecast[forecast['ds'] >= data_inicio_previsao]
    
    dados['ds'] = pd.to_datetime(dados['ds']) # necessario para fazer o merge
    dados = dados[['ds', 'y']]
    
    forecast = pd.merge(forecast, dados, how='left', on='ds')
    
    gd.set_with_dataframe(worksheet=sh.worksheet(metrica), dataframe=forecast, include_index=False, \
                          include_column_header=True, resize=True)
    
#     forecast.to_csv('previsoes/'+metrica+'.csv', sep=',', index=False)

Generating data...


Data generated! Time: 131.45418286323547


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['high_season'] = dados['ds'].apply(high_season)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.holidays['ds'] = pd.to_datetime(self.holidays['ds'])
  components = components.append(new_comp)
  compon

Initial log joint probability = -31.577
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99        5920.8     0.0386707       1128.95           1           1      124   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       6034.27    0.00525219       490.305           1           1      244   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       6110.81     0.0699698       645.978      0.2338           1      367   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       6162.71    0.00156779       490.884      0.7683      0.7683      490   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       6176.98   0.000172263       245.066      0.8118      0.8118      613   
    Iter      log prob        ||dx||      ||grad||       alpha   

  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds']) # necessario para fazer o merge
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,co

Initial log joint probability = -35.7185
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       5976.26     0.0410862       611.485           1           1      124   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       6079.68     0.0664239       1775.84           1           1      244   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       6160.93    0.00313352       970.272      0.1095           1      356   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399        6186.8    0.00376938       438.483           1           1      481   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       6194.68     0.0122214       493.935      0.3803           1      611   
    Iter      log prob        ||dx||      ||grad||       alpha  

  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds']) # necessario para fazer o merge
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,co

Initial log joint probability = -39.0133
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       5484.18    0.00608585       390.336           1           1      124   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       5529.25    0.00949109       566.018     0.08871           1      248   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       5558.46     0.0162173       325.789           1           1      368   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399        5574.5    0.00249661       436.599       0.248       0.248      485   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       5579.77    0.00264058       198.666      0.1836           1      603   
    Iter      log prob        ||dx||      ||grad||       alpha  

  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds']) # necessario para fazer o merge
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,co

Initial log joint probability = -47.212
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       5656.16     0.0273425       822.996           1           1      121   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       5767.68     0.0924151       506.165           1           1      234   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       5821.09     0.0277425       922.548           1           1      353   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       5849.01     0.0162109       193.492           1           1      465   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       5864.69     0.0332488       386.109           1           1      590   
    Iter      log prob        ||dx||      ||grad||       alpha   

  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds']) # necessario para fazer o merge
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,co

Initial log joint probability = -20.4898
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       5766.02     0.0159733        1891.9           1           1      119   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       5833.33    0.00582888       417.275           1           1      238   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       5897.73     0.0340946       611.839           1           1      356   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       5931.18    0.00218056       69.4726           1           1      474   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       5935.25     0.0307915       224.516           1           1      601   
    Iter      log prob        ||dx||      ||grad||       alpha  

  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds']) # necessario para fazer o merge
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,co

Initial log joint probability = -24.1435
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       5768.17     0.0377271       614.496           1           1      122   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       5835.55     0.0172222       342.359      0.3933           1      245   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       5874.11    0.00221871       473.342      0.2324      0.2324      367   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       5901.91    0.00346998       282.433      0.5048      0.5048      489   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       5909.05    0.00216436       154.672           1           1      613   
    Iter      log prob        ||dx||      ||grad||       alpha  

  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds']) # necessario para fazer o merge
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,co

Initial log joint probability = -20.8904
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       5879.73     0.0182917       1081.16        1.26       0.126      121   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       5954.06     0.0218244       1305.24      0.4836           1      247   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        5976.1   0.000855823       219.527           1           1      361   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       5985.14    0.00222518       101.821           1           1      488   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       5986.95    0.00326942       110.899      0.8807     0.08807      611   
    Iter      log prob        ||dx||      ||grad||       alpha  

  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds']) # necessario para fazer o merge
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,co

Initial log joint probability = -56.1225
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99        5494.1    0.00840164       669.671           1           1      118   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       5595.82     0.0457801       1055.88           1           1      230   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        5637.6     0.0010597       642.409      0.2423      0.2423      345   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       5656.58     0.0160684       342.665           1           1      464   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       5674.68   0.000517353       110.707      0.9195      0.9195      590   
    Iter      log prob        ||dx||      ||grad||       alpha  

  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
  components = components.append(new_comp)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados['ds'] = pd.to_datetime(dados['ds']) # necessario para fazer o merge
100%|██████████| 8/8 [02:49<00:00, 21.23s/it]
