Importando as bibliotecas para o tratamento e visualização dos dados.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import holidays
import datetime
from sklearn.metrics import mean_squared_error, r2_score
from statsmodels.tsa.holtwinters import ExponentialSmoothing

Realizando a leitura da base de dados.

In [None]:
# Load the raw call-center records from the Excel workbook and preview the first rows.
dataset_path = 'data/01 Call-Center-Dataset.xlsx'
df: pd.DataFrame = pd.read_excel(dataset_path)
df.head()

Contando a quantidade de chamadas recebidas por dia.

In [None]:
# Count the 'Call Id' entries recorded for each date and expose them as a
# single-column frame named 'Calls', indexed by a DatetimeIndex.
dayly: pd.DataFrame = (
    df[['Call Id', 'Date']]
    .groupby('Date')
    .count()
    .rename(columns={'Call Id': 'Calls'})
)
dayly.index = pd.to_datetime(dayly.index)

dayly.head()

Visualização inicial dos dados.

In [None]:
# Line chart of the number of calls per day, with an x tick on every 8th date.
fig, ax = plt.subplots(figsize=(16, 3))
ax.plot(dayly.index, dayly['Calls'])
ax.set_xlabel('Data')
ax.set_ylabel('Quantidade de Chamadas')
ax.set_title('Chamadas por Dia')
ax.set_xticks(dayly.index[::8])
plt.show()

Estatísticas básicas das chamadas diárias:

In [None]:
# Display pandas' descriptive statistics for the daily call counts.
dayly.describe()

Calculando a média de chamadas por dia da semana.

In [None]:
# Average number of calls for each weekday, listed from Sunday to Saturday.
weekday_order = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']

weekly_means: pd.DataFrame = (
    dayly
    .assign(Day_name=dayly.index.day_name())  # label every date with its weekday name
    .groupby('Day_name')
    .mean()
    .reindex(weekday_order)  # groupby sorts alphabetically; restore calendar order
    .rename(columns={'Calls': 'Mean Calls'})
)

weekly_means

Visualizando em gráfico de barras.

In [None]:
# Bar chart of the mean number of calls for each day of the week.
fig, ax = plt.subplots(figsize=(8, 3))
ax.bar(weekly_means.index, weekly_means['Mean Calls'])
ax.set_xlabel('Dias da Semana')
ax.set_ylabel('Média de Chamadas')
ax.set_title('Médias de Chamadas por Dia da Semana')
plt.show()

Feriados brasileiros do ano de 2021

In [None]:
# Brazilian holidays reported by the `holidays` package for 2021, as a frame
# indexed by date with a single 'Holiday' column holding the holiday name.
br_holidays: holidays.BR = holidays.BR(years=2021)

holiday_df: pd.DataFrame = (
    pd.DataFrame(list(br_holidays.items()), columns=['Date', 'Holiday'])
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

holiday_df

In [None]:
# Manually add Carnaval and Corpus Christi, then keep the table in date order.
extra_holidays = {
    datetime.datetime(2021, 2, 15): 'Carnaval',
    datetime.datetime(2021, 2, 16): 'Carnaval',
    datetime.datetime(2021, 6, 3): 'Corpus Christi',
}
for holiday_date, holiday_name in extra_holidays.items():
    holiday_df.loc[holiday_date] = holiday_name
holiday_df.sort_index(inplace=True)

holiday_df

In [None]:
# Inner-join the holiday table with the daily counts on 'Date', keeping only
# the holidays for which call data exists.
holydays_observed: pd.DataFrame = pd.merge(
    holiday_df,
    dayly,
    how='inner',
    on='Date',
)

holydays_observed

In [None]:
# Split the data into train and test sets (the last 14 rows, i.e. two weeks, are the test set).
train = dayly.iloc[:-14]
test = dayly.iloc[-14:]


def _calendar_features(dates: pd.DatetimeIndex) -> pd.DataFrame:
    """Build the calendar feature columns for the given dates.

    Returns a frame with 'day_of_week', 'day_of_month', 'month' and
    'is_holiday' (whether the date appears in ``holiday_df``'s index).
    """
    return pd.DataFrame({
        'day_of_week': dates.dayofweek,
        'day_of_month': dates.day,
        'month': dates.month,
        'is_holiday': dates.isin(holiday_df.index),
    })


# Features for the model, built the same way for both partitions.
train_features = _calendar_features(train.index)
test_features = _calendar_features(test.index)

In [None]:
# Fit several Holt-Winters (exponential smoothing) configurations on the training
# series, score each one on the test period, and plot the forecast of the
# configuration with the lowest MSE.

# Daily-frequency view of the training series, computed once and shared by every
# configuration.
# NOTE(review): asfreq('D') inserts NaN for any calendar day missing from `train`
# -- confirm the series has no gaps before trusting the fits.
train_calls = train['Calls'].asfreq('D')

# Keyword arguments that distinguish each candidate; the 7-day seasonal period
# is common to all of them.
model_configs = {
    'Additive trend, Additive seasonal':
        dict(trend='add', seasonal='add'),
    'Additive trend, Multiplicative seasonal':
        dict(trend='add', seasonal='mul'),
    'Multiplicative trend, Additive seasonal':
        dict(trend='mul', seasonal='add'),
    'Multiplicative trend, Multiplicative seasonal':
        dict(trend='mul', seasonal='mul'),
    'Damped Additive trend, Additive seasonal':
        dict(trend='add', seasonal='add', damped_trend=True),
    'Damped Additive trend, Multiplicative seasonal':
        dict(trend='add', seasonal='mul', damped_trend=True),
}

# Test different Holt-Winters configurations
models = {
    name: ExponentialSmoothing(train_calls, seasonal_periods=7, **config).fit()
    for name, config in model_configs.items()
}

# Make predictions for the test period and score them against the observed calls
forecasts = {}
for name, model in models.items():
    predictions = model.forecast(len(test))

    mse = mean_squared_error(test['Calls'], predictions)
    r2 = r2_score(test['Calls'], predictions)

    forecasts[name] = {
        'predictions': predictions,
        'mse': mse,
        'r2': r2,
    }

# Find best model: the configuration with the smallest test MSE
# (ties resolve to the first one in insertion order).
best_model = min(forecasts, key=lambda candidate: forecasts[candidate]['mse'])
best_mse = forecasts[best_model]['mse']

print(f'Best model: {best_model}')
print(f'MSE: {best_mse:.2f}')
# Outer double quotes: reusing the same quote character inside an f-string
# replacement field is a SyntaxError before Python 3.12.
print(f"R²: {forecasts[best_model]['r2']:.2f}")

# Plot the results
plt.figure(figsize=(16, 3))
plt.plot(train.index, train['Calls'], label='Dados de Treino')
plt.plot(test.index, test['Calls'], label='Dados de Teste')
plt.plot(test.index, forecasts[best_model]['predictions'], label='Previsão')
plt.xlabel('Data')
plt.ylabel('Quantidade de Chamadas')
plt.title('Comparação entre Previsões e Dados Reais')
plt.legend()
plt.show()