# MLOps Stack para otimização de hiperparâmetros com MLflow e Optuna

O objetivo deste projeto é desenvolver uma solução de MLOps que integre MLflow e Optuna para otimizar hiperparâmetros em modelos de previsão de demanda. A solução deve ser capaz de treinar experimentos, gerenciar versões do modelo e automatizar o processo de otimização de hiperparâmetros, garantindo assim um modelo mais preciso e eficiente. O processo de operacionalização do modelo deve ainda gerar artefatos para a melhor versão do modelo, que então será usada para inferência. Usaremos dados fictícios, com a demanda sendo a variável alvo. O projeto inclui o dicionário de dados.

In [6]:
import sys
# Optional one-time setup: uncomment the lines below to install the
# dependencies with the same Python interpreter that runs this kernel
# (sys.executable).
#!{sys.executable} -m pip install optuna
#!{sys.executable} -m pip install mlflow
#!{sys.executable} -m pip install xgboost

In [1]:
# Imports
import json
import math
import os
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import mlflow
import numpy as np
import optuna
import pandas as pd
import seaborn as sns
import sklearn
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
# Ignore every warning category for the rest of the session (presumably to
# keep cell output tidy).
# NOTE(review): this also hides deprecation warnings from the ML libraries —
# confirm that a blanket filter is really intended.
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


### Carregando dados de texto

In [2]:
# Location of the demand dataset (CSV). Set the DATASET_PATH environment
# variable to point at a different copy; the fallback is the original local
# path, so existing runs keep working unchanged.
DATA_PATH = os.environ.get(
    "DATASET_PATH",
    "/home/priscila/Downloads/04-MLOps/dados/dataset1.csv",
)

# Load the dataset into a DataFrame and preview its first rows.
dados = pd.read_csv(DATA_PATH)
dados.head()

Unnamed: 0,data,temperatura_media,precipitacao,fim_de_semana,feriado,preco_por_kg,promocao,demanda,demanda_dos_dias_anteriores,preco_por_kg_do_concorrente,intensidade_de_marketing
0,2010-10-18 09:16:07.883826,30.584727,1.199291,0,0,1.726258,1,1051.375336,1051.276659,1.935346,0.098677
1,2010-10-19 09:16:07.883825,15.465069,1.037626,0,0,0.576471,1,1106.855943,1051.276659,2.34472,0.019318
2,2010-10-20 09:16:07.883824,10.786525,5.656089,0,0,2.513328,1,1008.304909,1106.836626,0.998803,0.409485
3,2010-10-21 09:16:07.883823,23.648154,12.030937,0,0,1.839225,1,999.83381,1057.895424,0.76174,0.872803
4,2010-10-22 09:16:07.883822,13.861391,4.303812,0,0,1.531772,1,1183.949061,1048.961007,2.123436,0.820779


In [3]:
# Dataset dimensions as a (rows, columns) tuple.
dados.shape

(5000, 11)

In [4]:
# Per-column dtype and non-null count, plus the frame's memory usage.
dados.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 11 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   data                         5000 non-null   object 
 1   temperatura_media            5000 non-null   float64
 2   precipitacao                 5000 non-null   float64
 3   fim_de_semana                5000 non-null   int64  
 4   feriado                      5000 non-null   int64  
 5   preco_por_kg                 5000 non-null   float64
 6   promocao                     5000 non-null   int64  
 7   demanda                      5000 non-null   float64
 8   demanda_dos_dias_anteriores  5000 non-null   float64
 9   preco_por_kg_do_concorrente  5000 non-null   float64
 10  intensidade_de_marketing     5000 non-null   float64
dtypes: float64(7), int64(3), object(1)
memory usage: 429.8+ KB


In [5]:
# Convert the 'data' column to datetime values (it is loaded from the CSV
# with dtype object) so it can be treated as a date.
dados['data'] = pd.to_datetime(dados['data'])

In [7]:
# Summary statistics (count, mean, min, quartiles, max, std) for the columns.
dados.describe()

Unnamed: 0,data,temperatura_media,precipitacao,fim_de_semana,feriado,preco_por_kg,promocao,demanda,demanda_dos_dias_anteriores,preco_por_kg_do_concorrente,intensidade_de_marketing
count,5000,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,2017-08-21 21:16:07.881014528,22.433182,4.962877,0.2856,0.033,1.758228,0.289,1258.452333,1282.446665,1.769682,0.848173
min,2010-10-18 09:16:07.883826,10.002439,7.1e-05,0.0,0.0,0.50036,0.0,707.952882,757.216503,0.500754,0.019318
25%,2014-03-21 03:16:07.882367232,16.311898,1.440718,0.0,0.0,1.13411,0.0,1066.040653,1089.993191,1.160719,0.77308
50%,2017-08-21 21:16:07.881031424,22.400265,3.520515,0.0,0.0,1.776194,0.0,1222.237635,1244.619215,1.764809,0.848107
75%,2021-01-22 15:16:07.879706624,28.570768,6.805668,1.0,0.0,2.383638,1.0,1422.792698,1445.791858,2.401827,0.928536
max,2024-06-25 09:16:07.878116,34.995334,40.072693,1.0,1.0,2.999606,1.0,2183.463942,2182.608701,2.999847,0.999979
std,,7.14661,4.932382,0.451745,0.178654,0.725334,0.453343,253.240475,249.087488,0.721041,0.093135
