# Treinamento

## Inicialização

In [45]:
# Config Inicial
import json
import mlflow.pyfunc
import pandas as pd
import requests
from sklearn.metrics import r2_score
import mlflow
from mlflow.tracking import MlflowClient

### Leitura

In [46]:
# Load the training data, discard every row that contains a missing value,
# and cast the integer columns to float64 -- all in one chain so that
# re-running the cell always rebuilds the frame from the CSV.
df_treinamento = (
    pd.read_csv("dataset/dados_treinamento.csv", encoding="utf-8")
    .dropna()
    .pipe(
        lambda quadro: quadro.astype(
            dict.fromkeys(quadro.select_dtypes(include='int').columns, 'float64')
        )
    )
)

df_treinamento.head()

Unnamed: 0,id,property_type,state,region,lat,lon,area_m2,price_brl,city
0,1.0,apartment,Pernambuco,Northeast,-8.134204,-34.906326,72.0,414222.98,Recife
1,2.0,apartment,Pernambuco,Northeast,-8.126664,-34.903924,136.0,848408.53,Recife
2,3.0,apartment,Pernambuco,Northeast,-8.12555,-34.907601,75.0,299438.28,Recife
3,4.0,apartment,Pernambuco,Northeast,-8.120249,-34.89592,187.0,848408.53,Recife
4,5.0,apartment,Pernambuco,Northeast,-8.142666,-34.906906,80.0,464129.36,Recife


### Amostragem

In [47]:
# Draw a reproducible 20-row sample (fixed random_state) to score.
df_sample = df_treinamento.sample(n=20, random_state=42)

# Split the sample into the target column and the remaining feature columns.
y_target = df_sample["price_brl"]  # target variable
x_features = df_sample.drop(columns=["price_brl"])  # features

df_sample.head()

Unnamed: 0,id,property_type,state,region,lat,lon,area_m2,price_brl,city
6383,6384.0,apartment,Rio de Janeiro,Southeast,-22.921227,-43.225548,125.0,787484.34,Rio de Janeiro
9172,9173.0,apartment,Santa Catarina,South,-27.750971,-48.779408,241.0,933875.69,Santo Amaro da Imperatriz
3868,3869.0,apartment,Rio de Janeiro,Southeast,-22.95433,-43.195595,112.0,1397378.74,Rio de Janeiro
4676,4677.0,apartment,Rio de Janeiro,Southeast,-23.000669,-43.392197,106.0,998127.67,Rio de Janeiro
18133,18134.0,house,SP,southeast,-23.588417,-46.664858,350.0,4000000.0,São Paulo


### Modelo Preditor

In [48]:

def _encontrar_diretorio_modelo(client, run_id):
    """Return the artifact path of the run's model directory, or None.

    A directory is preferred when it directly contains a file named
    ``MLmodel``. If no directory contains one, the first top-level directory
    is returned so runs that loaded before still resolve the same path.
    """
    diretorios = [item.path for item in client.list_artifacts(run_id) if item.is_dir]

    for caminho in diretorios:
        conteudo = client.list_artifacts(run_id, caminho)
        # Artifact paths are "/"-separated, so the last segment is the file name.
        if any(arq.path.rsplit("/", 1)[-1] == "MLmodel" for arq in conteudo):
            return caminho

    return diretorios[0] if diretorios else None


def carregar_modelo_com_melhor_r2(experimento_nome):
    """Load, as a pyfunc model, the model logged by the experiment's best-R² run.

    Args:
        experimento_nome: Name of the MLflow experiment to search.

    Returns:
        The object returned by ``mlflow.pyfunc.load_model`` for the selected run.

    Raises:
        ValueError: If the experiment does not exist, if no run has an ``r2``
            metric, or if the selected run has no artifact directory.
    """
    client = MlflowClient()

    experimento = client.get_experiment_by_name(experimento_nome)
    if not experimento:
        raise ValueError(f"Experimento '{experimento_nome}' não encontrado.")

    # Runs are requested in descending R² order.
    runs = client.search_runs(
        experiment_ids=[experimento.experiment_id],
        order_by=["metrics.r2 DESC"],
    )

    # Take the first run that really carries the metric, so a run without
    # "r2" yields the ValueError below instead of a KeyError.
    melhor_run = next((run for run in runs if "r2" in run.data.metrics), None)
    if melhor_run is None:
        raise ValueError("Nenhuma run com métrica R² encontrada.")

    run_id = melhor_run.info.run_id
    r2 = melhor_run.data.metrics["r2"]

    nome_modelo = _encontrar_diretorio_modelo(client, run_id)
    if not nome_modelo:
        raise ValueError("Nenhum modelo encontrado nos artefatos da melhor run.")

    # Informational only: version of a registered model created from this run.
    # NOTE(review): this matches the registered model *name* against the
    # artifact directory name; it stays None when the two differ -- confirm
    # that this naming convention is intended.
    versao_modelo = next(
        (
            mv.version
            for mv in client.search_model_versions(f"run_id = '{run_id}'")
            if mv.name == nome_modelo
        ),
        None,
    )

    model_uri = f"runs:/{run_id}/{nome_modelo}"

    print(f"Carregando modelo '{nome_modelo}' versão {versao_modelo} da run {run_id} com R² = {r2:.4f}")

    return mlflow.pyfunc.load_model(model_uri)

# Load the model logged by the best-R² run of the "ecd15" experiment
loaded_model = carregar_modelo_com_melhor_r2(experimento_nome="ecd15")

loaded_model


Carregando modelo 'random_forest_model' versão 31 da run 2f187ed8dfb748faa9e0a1278dd7ee7c com R² = 0.8904


mlflow.pyfunc.loaded_model:
  artifact_path: random_forest_model
  flavor: mlflow.sklearn
  run_id: 2f187ed8dfb748faa9e0a1278dd7ee7c

## Predição Estática

In [49]:
# Score the sampled feature rows with the loaded model
predictions = loaded_model.predict(x_features)

# Attach the scores to the sample, then store them as floats rounded to 2 decimals
df_sample["predictions"] = predictions
df_sample["predictions"] = df_sample["predictions"].astype("float64").round(decimals=2)

# Show the actual price next to the local prediction
df_sample.loc[:, ["price_brl", "predictions"]]

Unnamed: 0,price_brl,predictions
6383,787484.34,844436.32
9172,933875.69,1100024.97
3868,1397378.74,1401079.65
4676,998127.67,1009376.21
18133,4000000.0,5049400.0
13351,1890000.0,1792180.0
17172,1700000.0,1626377.39
17379,1300000.0,1281245.14
5818,289457.01,309616.08
20470,3750000.0,3660319.6


### R² Score

In [50]:
# R² between the actual prices and the local predictions for the sample
r2 = r2_score(y_true=df_sample["price_brl"], y_pred=df_sample["predictions"])
print(f"R²: {r2:.4f}")

R²: 0.9782


## Predição API

In [51]:
# Build the request payload in the format expected by MLflow Serve:
# one dict per sampled row, collected under the "instances" key.
dados = dict(instances=x_features.to_dict(orient="records"))
dados

{'instances': [{'id': 6384.0,
   'property_type': 'apartment',
   'state': 'Rio de Janeiro',
   'region': 'Southeast',
   'lat': -22.921227,
   'lon': -43.225548,
   'area_m2': 125.0,
   'city': 'Rio de Janeiro'},
  {'id': 9173.0,
   'property_type': 'apartment',
   'state': 'Santa Catarina',
   'region': 'South',
   'lat': -27.750971,
   'lon': -48.779408,
   'area_m2': 241.0,
   'city': 'Santo Amaro da Imperatriz'},
  {'id': 3869.0,
   'property_type': 'apartment',
   'state': 'Rio de Janeiro',
   'region': 'Southeast',
   'lat': -22.95433,
   'lon': -43.195595,
   'area_m2': 112.0,
   'city': 'Rio de Janeiro'},
  {'id': 4677.0,
   'property_type': 'apartment',
   'state': 'Rio de Janeiro',
   'region': 'Southeast',
   'lat': -23.000669,
   'lon': -43.392197,
   'area_m2': 106.0,
   'city': 'Rio de Janeiro'},
  {'id': 18134.0,
   'property_type': 'house',
   'state': 'SP',
   'region': 'southeast',
   'lat': -23.588417123946847,
   'lon': -46.66485825341983,
   'area_m2': 350.0,
   '

In [52]:
# Serialise the payload to a JSON string and print it for inspection
json_data = json.dumps(obj=dados)
print(json_data)

{"instances": [{"id": 6384.0, "property_type": "apartment", "state": "Rio de Janeiro", "region": "Southeast", "lat": -22.921227, "lon": -43.225548, "area_m2": 125.0, "city": "Rio de Janeiro"}, {"id": 9173.0, "property_type": "apartment", "state": "Santa Catarina", "region": "South", "lat": -27.750971, "lon": -48.779408, "area_m2": 241.0, "city": "Santo Amaro da Imperatriz"}, {"id": 3869.0, "property_type": "apartment", "state": "Rio de Janeiro", "region": "Southeast", "lat": -22.95433, "lon": -43.195595, "area_m2": 112.0, "city": "Rio de Janeiro"}, {"id": 4677.0, "property_type": "apartment", "state": "Rio de Janeiro", "region": "Southeast", "lat": -23.000669, "lon": -43.392197, "area_m2": 106.0, "city": "Rio de Janeiro"}, {"id": 18134.0, "property_type": "house", "state": "SP", "region": "southeast", "lat": -23.588417123946847, "lon": -46.66485825341983, "area_m2": 350.0, "city": "S\u00e3o Paulo"}, {"id": 13352.0, "property_type": "house", "state": "SP", "region": "southeast", "lat": 

### Request 

In [53]:
# POST the JSON payload to the local scoring endpoint.
# requests applies no timeout by default, so one is set explicitly: the cell
# then fails with an exception instead of hanging forever if the server
# accepts the connection but never answers.
previsao = requests.post(
    "http://localhost:5000/invocations",
    headers={"Content-Type": "application/json"},
    data=json_data,
    timeout=30,
)

print("HTTP STATUS", previsao.status_code)

HTTP STATUS 200


In [54]:
# Show the raw response body returned by the prediction request
previsao.text

'{"predictions": [978233.0625, 906950.25, 1373972.5, 1051356.75, 1151326.0, 669886.3125, 1369107.5, 1067672.75, 358558.21875, 1130626.0, 673395.375, 715957.8125, 689171.1875, 1025426.875, 687039.875, 1130626.0, 1029650.625, 959656.5, 849469.125, 1066053.125]}'

In [55]:
# Parse the response body as JSON
previsao_json = json.loads(previsao.text)

# Round every returned prediction to 2 decimals and store it as a new column
df_sample["predict_api"] = list(
    map(lambda valor: round(valor, 2), previsao_json["predictions"])
)

# Show the actual price, the local prediction and the API prediction side by side
df_sample.loc[:, ["price_brl", "predictions", "predict_api"]]

Unnamed: 0,price_brl,predictions,predict_api
6383,787484.34,844436.32,978233.06
9172,933875.69,1100024.97,906950.25
3868,1397378.74,1401079.65,1373972.5
4676,998127.67,1009376.21,1051356.75
18133,4000000.0,5049400.0,1151326.0
13351,1890000.0,1792180.0,669886.31
17172,1700000.0,1626377.39,1369107.5
17379,1300000.0,1281245.14,1067672.75
5818,289457.01,309616.08,358558.22
20470,3750000.0,3660319.6,1130626.0


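### R² Score API

> **Nota:** as previsões da API divergem bastante das previsões locais (por exemplo, 1151326.0 contra 5049400.0 para o índice 18133), e o R² abaixo é negativo enquanto o R² local é 0.9782. Isso sugere que o endpoint em `localhost:5000` pode estar servindo um modelo diferente do carregado via MLflow acima — verificar antes de comparar os resultados.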

In [57]:
# R² between the actual prices and the predictions returned by the API.
# NOTE(review): a negative score here, while the local predictions score high
# on the same rows, suggests the endpoint may be serving a different model
# than the one loaded from MLflow -- confirm.
r2_score_api = r2_score(y_true=df_sample["price_brl"], y_pred=df_sample["predict_api"])
print(f"R² API: {r2_score_api:.4f}")

R² API: -0.4037
