# Treinamento

## Inicialização

In [1]:
# Config Inicial
import json

import mlflow.pyfunc

import pandas as pd

import requests

from sklearn.metrics import r2_score

### Leitura

In [2]:
# Read the raw dataset from disk.
# NOTE(review): later outputs in this notebook show control characters inside
# accented state names (e.g. 'Piau\x92'), which suggests the text in this file
# was mis-encoded upstream. Confirm before changing `encoding`, since the
# served model may expect these exact strings.
df_treinamento = pd.read_csv("dataset/brasil_estado_cidade.csv", encoding="utf-8")

# Discard every row that contains at least one missing value.
df_treinamento = df_treinamento.dropna()

# Promote integer columns to float64; all other columns keep their dtype.
colunas_inteiras = df_treinamento.select_dtypes(include='int').columns
df_treinamento = df_treinamento.astype(dict.fromkeys(colunas_inteiras, 'float64'))

df_treinamento.head()

Unnamed: 0,id,property_type,state,region,lat,lon,area_m2,price_brl,city
0,1.0,apartment,Pernambuco,Northeast,-8.134204,-34.906326,72.0,414222.98,Recife
1,2.0,apartment,Pernambuco,Northeast,-8.126664,-34.903924,136.0,848408.53,Recife
2,3.0,apartment,Pernambuco,Northeast,-8.12555,-34.907601,75.0,299438.28,Recife
3,4.0,apartment,Pernambuco,Northeast,-8.120249,-34.89592,187.0,848408.53,Recife
4,5.0,apartment,Pernambuco,Northeast,-8.142666,-34.906906,80.0,464129.36,Recife


### Amostragem

In [3]:
# Draw a reproducible 20-row sample (fixed seed) from the loaded dataset.
df_sample = df_treinamento.sample(n=20, random_state=42)

# Target is the price_brl column; features are every remaining column.
y_target = df_sample["price_brl"]
x_features = df_sample.drop(columns=["price_brl"])

df_sample.head()

Unnamed: 0,id,property_type,state,region,lat,lon,area_m2,price_brl,city
3823,3824.0,apartment,Rio de Janeiro,Southeast,-22.930788,-43.355751,62.0,419213.6,Rio de Janeiro
376,377.0,apartment,Piau,Northeast,-5.064457,-42.79275,60.0,319400.84,Teresina
5473,5474.0,apartment,Rio de Janeiro,Southeast,-22.933163,-43.17466,78.0,1287584.69,Rio de Janeiro
2282,2283.0,apartment,Rio Grande do Sul,South,-30.03764,-51.189133,104.0,726908.65,Porto Alegre
980,981.0,apartment,Rio Grande do Sul,South,-30.080242,-51.225227,60.0,494701.71,Porto Alegre


### Modelo Preditor

In [4]:
# Load the model from the MLflow Model Registry
model_uri = "models:/xgboost_model/7"  # version noted by the author as having the best R²

# The model was trained and saved in MLflow beforehand; here it is only loaded
# through the generic pyfunc interface.
loaded_model = mlflow.pyfunc.load_model(model_uri)
loaded_model


mlflow.pyfunc.loaded_model:
  artifact_path: xgboost_model
  flavor: mlflow.sklearn
  run_id: 2369381fe2504d1496981f27d974de4c

## Predição Estática

In [5]:
# Score the sampled feature rows with the locally loaded model.
predictions = loaded_model.predict(x_features)

# Keep the predictions next to the actual prices, as floats rounded to two
# decimals, aligned on the sample's index.
df_sample["predictions"] = pd.Series(predictions, index=df_sample.index, dtype=float).round(2)

# Side-by-side view of actual price and local prediction.
df_sample[["price_brl", "predictions"]]

Unnamed: 0,price_brl,predictions
3823,419213.6,348099.19
376,319400.84,314764.69
5473,1287584.69,963706.62
2282,726908.65,730391.0
980,494701.71,330280.34
10954,558951.47,604923.69
6607,993137.02,1018210.06
1212,678726.79,668755.38
6408,1362953.71,1182253.38
11009,1274609.05,1222384.38


### R² Score

In [6]:
# Coefficient of determination of the local predictions against the actual prices.
r2 = r2_score(y_true=df_sample["price_brl"], y_pred=df_sample["predictions"])
print(f"R²: {r2:.4f}")

R²: 0.7869


## Predição API

In [7]:
# Build the request payload: one dict per sampled row, wrapped under the
# "instances" key, the format the author targets for MLflow model serving.
registros = x_features.to_dict(orient="records")
dados = {"instances": registros}
dados

{'instances': [{'id': 3824.0,
   'property_type': 'apartment',
   'state': 'Rio de Janeiro',
   'region': 'Southeast',
   'lat': -22.930788,
   'lon': -43.355751,
   'area_m2': 62.0,
   'city': 'Rio de Janeiro'},
  {'id': 377.0,
   'property_type': 'apartment',
   'state': 'Piau\x92',
   'region': 'Northeast',
   'lat': -5.064457,
   'lon': -42.79275,
   'area_m2': 60.0,
   'city': 'Teresina'},
  {'id': 5474.0,
   'property_type': 'apartment',
   'state': 'Rio de Janeiro',
   'region': 'Southeast',
   'lat': -22.933163,
   'lon': -43.17466,
   'area_m2': 78.0,
   'city': 'Rio de Janeiro'},
  {'id': 2283.0,
   'property_type': 'apartment',
   'state': 'Rio Grande do Sul',
   'region': 'South',
   'lat': -30.03764,
   'lon': -51.189133,
   'area_m2': 104.0,
   'city': 'Porto Alegre'},
  {'id': 981.0,
   'property_type': 'apartment',
   'state': 'Rio Grande do Sul',
   'region': 'South',
   'lat': -30.080242,
   'lon': -51.225227,
   'area_m2': 60.0,
   'city': 'Porto Alegre'},
  {'id': 1

In [8]:
# Serialize the payload dict to a JSON string (this is the body sent to the API)
json_data = json.dumps(dados)
# Print the serialized payload for inspection
print(json_data)

{"instances": [{"id": 3824.0, "property_type": "apartment", "state": "Rio de Janeiro", "region": "Southeast", "lat": -22.930788, "lon": -43.355751, "area_m2": 62.0, "city": "Rio de Janeiro"}, {"id": 377.0, "property_type": "apartment", "state": "Piau\u0092", "region": "Northeast", "lat": -5.064457, "lon": -42.79275, "area_m2": 60.0, "city": "Teresina"}, {"id": 5474.0, "property_type": "apartment", "state": "Rio de Janeiro", "region": "Southeast", "lat": -22.933163, "lon": -43.17466, "area_m2": 78.0, "city": "Rio de Janeiro"}, {"id": 2283.0, "property_type": "apartment", "state": "Rio Grande do Sul", "region": "South", "lat": -30.03764, "lon": -51.189133, "area_m2": 104.0, "city": "Porto Alegre"}, {"id": 981.0, "property_type": "apartment", "state": "Rio Grande do Sul", "region": "South", "lat": -30.080242, "lon": -51.225227, "area_m2": 60.0, "city": "Porto Alegre"}, {"id": 10955.0, "property_type": "house", "state": "S\u008bo Paulo", "region": "Southeast", "lat": -22.723307, "lon": -47

### Request 

In [9]:
# POST the serialized payload to the locally served model's /invocations endpoint.
# A timeout is set because requests otherwise waits indefinitely when the
# server accepts the connection but never answers.
previsao = requests.post(
    "http://localhost:5000/invocations",
    headers={"Content-Type": "application/json"},
    data=json_data,
    timeout=30,  # seconds
)

print("HTTP STATUS", previsao.status_code)

# Stop here on a 4xx/5xx answer instead of letting later cells try to parse
# an error body as if it contained predictions.
previsao.raise_for_status()

HTTP STATUS 200


In [10]:
# Show the raw response body returned by the prediction API
previsao.text

'{"predictions": [348099.1875, 314764.6875, 963706.625, 730391.0, 330280.34375, 604923.6875, 1018210.0625, 668755.375, 1182253.375, 1222384.375, 798778.8125, 1070625.5, 355035.8125, 863029.625, 542877.5625, 1041795.375, 491103.1875, 373186.9375, 512069.875, 270661.0625]}'

In [11]:
# Parse the response body into a Python dict.
previsao_json = json.loads(previsao.text)

# Pull out the list of predictions, round each one to two decimals and store
# them as a new column (assigned positionally, in the order the rows were sent).
valores_api = previsao_json["predictions"]
df_sample["predict_api"] = list(map(lambda valor: round(valor, 2), valores_api))

# Actual price, local prediction and API prediction side by side.
df_sample[["price_brl", "predictions", "predict_api"]]

Unnamed: 0,price_brl,predictions,predict_api
3823,419213.6,348099.19,348099.19
376,319400.84,314764.69,314764.69
5473,1287584.69,963706.62,963706.62
2282,726908.65,730391.0,730391.0
980,494701.71,330280.34,330280.34
10954,558951.47,604923.69,604923.69
6607,993137.02,1018210.06,1018210.06
1212,678726.79,668755.38,668755.38
6408,1362953.71,1182253.38,1182253.38
11009,1274609.05,1222384.38,1222384.38


### R² Score API

In [12]:
# Coefficient of determination of the API predictions against the actual prices.
r2_score_api = r2_score(y_true=df_sample["price_brl"], y_pred=df_sample["predict_api"])
print(f"R² API: {r2_score_api:.4f}")

R² API: 0.7869
