In [1]:
import time
import random
import requests
import json
import pandas as pd

In [13]:
mdls_consult =   {'VW - VolksWagen' : ['gol', 'polo', 'voyage', 'fox', 'saveiro'],
                'GM - Chevrolet' : ['onix', 'prisma'],
                'Hyundai' : ['hb20', 'creta'],
                'Fiat' : ['palio', 'argo', 'mobi', 'logan'],
                'Ford' : ['ka', 'ecosport', 'focus'],
                'Renault' : ['sandero', 'logan', 'kwid'],
                'Toyota' : ['corolla', 'etios'],
                'Honda' : ['civic', 'city']}

In [None]:

_LAST_CALL = 0.0

BASE = "https://veiculos.fipe.org.br/api/veiculos"

S = requests.Session()
S.headers.update({
    "User-Agent": "Mozilla/5.0",
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "Origin": "https://veiculos.fipe.org.br",
    "Referer": "https://veiculos.fipe.org.br/",
    "X-Requested-With": "XMLHttpRequest",
})


S.get("https://veiculos.fipe.org.br/")

def post(path: str, data: dict, *, min_delay=1.0, max_delay=20.0) -> dict:
    global _LAST_CALL

    # throttle BEFORE the request
    now = time.time()
    elapsed = now - _LAST_CALL
    target = random.uniform(min_delay, max_delay)
    if elapsed < target:
        time.sleep(target - elapsed)

    r = S.post(f"{BASE}/{path}", data=data, timeout=30)
    _LAST_CALL = time.time()  # mark after request is made

    r.raise_for_status()
    return r.json()

cod_ref = 330
tipo = 1
regra_ano_recente = 2018

In [124]:
# Commonly-used “flow” endpoints (names can change over time):
# - ConsultarTabelaDeReferencia
# - ConsultarMarcas
# - ConsultarModelos
# - ConsultarAnoModelo
# - ConsultarValor

# 1) Reference tables
refs = post("ConsultarTabelaDeReferencia", {})
with open("refs.json", "w", encoding="utf-8") as f:
    json.dump(refs, f, ensure_ascii=False, indent=2)

In [16]:
marcas = post("ConsultarMarcas", {
    "codigoTipoVeiculo": tipo,
    "codigoTabelaReferencia": cod_ref
})

marcas_dict = {m['Label']:m['Value'] for m in marcas}
with open("marcas.json", "w", encoding="utf-8") as f:
    json.dump(marcas_dict, f, ensure_ascii=False, indent=2)


In [8]:
with open("marcas.json", "r", encoding="utf-8") as f:
    marcas_dict = json.load(f)

In [10]:
# Loop through each brand and its target model keywords
for marca_, mdls in mdls_consult.items():
    print(f"[brand] {marca_}")

    # Get brand code from lookup dictionary
    cod_marca = marcas_dict.get(marca_)

    # Fetch all models for this brand
    modelos_resp = post("ConsultarModelos", {
            "codigoTipoVeiculo": tipo,
            "codigoTabelaReferencia": cod_ref,
            "codigoMarca": cod_marca
        })
    
    # Normalize API response and build model code lookup
    modelos = modelos_resp.get("Modelos", modelos_resp)
    modelos_dict = {m['Label']: m['Value'] for m in modelos}

    # Persist all models for the brand
    with open(f"{marca_}.json", "a", encoding="utf-8") as f:
        f.write(json.dumps(modelos_dict, ensure_ascii=False, indent=2) + "\n")

    for mdl_lbl in mdls:
        print(f"  [model_lbl] {mdl_lbl}")
        # Filter models that match the target keyword
        modelos_target = [m for m in modelos if mdl_lbl in m.get("Label", "").lower()]
        modelos_target_dict = {m['Label']: m['Value'] for m in modelos_target}
            
        # For each target model keyword
        for mdl in modelos_target_dict.keys():
            print(f"  [model] {mdl}")
            
            # Fetch available years for the selected model code
            anos = post("ConsultarAnoModelo", {
                "codigoTipoVeiculo": tipo,
                "codigoTabelaReferencia": cod_ref,
                "codigoMarca": cod_marca,
                "codigoModelo": modelos_target_dict.get(mdl)
            })

            # Build year code lookup
            anos_dict = {a['Label']: a['Value'] for a in anos}


            # Keep only years >= regra_ano_recente
            anos_recentes = {
                k: v
                for k, v in anos_dict.items()
                if int(k[:4]) >= regra_ano_recente and int(k[:4]) <= 2026
            }

            # Store model code and its years
            modelos_target_dict[mdl] = {
                "codigo_modelo": modelos_target_dict.get(mdl),
                "anos": anos_recentes
            }

            # Fetch detailed values for each recent year
            for ano_val, ano_code in anos_recentes.items():
                print(f"    [year] {ano_val}")
                det = post("ConsultarValorComTodosParametros", {
                    "codigoTipoVeiculo": tipo,
                    "codigoTabelaReferencia": cod_ref,
                    "codigoMarca": cod_marca,
                    "codigoModelo": modelos_target_dict.get(mdl)["codigo_modelo"],
                    "anoModelo": ano_code.split("-")[0],
                    "codigoTipoCombustivel": ano_code.split("-")[1],
                    "tipoConsulta": "tradicional"
                })

                # Attach details to the year entry
                modelos_target_dict[mdl]['anos'][ano_val] = {
                    "codigo_ano": modelos_target_dict[mdl]['anos'].get(ano_val),
                    "detalhes": det
                }

                print(f"[modelo] {mdl}, [ano] {ano_val.split('-')[0]}, [valor] {det.get('Valor')}")

                # Persist target model details
                with open(f"{marca_}_target.json", "a", encoding="utf-8") as f:
                    f.write(json.dumps(modelos_target_dict, ensure_ascii=False, indent=2) + "\n")

[brand] VW - VolksWagen
  [model_lbl] gol
  [model] Gol (novo) 1.0 Mi Total Flex 8V 2p
  [model] Gol (novo) 1.0 Mi Total Flex 8V 4p
  [model] Gol (novo) 1.6 Mi Total Flex 8V 2p
  [model] Gol (novo) 1.6 Mi Total Flex 8V 4p
  [model] Gol (novo) 1.6 Power/Highi T.Flex 8v 4P
  [model] Gol 1.0 Flex 12V 5p
    [year] 2023 Flex
[modelo] Gol 1.0 Flex 12V 5p, [ano] 2023 Flex, [valor] R$ 55.012,00
    [year] 2022 Flex
[modelo] Gol 1.0 Flex 12V 5p, [ano] 2022 Flex, [valor] R$ 50.561,00
    [year] 2021 Flex
[modelo] Gol 1.0 Flex 12V 5p, [ano] 2021 Flex, [valor] R$ 45.742,00
    [year] 2020 Flex
[modelo] Gol 1.0 Flex 12V 5p, [ano] 2020 Flex, [valor] R$ 43.946,00
    [year] 2019 Flex
[modelo] Gol 1.0 Flex 12V 5p, [ano] 2019 Flex, [valor] R$ 40.087,00
  [model] Gol 1.0 Mi FUN/ Highway/ Sport 16V  2/4p
  [model] Gol 1.0 Plus 16v 2p
  [model] Gol 1.0 Plus 16v 4p
  [model] Gol 1.0 Plus 8v 2p
  [model] Gol 1.0 Plus 8v 4p
  [model] Gol 1.0 Power 16v 76cv 4p
  [model] Gol 1.0 Total Flex 8V 5p (25 Anos)
  [

In [10]:


def flatten_modelos_keep_all(data):
    rows = []

    for modelo_nome, modelo_data in data.items():

        # Case 1 — model is just a code (no years yet)
        if not isinstance(modelo_data, dict):
            rows.append({
                "Modelo": modelo_nome,
                "CodigoModelo": modelo_data,
                "AnoLabel": None,
                "AnoModelo": None,
                "Combustivel": None,
                "Valor": None,
                "CodigoFipe": None,
                "MesReferencia": None,
                "DataConsulta": None,
            })
            continue

        codigo_modelo = modelo_data.get("codigo_modelo")
        anos = modelo_data.get("anos")

        # Case 2 — model has no years
        if not anos:
            rows.append({
                "Modelo": modelo_nome,
                "CodigoModelo": codigo_modelo,
                "AnoLabel": None,
                "AnoModelo": None,
                "Combustivel": None,
                "Valor": None,
                "CodigoFipe": None,
                "MesReferencia": None,
                "DataConsulta": None,
            })
            continue

        # Case 3 — model has years
        for ano_label, ano_data in anos.items():

            # If year is just a string like "2024-6"
            if not isinstance(ano_data, dict):
                rows.append({
                    "Modelo": modelo_nome,
                    "CodigoModelo": codigo_modelo,
                    "AnoLabel": ano_label,
                    "AnoModelo": int(ano_label[:4]) if ano_label[:4].isdigit() else None,
                    "Combustivel": None,
                    "Valor": None,
                    "CodigoFipe": None,
                    "MesReferencia": None,
                    "DataConsulta": None,
                })
                continue

            detalhes = ano_data.get("detalhes", {})

            rows.append({
                "Modelo": modelo_nome,
                "CodigoModelo": codigo_modelo,
                "AnoLabel": ano_label,
                "AnoModelo": detalhes.get("AnoModelo"),
                "Combustivel": detalhes.get("Combustivel"),
                "Valor": detalhes.get("Valor"),
                "CodigoFipe": detalhes.get("CodigoFipe"),
                "MesReferencia": detalhes.get("MesReferencia"),
                "DataConsulta": detalhes.get("DataConsulta"),
            })

            df = pd.DataFrame(rows)

            df["Valor_num"] = (
            df["Valor"]
            .str.replace("R$ ", "", regex=False)
            .str.replace(".", "", regex=False)
                .str.replace(",", ".", regex=False)
            )

            df["Valor_num"] = pd.to_numeric(df["Valor_num"], errors="coerce")

    return df


In [16]:
df_all_marcas = pd.DataFrame()

for marca_ in mdls_consult.keys():

    data = {}

    with open(f"{marca_}_target.json", "r", encoding="utf-8") as f:
        buffer = ""
        for line in f:
            buffer += line.strip()
            try:
                obj = json.loads(buffer)
                data.update(obj)
                buffer = ""
            except json.JSONDecodeError:
                continue

    df = flatten_modelos_keep_all(data)
    df["Marca"] = marca_
    df_all_marcas = pd.concat([df_all_marcas, df], ignore_index=True)
    print(f"[{marca_}] rows: {len(df)}, total: {len(df_all_marcas)}")


[VW - VolksWagen] rows: 432, total: 432
[GM - Chevrolet] rows: 151, total: 583
[Hyundai] rows: 269, total: 852
[Fiat] rows: 231, total: 1083
[Ford] rows: 153, total: 1236
[Renault] rows: 188, total: 1424
[Toyota] rows: 153, total: 1577
[Honda] rows: 121, total: 1698


In [18]:
df_all_marcas.to_csv(f"fipe_data_{cod_ref}.csv", index=False, encoding="utf-8-sig")