In [None]:
!pip install py7zr wget pandas > /dev/null


In [None]:
import os
import wget
import zipfile
import requests
import pandas as pd

In [None]:
# Remote zip archive to download (INMET "dadoshistoricos" upload for 2025).
SOURCE_URL = "https://portal.inmet.gov.br/uploads/dadoshistoricos/2025.zip"

# Local folder that receives the downloaded archive.
INPUT_DIR = "./input"

# Sub-folder of INPUT_DIR that receives the extracted files.
EXTRACT_DIR = f"{INPUT_DIR}/inmet_2025"

In [None]:
def download_zip_inmet(url: str, output_dir: str) -> str:
    """Download the file at ``url`` into ``output_dir`` and return its local path.

    The local file name is the last ``/``-separated segment of ``url``. When a
    file with that name already exists in ``output_dir`` the download is
    skipped and the existing path is returned.

    The response body is streamed into a temporary ``<name>.part`` file that is
    renamed to the final name only after it has been written completely. An
    interrupted or failed download therefore never leaves a truncated archive
    that a later call would mistake for a finished one.

    Args:
        url: HTTP(S) address of the file to download.
        output_dir: Directory to save into; created if it does not exist.

    Returns:
        Path of the downloaded (or already present) file inside ``output_dir``.

    Raises:
        ValueError: If ``url`` ends with ``/`` and so carries no file name.
        requests.HTTPError: If the server answers with an error status.
    """
    filename = url.split("/")[-1]
    if not filename:
        # An empty name would make output_path point at output_dir itself,
        # which always "exists" and would be returned as if it were the file.
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, filename)

    if os.path.exists(output_path):
        print("‚ö†Ô∏è Arquivo j√° existe, pulando download")
        return output_path

    print(f"üì• Downloading {filename}...")

    # Browser-like User-Agent sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/120.0 Safari/537.36"
    }

    partial_path = output_path + ".part"
    try:
        with requests.get(url, headers=headers, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        f.write(chunk)
        # Publish the file under its final name only once it is complete.
        os.replace(partial_path, output_path)
    finally:
        # After a successful rename the partial file is gone; on any failure
        # (including a kernel interrupt) remove the incomplete data.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    print("‚úÖ Download conclu√≠do")
    return output_path


In [None]:
def extract_zip(zip_path: str, extract_dir: str):
    """Unpack every member of the zip archive ``zip_path`` into ``extract_dir``.

    The destination directory is created when it does not exist yet. A
    progress message is printed before and after the extraction.
    """
    os.makedirs(extract_dir, exist_ok=True)
    print("üì¶ Extraindo arquivos...")

    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(path=extract_dir)

    print("‚úÖ Extra√ß√£o conclu√≠da")

In [None]:
# Fetch the archive into INPUT_DIR, then unpack it into EXTRACT_DIR.
zip_path = download_zip_inmet(url=SOURCE_URL, output_dir=INPUT_DIR)
extract_zip(zip_path=zip_path, extract_dir=EXTRACT_DIR)

üì• Downloading 2025.zip...
‚úÖ Download conclu√≠do
üì¶ Extraindo arquivos...
‚úÖ Extra√ß√£o conclu√≠da


In [None]:
def listar_csvs(base_dir: str):
    """Return the paths of all CSV files found under ``base_dir``, sorted.

    The directory tree is walked recursively and a file is included when its
    name ends with ``.csv`` in any letter case. ``os.walk`` yields entries in
    arbitrary, filesystem-dependent order, so the result is sorted to make the
    listing (and anything built from it) reproducible across runs and machines.

    Args:
        base_dir: Root directory to search.

    Returns:
        A sorted list of paths, each formed by joining the containing
        directory with the file name. Empty if nothing matches.
    """
    arquivos = [
        os.path.join(root, nome)
        for root, _, files in os.walk(base_dir)
        for nome in files
        if nome.lower().endswith(".csv")
    ]
    arquivos.sort()
    return arquivos

In [None]:
def concat_csvs_to_dfs(csvs_list: list):
    """Read every CSV in ``csvs_list`` and stack them into one DataFrame.

    Each file is parsed as ``;``-separated, latin1-encoded text with ``,`` as
    the decimal mark. The first 8 lines are skipped, and empty or single-space
    cells are read as missing values. A column ``arquivo_origem`` holding the
    file's base name is added to every row.

    Args:
        csvs_list: Paths of the CSV files to load.

    Returns:
        One DataFrame with the rows of all files, re-indexed from 0.

    Raises:
        ValueError: If ``csvs_list`` contains no paths.
    """
    dfs = []
    for arq in csvs_list:
        df = pd.read_csv(
            arq,
            sep=';',
            encoding='latin1',
            skiprows=8,          # INMET: skip the leading metadata lines
            decimal=',',
            na_values=['', ' ']
        )

        # Record which file each row came from. basename handles the
        # platform's path separators, unlike splitting on '/'.
        df['arquivo_origem'] = os.path.basename(arq)
        dfs.append(df)

    if not dfs:
        # pd.concat would raise a less helpful "No objects to concatenate".
        raise ValueError("csvs_list is empty: no CSV files to concatenate")

    return pd.concat(dfs, ignore_index=True)


In [None]:
# Collect the extracted CSV paths and load them into a single DataFrame.
csvs_list = listar_csvs(base_dir=EXTRACT_DIR)
df_imnet = concat_csvs_to_dfs(csvs_list=csvs_list)

In [None]:
# Preview the first five rows of the combined frame.
df_imnet.head(n=5)

Unnamed: 0,Data,Hora UTC,"PRECIPITA√á√ÉO TOTAL, HOR√ÅRIO (mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",PRESS√ÉO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB),PRESS√ÉO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB),RADIACAO GLOBAL (Kj/m¬≤),"TEMPERATURA DO AR - BULBO SECO, HORARIA (¬∞C)",TEMPERATURA DO PONTO DE ORVALHO (¬∞C),TEMPERATURA M√ÅXIMA NA HORA ANT. (AUT) (¬∞C),...,TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT) (¬∞C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT) (¬∞C),UMIDADE REL. MAX. NA HORA ANT. (AUT) (%),UMIDADE REL. MIN. NA HORA ANT. (AUT) (%),"UMIDADE RELATIVA DO AR, HORARIA (%)","VENTO, DIRE√á√ÉO HORARIA (gr) (¬∞ (gr))","VENTO, RAJADA MAXIMA (m/s)","VENTO, VELOCIDADE HORARIA (m/s)",Unnamed: 19,arquivo_origem
0,2025/01/01,0000 UTC,1.0,1004.7,1004.7,1003.7,0.1,24.2,22.4,25.2,...,23.1,22.1,90.0,85.0,90.0,121.0,5.3,2.1,,INMET_NE_MA_A255_ZE DOCA_01-01-2025_A_30-11-20...
1,2025/01/01,0100 UTC,0.0,1005.1,1005.1,1004.7,0.0,24.8,22.8,24.8,...,23.0,22.5,91.0,89.0,89.0,47.0,3.9,1.9,,INMET_NE_MA_A255_ZE DOCA_01-01-2025_A_30-11-20...
2,2025/01/01,0200 UTC,0.0,1005.5,1005.5,1004.8,0.0,24.8,23.0,25.0,...,23.2,22.7,90.0,89.0,90.0,119.0,3.3,0.9,,INMET_NE_MA_A255_ZE DOCA_01-01-2025_A_30-11-20...
3,2025/01/01,0300 UTC,8.2,1006.1,1006.3,1005.5,1.1,23.2,22.3,24.9,...,23.6,21.9,95.0,90.0,95.0,185.0,12.1,1.2,,INMET_NE_MA_A255_ZE DOCA_01-01-2025_A_30-11-20...
4,2025/01/01,0400 UTC,0.6,1004.8,1006.2,1004.8,0.0,23.2,22.3,23.2,...,22.3,21.9,95.0,94.0,95.0,318.0,3.7,1.3,,INMET_NE_MA_A255_ZE DOCA_01-01-2025_A_30-11-20...
