# 02. Enriquecimento de Dados: chuva

### 1. Bibliotecas e DataFrame

In [9]:
from config import (
    pd, np, os, salvar_parquet,
    resumo_df, checar_nulos, coord,
    anos, caminho_chuva, url
)

import openmeteo_requests
import requests_cache
import requests
from retry_requests import retry
import time

df = pd.read_parquet("../dados/intermediarios/acidentes_tratado.parquet")

### 2. Configura API Open-Meteo com cache e retry em caso de erro 

In [10]:
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

### 3. Cria função para chamar API

Função para coletar dados horários de precipitação de um ponto (lat/lon)
para um determinado ano, salvando em formato parquet.

Args:  
* lat (float): Latitude
* lon (float): Longitude
* ano (int): Ano desejado
* nome (str): Nome da região (ex.: NORTE)


In [11]:
def dados_chuva(lat: float, lon: float, nome: str):
   
    os.makedirs(caminho_chuva, exist_ok=True)

    for ano in anos:
        params = {
            "latitude": lat,
            "longitude": lon,
            "start_date": f"{ano}-01-01",
            "end_date": f"{ano}-12-31",
            "hourly": 'precipitation',
            "timezone": 'America/Sao_Paulo'
        }

    responses = openmeteo.weather_api(url, params=params)
    response = responses[0]
    hourly = response.Hourly()

    chuva_hora = hourly.Variables(0).ValuesAsNumpy()

    df_chuva = pd.DataFrame({
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        ),
        "precipitation": chuva_hora
    })

    nome_arquivo = f"{caminho_chuva}chuva_{ano}_{nome.lower()}.parquet"
    df_chuva.to_parquet(nome_arquivo, index=False)
    print(f"✅ Arquivo salvo: {nome_arquivo}")

    return df_chuva

### 4. Chama API por região

#### 1. Norte

In [12]:
lat, lon = coord["NORTE"]
dados_chuva(lat, lon, "NORTE")


✅ Arquivo salvo: ../dados/intermediarios/clima/chuva_2024_norte.parquet


Unnamed: 0,date,precipitation
0,2024-01-01 03:00:00+00:00,0.00
1,2024-01-01 04:00:00+00:00,0.00
2,2024-01-01 05:00:00+00:00,0.00
3,2024-01-01 06:00:00+00:00,0.00
4,2024-01-01 07:00:00+00:00,0.00
...,...,...
8779,2024-12-31 22:00:00+00:00,0.00
8780,2024-12-31 23:00:00+00:00,0.00
8781,2025-01-01 00:00:00+00:00,0.00
8782,2025-01-01 01:00:00+00:00,0.00


#### 2. Leste

#### 3. Centro

#### 4. Sul

In [None]:

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://historical-forecast-api.open-meteo.com/v1/forecast"

params = {
	"latitude": lat,
	"longitude": long,
	"start_date": f"{ano}-01-01",
    "end_date": f"{ano}-12-31",
	"hourly": "precipitation",
	"timezone": "America/Sao_Paulo",
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates: {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation: {response.Elevation()} m asl")
print(f"Timezone: {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0: {response.UtcOffsetSeconds()}s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_precipitation = hourly.Variables(0).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

hourly_data["precipitation"] = hourly_precipitation

hourly_dataframe = pd.DataFrame(data = hourly_data)
print("\nHourly data\n", hourly_dataframe)
hourly_dataframe.head(50)
hourly_dataframe.tail(50)

Coordinates: -30.125°N -51.25°E
Elevation: 46.0 m asl
Timezone: b'America/Sao_Paulo'b'GMT-3'
Timezone difference to GMT+0: -10800s

Hourly data
                           date  precipitation
0    2022-01-01 03:00:00+00:00           0.00
1    2022-01-01 04:00:00+00:00           0.00
2    2022-01-01 05:00:00+00:00           0.00
3    2022-01-01 06:00:00+00:00           0.00
4    2022-01-01 07:00:00+00:00           0.00
...                        ...            ...
8755 2022-12-31 22:00:00+00:00           0.00
8756 2022-12-31 23:00:00+00:00           0.00
8757 2023-01-01 00:00:00+00:00           0.00
8758 2023-01-01 01:00:00+00:00           0.00
8759 2023-01-01 02:00:00+00:00           0.00

[8760 rows x 2 columns]


Unnamed: 0,date,precipitation
8710,2022-12-30 01:00:00+00:00,0.0
8711,2022-12-30 02:00:00+00:00,0.0
8712,2022-12-30 03:00:00+00:00,0.0
8713,2022-12-30 04:00:00+00:00,0.0
8714,2022-12-30 05:00:00+00:00,0.0
8715,2022-12-30 06:00:00+00:00,0.0
8716,2022-12-30 07:00:00+00:00,0.0
8717,2022-12-30 08:00:00+00:00,0.0
8718,2022-12-30 09:00:00+00:00,0.0
8719,2022-12-30 10:00:00+00:00,0.0


In [28]:
dia = pd.to_datetime('2022-04-03').date()
busca = hourly_dataframe[hourly_dataframe['date'].dt.date == dia]

busca.info()
print(busca.head(25))

<class 'pandas.core.frame.DataFrame'>
Index: 24 entries, 2205 to 2228
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype              
---  ------         --------------  -----              
 0   date           24 non-null     datetime64[ns, UTC]
 1   precipitation  24 non-null     float32            
dtypes: datetime64[ns, UTC](1), float32(1)
memory usage: 480.0 bytes
                          date  precipitation
2205 2022-04-03 00:00:00+00:00            0.0
2206 2022-04-03 01:00:00+00:00            0.0
2207 2022-04-03 02:00:00+00:00            0.0
2208 2022-04-03 03:00:00+00:00            0.0
2209 2022-04-03 04:00:00+00:00            0.0
2210 2022-04-03 05:00:00+00:00            0.0
2211 2022-04-03 06:00:00+00:00            0.0
2212 2022-04-03 07:00:00+00:00            0.0
2213 2022-04-03 08:00:00+00:00            0.0
2214 2022-04-03 09:00:00+00:00            0.0
2215 2022-04-03 10:00:00+00:00            0.0
2216 2022-04-03 11:00:00+00:00            0.0
2217 20