1) Imports e paths

In [8]:
import sys
from pathlib import Path

ROOT = Path().resolve().parent
sys.path.append(str(ROOT))

import pandas as pd
from src.io_epw import load_epw_folder 

2. Site parameters and scenarios

In [9]:
SITE_ID = "MAO"  # nome da pasta dentro de data/raw
RAW_DIR = ROOT / "data" / "raw" / SITE_ID
INTERIM_DIR = ROOT / "data" / "interim" / SITE_ID
INTERIM_DIR.mkdir(parents=True, exist_ok=True)

SCENARIO = {
    "category": "historical",   # "historical" ou "future"
    "horizon": None,            # "midterm"/"longterm" ou None
    "rcp": None,                # "RCP26", "RCP45", "RCP85", "SSP245"... ou None
    "period": (1991, 2023),     # período de referência desse lote de EPWs
}

3. Load EPWs

In [10]:
df = load_epw_folder(RAW_DIR, site_id=SITE_ID, scenario=SCENARIO)

print(df.shape)
df.head(3)

(289272, 50)


Unnamed: 0,site_id,timeset,year,month,day,hour,ta_c,tdp_c,rh_pct,p_atm_pa,...,latitude,longitude,elevation_m,scenario_category,scenario_horizon,scenario_rcp,period_start,period_end,epw_tz_offset_h,epw_tz_label
0,MAO,1991-01-01 01:00:00,1991,1,1,1,23.0,23.0,100,100389,...,-3.039,-60.05,80.5,historical,,,1991,2023,-4.0,UTC-4
1,MAO,1991-01-01 02:00:00,1991,1,1,2,23.0,23.0,100,100389,...,-3.039,-60.05,80.5,historical,,,1991,2023,-4.0,UTC-4
2,MAO,1991-01-01 03:00:00,1991,1,1,3,23.0,23.0,100,100389,...,-3.039,-60.05,80.5,historical,,,1991,2023,-4.0,UTC-4


4. Save raw CSV (UTC naive)

In [11]:
df.to_csv(INTERIM_DIR/"met_raw.csv.gz", index=False)

5. Daily agg (Max/mean Ta)

In [12]:
daily = (df.set_index("timeset")
           .groupby(pd.Grouper(freq="D"))
           .agg(tmax_c=("ta_c", "max"),
                tmean_c=("ta_c", "mean"))
           .reset_index())
daily["site_id"] = SITE_ID

daily_csv = INTERIM_DIR / "met_daily.csv.gz"
daily.to_csv(
    daily_csv,
    index=False,
    compression="gzip",
    date_format="%Y-%m-%d",  # diário
    float_format="%.6g",
)

daily.head(3), daily_csv

(     timeset  tmax_c    tmean_c site_id
 0 1991-01-01    30.0  25.391304     MAO
 1 1991-01-02    34.0  26.916667     MAO
 2 1991-01-03    35.0  28.250000     MAO,
 WindowsPath('G:/Meu Drive/UFSC/UFSC - Doutorado/3 HW Detection methods/1 Desenvolvimento - Pipeline/data/interim/MAO/met_daily.csv.gz'))

6. Quality control

In [13]:
print("Cols:", df.columns.tolist()[:12], "...")
print("Período:", df["timeset"].min(), "→", df["timeset"].max())
print("Daily rows:", len(daily))
print(df[["ta_c","rh_pct","wind_spd_ms"]].describe().round(2))

Cols: ['site_id', 'timeset', 'year', 'month', 'day', 'hour', 'ta_c', 'tdp_c', 'rh_pct', 'p_atm_pa', 'ghi_Whm2', 'dni_Whm2'] ...
Período: 1991-01-01 01:00:00 → 2024-01-01 00:00:00
Daily rows: 12054
            ta_c     rh_pct  wind_spd_ms
count  289272.00  289272.00    289272.00
mean       26.75      84.35         1.48
std         3.22      14.27         1.60
min        18.00      23.00         0.00
25%        24.00      74.00         0.00
50%        26.00      89.00         1.00
75%        29.00      94.00         2.60
max        39.00     100.00        26.80
