In [38]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os

In [39]:
shp = gpd.read_file(r"..\..\Data\Interim\PT-FireSprd_v2.1\L2_FireBehavior\PT-FireProg_v2.1_L2_p_meteo.shp")

print(f"Total de colunas: {len(shp.columns)}")
for i, coluna in enumerate(shp.columns, 1):
    print(f"{i:3d}. {coluna}")

Total de colunas: 120
  1. fid
  2. fname
  3. year
  4. id
  5. type
  6. sdate
  7. edate
  8. inidoy
  9. enddoy
 10. source
 11. zp_link
 12. burn_perio
 13. area
 14. growth_rat
 15. ros_i
 16. ros_p
 17. spdir_i
 18. spdir_p
 19. int_i
 20. int_p
 21. duration_i
 22. duration_p
 23. qc
 24. 1_3y_fir_p
 25. 3_8y_fir_p
 26. 8_ny_fir_p
 27. elev_av
 28. aspect_av
 29. landform
 30. fuel_model
 31. f_load_av
 32. land_use
 33. land_use_d
 34. gp_m2s2_av
 35. CBH_m_av
 36. HigCC_p_av
 37. LowCC_p_av
 38. MidCC_p_av
 39. TotCC_p_av
 40. BLH_m_av
 41. Cape_av
 42. Cin_av
 43. sW_7_av
 44. sW_28_av
 45. sW_100_av
 46. sW_289_av
 47. DC_12h_av
 48. FFMC_12h_a
 49. FWI_12h_av
 50. t_2m_C_av
 51. d_2m_C_av
 52. sP_hPa_av
 53. wv10_kh_av
 54. wdir10_av
 55. wv_Fb_av
 56. wdir_Fb_av
 57. wv100_k_av
 58. wdir100_av
 59. rh_2m_av
 60. VPD_Pa_av
 61. dfmc_av
 62. sW_1m_av
 63. sW_3m_av
 64. LCL_hPa_av
 65. LCL_m_av
 66. HDW_av
 67. Haines_av
 68. wSv_9_av
 69. wSdir_9_av
 70. wSv_7_av
 71. wSdir

In [40]:
columns_to_lag = ["ros_p"]
n_lags = 1
shp_combined = shp.sort_values(["fname", "zp_link", "sdate"]).reset_index(drop=True)

# garantir datetime
shp_combined["sdate"] = pd.to_datetime(shp_combined["sdate"], errors='coerce')
shp_combined["edate"] = pd.to_datetime(shp_combined["edate"], errors='coerce')

for col in columns_to_lag:
    if col not in shp_combined.columns:
        print(f"⚠️ Coluna '{col}' não encontrada. Pulando.")
        continue

    for lag in range(1, n_lags + 1):
        lag_col_name = f"{col}_lg{lag}"
        shp_combined[lag_col_name] = np.nan

        # processa por grupo (fname, zplink)
        for (fname, zplink), group in shp_combined.groupby(["fname", "zp_link"]):
            group = group.sort_values("sdate")
            group_idx = group.index

            for idx in group_idx:
                current_time = shp_combined.loc[idx, "sdate"]  # início do evento atual
                target_time = current_time - pd.Timedelta(hours=lag)

                # procurar a feature anterior que estava ativa no instante target_time
                mask = (group["sdate"] <= target_time) & (group["edate"] > target_time)

                if mask.any():
                    active_row = group.loc[mask].iloc[-1]
                    shp_combined.at[idx, lag_col_name] = active_row[col]
                else:
                    shp_combined.at[idx, lag_col_name] = np.nan

print("✅ Lags temporais adicionados com base na feature ativa há X horas.")

shp_lags = shp_combined

  shp_combined["sdate"] = pd.to_datetime(shp_combined["sdate"], errors='coerce')


✅ Lags temporais adicionados com base na feature ativa há X horas.


In [41]:
def calculate_fire_durations(df):
    """
    Calculates 'duration' (time since fire start) and lag features 
    (time differences between consecutive observations).
    """
    df_temp = df.copy()
    
    # 1. Convert sdate to datetime
    df_temp['sdate'] = pd.to_datetime(df_temp['sdate'], errors='coerce')
    
    # 2. Sort by fire name and date
    df_temp = df_temp.sort_values(by=['fname', 'sdate'])
    
    # 3. Calculate 'duration' (time since the start of the fire)
    fire_start_times = df_temp.groupby('fname')['sdate'].transform('min')
    df_temp['f_start'] = (df_temp['sdate'] - fire_start_times).dt.total_seconds() / 60
    
    
    return df_temp

# Apply the corrected function
shp_processed = calculate_fire_durations(shp_lags)

# --- Verification ---
print("\nVerifying calculation for 'Agueda_08082016':")
print(shp_processed[shp_processed['f_start'] == 'Agueda_08082016'][
    ['fname', 'sdate', 'f_start']
].head(10))

print("\nVerifying calculation for 'Gouveia_10082015':")
print(shp_processed[shp_processed['f_start'] == 'Gouveia_10082015'][
    ['fname', 'sdate', 'f_start']
].head(10))




Verifying calculation for 'Agueda_08082016':
Empty DataFrame
Columns: [fname, sdate, f_start]
Index: []

Verifying calculation for 'Gouveia_10082015':
Empty DataFrame
Columns: [fname, sdate, f_start]
Index: []


In [42]:
"""
Create fire front ranking system:
    -4: Other missing values
    -3: Partially overlapping intervals
    -2: No meteorological data because of short time interval
    -1: Missing sdate or edate
    1: Single fire front for specific fire at specific time
    2: Multiple fire fronts - highest ROS_p
    3: Multiple fire fronts - lowest ROS_p
"""
shp_processed['fire_rank'] = 1

# Converter colunas de data para datetime se forem strings
if shp_processed['sdate'].dtype == 'object':
    shp_processed['sdate'] = pd.to_datetime(shp_processed['sdate'])
if shp_processed['edate'].dtype == 'object':
    shp_processed['edate'] = pd.to_datetime(shp_processed['edate'])

# -1: Missing sdate or edate
missing_mask = shp_processed['sdate'].isna() | shp_processed['edate'].isna()
shp_processed.loc[missing_mask, 'fire_rank'] = -1

# -2: No meteorological data because of short time interval
def interval_has_full_hour(row):
    if pd.isna(row['sdate']) or pd.isna(row['edate']):
        return False
    # Generate range of hours
    s = row['sdate'].ceil('H')  # next full hour
    e = row['edate'].floor('H') # last full hour
    return s <= e

mask_no_full_hour = (~missing_mask) & (~shp_processed.apply(interval_has_full_hour, axis=1))
shp_processed.loc[mask_no_full_hour, 'fire_rank'] = -2

# -4: Other missing values
mask_missing_meteo = shp_processed['t_2m_C_av'].isna() & (shp_processed['ros_p'] != -1) & (shp_processed['fire_rank'] > 0)
shp_processed.loc[mask_missing_meteo, 'fire_rank'] = -4

# -3: Partially overlapping intervals
valid_mask = (shp_processed['fire_rank'] == 1)

# Ordenar por fname e sdate para verificar sobreposições
shp_processed_sorted = shp_processed[valid_mask].sort_values(['fname', 'sdate'])

for fname, group in shp_processed_sorted.groupby('fname'):
    if len(group) > 1:
        # Verificar sobreposição entre intervalos consecutivos
        for i in range(len(group) - 1):
            current = group.iloc[i]
            next_row = group.iloc[i + 1]
            
            # Verificar se há sobreposição parcial
            # Sobreposição ocorre se: current_edate > next_sdate
            # E NÃO são exatamente o mesmo intervalo (para permitir ranks 2 e 3)
            if (current['edate'] > next_row['sdate']) and not (
                current['sdate'] == next_row['sdate'] and current['edate'] == next_row['edate']
            ):
                # Marcar ambas as progressões sobrepostas como -3
                shp_processed.loc[current.name, 'fire_rank'] = -3
                shp_processed.loc[next_row.name, 'fire_rank'] = -3

# 2 e 3: Multiple fire fronts - highest and lowest ROS_p
valid_mask = (shp_processed['fire_rank'] == 1)
for (fname, sdate, edate), group in shp_processed[valid_mask].groupby(['fname', 'sdate', 'edate']):
    valid_rows = group[(group['type'] == 'p') & (group['ros_p'] > 0)]
    if len(valid_rows) > 1:
        max_ros_idx = valid_rows['ros_p'].idxmax()
        min_ros_idx = valid_rows['ros_p'].idxmin()
        shp_processed.loc[max_ros_idx, 'fire_rank'] = 2
        shp_processed.loc[min_ros_idx, 'fire_rank'] = 3

# Display results
rank_counts = shp_processed['fire_rank'].value_counts().sort_index()
print("Fire front rank distribution:")
print(rank_counts)
print(f"\nTotal rows: {len(shp_processed)}")

  s = row['sdate'].ceil('H')  # next full hour
  e = row['edate'].floor('H') # last full hour


Fire front rank distribution:
fire_rank
-4       3
-3     254
-2      14
-1    1640
 1     910
 2     267
 3     267
Name: count, dtype: int64

Total rows: 3355


In [43]:
for i, coluna in enumerate(shp_processed.columns, 1):
    print(f"{i:3d}. {coluna}")

  1. fid
  2. fname
  3. year
  4. id
  5. type
  6. sdate
  7. edate
  8. inidoy
  9. enddoy
 10. source
 11. zp_link
 12. burn_perio
 13. area
 14. growth_rat
 15. ros_i
 16. ros_p
 17. spdir_i
 18. spdir_p
 19. int_i
 20. int_p
 21. duration_i
 22. duration_p
 23. qc
 24. 1_3y_fir_p
 25. 3_8y_fir_p
 26. 8_ny_fir_p
 27. elev_av
 28. aspect_av
 29. landform
 30. fuel_model
 31. f_load_av
 32. land_use
 33. land_use_d
 34. gp_m2s2_av
 35. CBH_m_av
 36. HigCC_p_av
 37. LowCC_p_av
 38. MidCC_p_av
 39. TotCC_p_av
 40. BLH_m_av
 41. Cape_av
 42. Cin_av
 43. sW_7_av
 44. sW_28_av
 45. sW_100_av
 46. sW_289_av
 47. DC_12h_av
 48. FFMC_12h_a
 49. FWI_12h_av
 50. t_2m_C_av
 51. d_2m_C_av
 52. sP_hPa_av
 53. wv10_kh_av
 54. wdir10_av
 55. wv_Fb_av
 56. wdir_Fb_av
 57. wv100_k_av
 58. wdir100_av
 59. rh_2m_av
 60. VPD_Pa_av
 61. dfmc_av
 62. sW_1m_av
 63. sW_3m_av
 64. LCL_hPa_av
 65. LCL_m_av
 66. HDW_av
 67. Haines_av
 68. wSv_9_av
 69. wSdir_9_av
 70. wSv_7_av
 71. wSdir_7_av
 72. wSv_5_av
 7

In [44]:
shp_processed['HigCC_p_av'] = shp_processed['HigCC_p_av'] * 100
shp_processed['MidCC_p_av'] = shp_processed['MidCC_p_av'] * 100
shp_processed['LowCC_p_av'] = shp_processed['LowCC_p_av'] * 100
shp_processed['TotCC_p_av'] = shp_processed['TotCC_p_av'] * 100

shp_processed = shp_processed.drop(columns=['wv_Fb_av', 'wdir_Fb_av'])

In [45]:
# mantém a ordem original até 'qc'
originais_ate_qc = [
    'fid','fname','year','id','type','sdate','edate','inidoy','enddoy',
    'source','zp_link','burn_perio','area','growth_rat','ros_i','ros_p',
    'spdir_i','spdir_p','int_i','int_p','duration_i','duration_p','qc'
]


# nova ordem racional após qc
novas_ordenadas = [

    # 2. Topography
    'elev_av','aspect_av','landform',

    # 3. Land Use
    'land_use','land_use_d',

    # 1. Fuel
    '1_3y_fir_p','3_8y_fir_p','8_ny_fir_p','fuel_model','f_load_av',

    # 5. Soil Moisture
    'sW_1m_av','sW_3m_av','sW_7_av','sW_28_av','sW_100_av','sW_289_av',

    # 4. Surface Meteorology
    't_2m_C_av','d_2m_C_av','rh_2m_av','VPD_Pa_av','sP_hPa_av', 'gp_m2s2_av',
    'dfmc_av','HDW_av','Haines_av',

    # 19. Fire Weather Indexes
    'FWI_12h_av', 'DC_12h_av', 'FFMC_12h_a',

    # 10. Wind - Surface & 100m
    'wv10_kh_av','wdir10_av','wv100_k_av','wdir100_av',

    'Recirc','CircVar',

    # 11. Horizontal Wind Speeds (levels)
    'wv_950_av','wv_850_av','wv_700_av','wv_500_av','wv_300_av',

    # 12. Horizontal Wind Directions (levels)
    'wdi_950_av','wdi_850_av','wdi_700_av','wdi_500_av','wdi_300_av',

    # 13. Vertical Wind Velocity
    'vwv_950_av','vwv_850_av','vwv_700_av','vwv_500_av','vwv_300_av',

    # 14. Relative Humidity Levels
    'rh_950_av','rh_850_av','rh_700_av','rh_500_av','rh_300_av',

    # 15. Temperature Levels
    't_950_av','t_850_av','t_700_av','t_500_av','t_300_av',

    # 16. Geopotential
    'gp_950_av','gp_850_av','gp_700_av','gp_500_av','gp_300_av',

    # 8. Temperature Gradients
    'gT_s_9_av','gT_9_8_av','gT_8_7_av','gT_7_5_av','gT_5_3_av',

    # 9. Wind Shear
    'wSv_9_av','wSdir_9_av','wSv_7_av','wSdir_7_av',
    'wSv_5_av','wSdir_5_av','wSv_1_av','wSdir_1_av',

    # 6. Clouds & BLH
    'CBH_m_av','HigCC_p_av','LowCC_p_av','MidCC_p_av','TotCC_p_av',

    # 7. Convection & Lifting
    'Cape_av','Cin_av', 'BLH_m_av', 'BLH_m_rt', 
    'LCL_hPa_av','LCL_m_av',
    'LFC_hPa_av','CCL_hPa_av','EL_m_av','LiftIdx_av',
    'VentIdx_av','CMLG_av',

    # 17. Additional Fire Behaviour
    'ros_p_lg1','f_start','fire_rank',

    # 18. Geometry
    'geometry'
]


# ----------------------------
# construir lista final
# ----------------------------

todas_cols = originais_ate_qc + novas_ordenadas

# verificar se existem colunas faltantes ou extras
faltantes = [c for c in todas_cols if c not in shp_processed.columns]
extras = [c for c in shp_processed.columns if c not in todas_cols]

print("Faltantes no DataFrame:", faltantes)
print("Colunas extras (não ordenadas):", extras)


# ----------------------------
# reordenar o DataFrame
# ----------------------------
shp_processed = shp_processed[todas_cols]

print("Reordenação concluída!")


Faltantes no DataFrame: []
Colunas extras (não ordenadas): []
Reordenação concluída!


In [46]:
shp_processed['sdate'] = shp_processed['sdate'].astype(str)
shp_processed['edate'] = shp_processed['edate'].astype(str)

In [47]:
os.makedirs(r'..\..\Data\Processed\PT-FireSprd_v2.1\L2_FireBehavior', exist_ok=True)
shp_processed.to_file(r'..\..\Data\Processed\PT-FireSprd_v2.1\L2_FireBehavior\PT-FireProg_v2.1_L2_final.shp')