In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os

In [2]:
shp = gpd.read_file(r"../../Data/Interim/PT-FireSprd_v2.1/L2_FireBehavior/PT-FireProg_v2.1_L2_p_meteo.shp")

print(f"Total de colunas: {len(shp.columns)}")
for i, coluna in enumerate(shp.columns, 1):
    print(f"{i:3d}. {coluna}")

Total de colunas: 117
  1. fid
  2. fname
  3. year
  4. id
  5. type
  6. sdate
  7. edate
  8. inidoy
  9. enddoy
 10. source
 11. zp_link
 12. burn_perio
 13. area
 14. growth_rat
 15. ros_i
 16. ros_p
 17. spdir_i
 18. spdir_p
 19. int_i
 20. int_p
 21. duration_i
 22. duration_p
 23. qc
 24. 1_3y_fir_p
 25. 3_8y_fir_p
 26. 8_ny_fir_p
 27. elev_av
 28. aspect_av
 29. landform
 30. fuel_model
 31. f_load_av
 32. land_use
 33. land_use_d
 34. CBH_m_av
 35. HigCC_p_av
 36. LowCC_p_av
 37. MidCC_p_av
 38. TotCC_p_av
 39. BLH_m_av
 40. Cape_av
 41. Cin_av
 42. sW_7_av
 43. sW_28_av
 44. sW_100_av
 45. sW_289_av
 46. gp_m2s2_av
 47. t_2m_C_av
 48. d_2m_C_av
 49. sP_hPa_av
 50. wv10_kh_av
 51. wdir10_av
 52. wv100_k_av
 53. wdir100_av
 54. rh_2m_av
 55. VPD_Pa_av
 56. dfmc_av
 57. sW_1m_av
 58. sW_3m_av
 59. LCL_hPa_av
 60. LCL_m_av
 61. HDW_av
 62. Haines_av
 63. wSv_9_av
 64. wSdir_9_av
 65. wSv_7_av
 66. wSdir_7_av
 67. wSv_5_av
 68. wSdir_5_av
 69. wSv_1_av
 70. wSdir_1_av
 71. gT_s_9

In [3]:
columns_to_lag = ["ros_p"]
n_lags = 1
shp_combined = shp.sort_values(["fname", "zp_link", "sdate"]).reset_index(drop=True)

# garantir datetime
shp_combined["sdate"] = pd.to_datetime(shp_combined["sdate"], errors='coerce')
shp_combined["edate"] = pd.to_datetime(shp_combined["edate"], errors='coerce')

for col in columns_to_lag:
    if col not in shp_combined.columns:
        print(f"⚠️ Coluna '{col}' não encontrada. Pulando.")
        continue

    for lag in range(1, n_lags + 1):
        lag_col_name = f"{col}_lg{lag}"
        shp_combined[lag_col_name] = np.nan

        # processa por grupo (fname, zplink)
        for (fname, zplink), group in shp_combined.groupby(["fname", "zp_link"]):
            group = group.sort_values("sdate")
            group_idx = group.index

            for idx in group_idx:
                current_time = shp_combined.loc[idx, "sdate"]  # início do evento atual
                target_time = current_time - pd.Timedelta(hours=lag)

                # procurar a feature anterior que estava ativa no instante target_time
                mask = (group["sdate"] <= target_time) & (group["edate"] > target_time)

                if mask.any():
                    active_row = group.loc[mask].iloc[-1]
                    shp_combined.at[idx, lag_col_name] = active_row[col]
                else:
                    shp_combined.at[idx, lag_col_name] = np.nan

print("✅ Lags temporais adicionados com base na feature ativa há X horas.")

shp_lags = shp_combined

  shp_combined["sdate"] = pd.to_datetime(shp_combined["sdate"], errors='coerce')


✅ Lags temporais adicionados com base na feature ativa há X horas.


In [4]:
def calculate_fire_durations(df):
    """
    Calculates 'duration' (time since fire start) and lag features 
    (time differences between consecutive observations).
    """
    df_temp = df.copy()
    
    # 1. Convert sdate to datetime
    df_temp['sdate'] = pd.to_datetime(df_temp['sdate'], errors='coerce')
    
    # 2. Sort by fire name and date
    df_temp = df_temp.sort_values(by=['fname', 'sdate'])
    
    # 3. Calculate 'duration' (time since the start of the fire)
    fire_start_times = df_temp.groupby('fname')['sdate'].transform('min')
    df_temp['f_start'] = (df_temp['sdate'] - fire_start_times).dt.total_seconds() / 60
    
    
    return df_temp

# Apply the corrected function
shp_processed = calculate_fire_durations(shp_lags)

# --- Verification ---
print("/nVerifying calculation for 'Agueda_08082016':")
print(shp_processed[shp_processed['f_start'] == 'Agueda_08082016'][
    ['fname', 'sdate', 'f_start']
].head(10))

print("/nVerifying calculation for 'Gouveia_10082015':")
print(shp_processed[shp_processed['f_start'] == 'Gouveia_10082015'][
    ['fname', 'sdate', 'f_start']
].head(10))



/nVerifying calculation for 'Agueda_08082016':
Empty DataFrame
Columns: [fname, sdate, f_start]
Index: []
/nVerifying calculation for 'Gouveia_10082015':
Empty DataFrame
Columns: [fname, sdate, f_start]
Index: []


In [5]:
"""
Create fire front ranking system:
    -3: Other missing values
    -2: No meteorological data because of short time interval
    -1: Missing sdate or edate
    1: Single fire front for specific fire at specific time
    2: Multiple fire fronts - highest ROS_p
    3: Multiple fire fronts - lowest ROS_p
"""
shp_processed['fire_rank'] = 1

# Handle missing dates
missing_mask = shp_processed['sdate'].isna() | shp_processed['edate'].isna()
shp_processed.loc[missing_mask, 'fire_rank'] = -1


# Handle intervals that don't contain exact hour (-2)
def interval_has_full_hour(row):
    if pd.isna(row['sdate']) or pd.isna(row['edate']):
        return False
    # Generate range of hours
    s = row['sdate'].ceil('H')  # next full hour
    e = row['edate'].floor('H') # last full hour
    return s <= e

mask_no_full_hour = (~missing_mask) & (~shp_processed.apply(interval_has_full_hour, axis=1))
shp_processed.loc[mask_no_full_hour, 'fire_rank'] = -2

# Handle missing meteorological data (-3)
mask_missing_meteo = shp_processed['t_2m_C_av'].isna() & (shp_processed['ros_p'] != -1) & (shp_processed['fire_rank'] != -2)
shp_processed.loc[mask_missing_meteo, 'fire_rank'] = -3

# Process valid rows
valid_mask = (~missing_mask) & (~mask_no_full_hour)
for (fname, sdate), group in shp_processed[valid_mask].groupby(['fname', 'sdate']):
    valid_rows = group[(group['type'] == 'p') & (group['ros_p'] > 0) & (group['fire_rank'] == 1)]
    if len(valid_rows) > 1:
        max_ros_idx = valid_rows['ros_p'].idxmax()
        min_ros_idx = valid_rows['ros_p'].idxmin()
        shp_processed.loc[max_ros_idx, 'fire_rank'] = 2
        shp_processed.loc[min_ros_idx, 'fire_rank'] = 3

# Display results
rank_counts = shp_processed['fire_rank'].value_counts().sort_index()
print("Fire front rank distribution:")
print(rank_counts)
print(f"/nTotal rows: {len(shp_processed)}")

  s = row['sdate'].ceil('H')  # next full hour
  e = row['edate'].floor('H') # last full hour


Fire front rank distribution:
fire_rank
-3       3
-2      14
-1    1640
 1    1020
 2     339
 3     339
Name: count, dtype: int64
/nTotal rows: 3355


In [6]:
for i, coluna in enumerate(shp_processed.columns, 1):
    print(f"{i:3d}. {coluna}")

  1. fid
  2. fname
  3. year
  4. id
  5. type
  6. sdate
  7. edate
  8. inidoy
  9. enddoy
 10. source
 11. zp_link
 12. burn_perio
 13. area
 14. growth_rat
 15. ros_i
 16. ros_p
 17. spdir_i
 18. spdir_p
 19. int_i
 20. int_p
 21. duration_i
 22. duration_p
 23. qc
 24. 1_3y_fir_p
 25. 3_8y_fir_p
 26. 8_ny_fir_p
 27. elev_av
 28. aspect_av
 29. landform
 30. fuel_model
 31. f_load_av
 32. land_use
 33. land_use_d
 34. CBH_m_av
 35. HigCC_p_av
 36. LowCC_p_av
 37. MidCC_p_av
 38. TotCC_p_av
 39. BLH_m_av
 40. Cape_av
 41. Cin_av
 42. sW_7_av
 43. sW_28_av
 44. sW_100_av
 45. sW_289_av
 46. gp_m2s2_av
 47. t_2m_C_av
 48. d_2m_C_av
 49. sP_hPa_av
 50. wv10_kh_av
 51. wdir10_av
 52. wv100_k_av
 53. wdir100_av
 54. rh_2m_av
 55. VPD_Pa_av
 56. dfmc_av
 57. sW_1m_av
 58. sW_3m_av
 59. LCL_hPa_av
 60. LCL_m_av
 61. HDW_av
 62. Haines_av
 63. wSv_9_av
 64. wSdir_9_av
 65. wSv_7_av
 66. wSdir_7_av
 67. wSv_5_av
 68. wSdir_5_av
 69. wSv_1_av
 70. wSdir_1_av
 71. gT_s_9_av
 72. gT_9_8_av
 73

In [7]:
shp_processed['HigCC_p_av'] = shp_processed['HigCC_p_av'] * 100
shp_processed['MidCC_p_av'] = shp_processed['MidCC_p_av'] * 100
shp_processed['LowCC_p_av'] = shp_processed['LowCC_p_av'] * 100
shp_processed['TotCC_p_av'] = shp_processed['TotCC_p_av'] * 100

shp_processed['sW_1m_av'] = shp_processed['sW_1m_av'] * 100
shp_processed['sW_3m_av'] = shp_processed['sW_3m_av'] * 100
shp_processed['sW_7_av'] = shp_processed['sW_7_av'] * 100
shp_processed['sW_28_av'] = shp_processed['sW_28_av'] * 100
shp_processed['sW_100_av'] = shp_processed['sW_100_av'] * 100
shp_processed['sW_289_av'] = shp_processed['sW_289_av'] * 100

In [8]:
shp_processed['sdate'] = shp_processed['sdate'].astype(str)
shp_processed['edate'] = shp_processed['edate'].astype(str)

In [9]:
os.makedirs(r'../../Data/Processed/PT-FireSprd_v2.1/L2_FireBehavior', exist_ok=True)
shp_processed.to_file(r'../../Data/Processed/PT-FireSprd_v2.1/L2_FireBehavior/PT-FireProg_v2.1_L2_final.shp')