In [1]:
import math
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from scipy import stats
import pandas as pd
import geopandas as gpd
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.ensemble import IsolationForest
from scipy.stats import gaussian_kde, pearsonr
from matplotlib.ticker import FixedLocator


In [2]:
# Load the two L2 fire-behaviour shapefiles: the model table and its "_log" counterpart.
gdf_model = gpd.read_file(
    r"../../Data/Processed/PT-FireSprd_v3.0/L2_FireBehavior/PT-FireSprd_v3.0_L2_model.shp"
)
gdf_log = gpd.read_file(
    r"../../Data/Processed/PT-FireSprd_v3.0/L2_FireBehavior/PT-FireSprd_v3.0_L2_model_log.shp"
)

# Keep only the attribute columns as plain pandas DataFrames (geometry dropped).
df_model, df_log = (
    pd.DataFrame(layer.drop(columns='geometry')) for layer in (gdf_model, gdf_log)
)

In [3]:
# Variables to plot against 'ros_p'.
# Leave as None (or an empty list) to plot every column of df_model except
# 'ros_p' and write the figures to PDF; set to an explicit list such as
# ['Cape_av'] to display only those variables inline without writing PDFs.
vars_to_plot = None

# PDFs are produced only for the full "all variables" run.
save_pdf = not vars_to_plot
if save_pdf:
    vars_to_plot = [col for col in df_model.columns if col != 'ros_p']

pdf_filename = '../../Data/Data_Exploration/ROS_var_relations_scatter_plot.pdf'
pdf_kde_filename = '../../Data/Data_Exploration/ROS_var_relations_kde_plot.pdf'

# Open the PDF writers only when needed. They are closed at the end of the
# scatter-plot cell and the KDE-plot cell respectively, so this cell must be
# re-run before either plotting cell is executed a second time.
pdf = PdfPages(pdf_filename) if save_pdf else None
pdf_kde = PdfPages(pdf_kde_filename) if save_pdf else None

In [4]:
def signed_log(x):
    """
    Sign-preserving logarithm: sign(x) * log(1 + |x|).

    Works element-wise on scalars, NumPy arrays and pandas Series. Zero maps
    to zero and negative inputs map to negative outputs, so the transform is
    defined on the whole real line. It is the inverse of ``signed_exp``.

    ``np.log1p`` is used instead of ``np.log(... + 1)`` so that values with a
    very small magnitude are not rounded to zero.
    """
    return np.sign(x) * np.log1p(np.abs(x))

def signed_exp(x):
    """
    Inverse of ``signed_log``: sign(x) * (exp(|x|) - 1).

    Works element-wise on scalars, NumPy arrays and pandas Series.

    ``np.expm1`` is used instead of ``np.exp(...) - 1`` so that values with a
    very small magnitude are not rounded to zero.
    """
    return np.sign(x) * np.expm1(np.abs(x))

from scipy.stats import gaussian_kde
from sklearn.linear_model import LinearRegression
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def weighted_spearmanr(x, y, weights):
    """
    Density-weighted Spearman correlation (approximation).

    Both inputs are converted to ordinary (unweighted) ranks using the pandas
    default tie handling, and the weighted Pearson correlation of those ranks
    is returned. Only the means, variances and covariance are weighted; the
    ranks themselves are not.

    Parameters
    ----------
    x, y : array-like
        Paired observations, matched by position.
    weights : array-like
        One weight per observation.

    Returns
    -------
    numpy.float64
        Weighted rank correlation, or NaN when either rank vector has zero
        weighted variance (e.g. a constant input).
    """
    # Rank on positional arrays: pandas inputs carrying different indexes must
    # be paired by position, not re-aligned by label.
    x_rank = pd.Series(np.asarray(x)).rank().to_numpy()
    y_rank = pd.Series(np.asarray(y)).rank().to_numpy()
    weights = np.asarray(weights, dtype=float)

    w_mean_x = np.average(x_rank, weights=weights)
    w_mean_y = np.average(y_rank, weights=weights)
    dx = x_rank - w_mean_x
    dy = y_rank - w_mean_y

    cov_xy = np.average(dx * dy, weights=weights)
    var_x = np.average(dx ** 2, weights=weights)
    var_y = np.average(dy ** 2, weights=weights)

    denom = np.sqrt(var_x * var_y)
    if denom == 0:
        # Correlation is undefined for a constant rank vector.
        return np.float64(np.nan)
    return cov_xy / denom

# Tick positions (in original, untransformed units) for the secondary linear
# axes: four ranges of increasing step merged into one sorted list. The ranges
# all start at 0 and share boundary values, so they are de-duplicated.
ticks = sorted(
    set(range(0, 11, 2)) | set(range(0, 101, 20))
    | set(range(0, 1001, 200)) | set(range(0, 10001, 2000))
)

# One 2x2 figure per variable: ROS against the variable, with every
# combination of raw / signed-log scaling on each axis.
for var in vars_to_plot:
    print(f"Plotting {var}...")

    fig, axes = plt.subplots(2, 2, figsize=(20, 15))
    fig.suptitle(f'{var} - Comparison of Transformations', fontsize=16, y=0.95, fontweight='bold')

    # (panel title, y column, x column, log-transform y?, log-transform x?)
    plot_configs = [
        ('ROS vs ' + var, 'ros_p', var, False, False),
        ('ROS vs log(' + var + ')', 'ros_p', var, False, True),
        ('log(ROS) vs ' + var, 'ros_p', var, True, False),
        ('log(ROS) vs log(' + var + ')', 'ros_p', var, True, True)
    ]

    for idx, (title, y_col, x_col, log_y, log_x) in enumerate(plot_configs):
        ax = axes[idx // 2, idx % 2]

        # Non-numeric entries become NaN and are dropped by the mask below.
        x_original = pd.to_numeric(df_model[x_col], errors='coerce')
        y_original = pd.to_numeric(df_model[y_col], errors='coerce')

        x = signed_log(x_original) if log_x else x_original
        y = signed_log(y_original) if log_y else y_original

        mask = x.notna() & y.notna()
        x_clean = x[mask]
        y_clean = y[mask]

        if len(x_clean) < 5:
            ax.text(0.5, 0.5, 'Insufficient data', ha='center', va='center', transform=ax.transAxes, fontsize=12)
            ax.set_title(title, fontsize=12, fontweight='bold')
            continue

        try:
            # KDE evaluated at the data points; the density is used both as the
            # point colour and as the regression / correlation weight.
            xy = np.vstack([x_clean, y_clean])
            z = gaussian_kde(xy)(xy)
            # Order by increasing density so the densest points come last.
            idx_sort = z.argsort()
            x_sorted, y_sorted, z_sorted = x_clean.iloc[idx_sort], y_clean.iloc[idx_sort], z[idx_sort]

            # Scatter coloured by density
            ax.scatter(x_sorted, y_sorted, c=z_sorted, s=20, cmap='viridis', alpha=0.7)

            # Density-weighted linear regression (line pulled towards dense clusters)
            model = LinearRegression()
            model.fit(x_sorted.values.reshape(-1,1), y_sorted.values, sample_weight=z_sorted)
            y_fit = model.predict(x_sorted.values.reshape(-1,1))
            ax.plot(x_sorted, y_fit, 'r-', linewidth=3, alpha=0.8, label='Weighted Linear Fit')

            # Weighted Spearman
            weighted_rho = weighted_spearmanr(x_sorted.values, y_sorted.values, z_sorted)

            # Statistics text box. The sign of the intercept is written as the
            # operator, so the magnitude is formatted separately; values below
            # 0.001 use scientific notation instead of collapsing to 0.000.
            slope = model.coef_[0]
            intercept = model.intercept_
            slope_str = f"{slope:.3f}" if abs(slope) >= 0.001 else f"{slope:.2e}"
            intercept_abs = abs(intercept)
            intercept_str = f"{intercept_abs:.3f}" if intercept_abs >= 0.001 else f"{intercept_abs:.2e}"
            sign = '+' if intercept >= 0 else '-'
            eq_text = f"y = {slope_str}x {sign} {intercept_str}"
            stats_text = f"{eq_text}\nWeighted Spearman ρ ≈ {weighted_rho:.3f}"
            ax.text(0.98, 0.98, stats_text, transform=ax.transAxes,
                    fontsize=11, verticalalignment='top', horizontalalignment='right',
                    bbox=dict(boxstyle='round', facecolor='white', alpha=0.9, edgecolor='gray'))

            # Formatting
            ax.set_title(title, fontsize=13, fontweight='bold', pad=10)
            ax.set_xlabel(f'log({var})' if log_x else var, fontsize=11, labelpad=8)
            ax.set_ylabel('log(ROS) m/h' if log_y else 'ROS m/h', fontsize=11, labelpad=8)
            ax.grid(True, linestyle='--', alpha=0.3)
            ax.legend(fontsize=10, loc='lower right')

            # Secondary Y axis (ROS in linear units) when y is log-transformed
            if log_y:
                ax2_y = ax.secondary_yaxis('right', functions=(signed_exp, signed_log))
                ax2_y.set_ylabel('ROS', fontsize=11, labelpad=8)
                ax2_y.tick_params(axis='y', labelsize=9)
                ax2_y.set_yticks(ticks)

            # Secondary X axis (variable in linear units) when x is log-transformed
            if log_x:
                ax2_x = ax.secondary_xaxis('top', functions=(signed_exp, signed_log))
                ax2_x.set_xlabel(f'{var}', fontsize=11, labelpad=8)
                ax2_x.tick_params(axis='x', labelsize=9)
                ax2_x.set_xticks(ticks)

        except Exception as e:
            # Best effort: show the failure inside the panel and keep going
            # with the remaining panels and variables.
            ax.text(0.5, 0.5, f'Error: {str(e)}', ha='center', va='center', transform=ax.transAxes, fontsize=12)
            ax.set_title(title, fontsize=12, fontweight='bold')

    plt.tight_layout(pad=3.0)

    if save_pdf:
        pdf.savefig(fig, bbox_inches='tight')
        # Close each figure once saved so figures do not accumulate in memory.
        plt.close(fig)
    else:
        plt.show()

if save_pdf and pdf is not None:
    pdf.close()
    print(f"All plots saved in '{pdf_filename}'")


Plotting duration_p...


Plotting elev_av...
Plotting aspect_sin...
Plotting aspect_cos...
Plotting landform...
Plotting land_use...
Plotting 1_3y_fir_p...
Plotting 3_8y_fir_p...
Plotting 8_ny_fir_p...
Plotting fuel_age...
Plotting fuel_model...
Plotting f_load_av...
Plotting sW_1m_av...
Plotting sW_3m_av...
Plotting sW_7_av...
Plotting sW_28_av...
Plotting sW_100_av...
Plotting sW_289_av...
Plotting t_2m_C_av...
Plotting d_2m_C_av...
Plotting rh_2m_av...
Plotting VPD_Pa_av...
Plotting sP_hPa_av...
Plotting gp_m2s2_av...
Plotting dfmc_av...
Plotting HDW_av...
Plotting Haines_av...
Plotting FWI_12h_av...
Plotting DC_12h_av...
Plotting FFMC_12h_a...
Plotting wv10_kh_av...
Plotting wsin10_av...
Plotting wcos10_av...
Plotting wv100_k_av...
Plotting wsin100_av...
Plotting wcos100_av...
Plotting Recirc...
Plotting CircVar...
Plotting t_950_av...
Plotting t_850_av...
Plotting t_700_av...
Plotting t_500_av...
Plotting t_300_av...
Plotting rh_950_av...
Plotting rh_850_av...
Plotting rh_700_av...
Plotting rh_500_av...
P

In [5]:
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import gaussian_kde, linregress

# Transformation functions
# NOTE(review): signed_log is also defined in the earlier scatter-plot cell;
# consider defining it once and reusing it here.
def signed_log(x):
    """
    Sign-preserving logarithm: sign(x) * log(1 + |x|).

    Works element-wise on scalars, NumPy arrays and pandas Series. Zero maps
    to zero and negative inputs map to negative outputs. It is the inverse of
    ``signed_exp``.

    ``np.log1p`` is used instead of ``np.log(... + 1)`` so that values with a
    very small magnitude are not rounded to zero.
    """
    return np.sign(x) * np.log1p(np.abs(x))

def signed_exp(x):
    """
    Inverse of ``signed_log``: sign(x) * (exp(|x|) - 1).

    Works element-wise on scalars, NumPy arrays and pandas Series.

    ``np.expm1`` is used instead of ``np.exp(...) - 1`` so that values with a
    very small magnitude are not rounded to zero.
    """
    return np.sign(x) * np.expm1(np.abs(x))

# Tick positions (in original, untransformed units) for the secondary linear
# axes: four ranges of increasing step merged into one sorted list. The ranges
# all start at 0 and share boundary values, so they are de-duplicated.
ticks = sorted(
    set(range(0, 11, 2)) | set(range(0, 101, 20))
    | set(range(0, 1001, 200)) | set(range(0, 10001, 2000))
)

# One 2x2 figure per variable: filled KDE contours of ROS against the variable,
# with every combination of raw / signed-log scaling on each axis.
for var in vars_to_plot:
    print(f"Plotando KDE para {var}...")

    fig, axes = plt.subplots(2, 2, figsize=(20, 15))
    fig.suptitle(f'{var} - KDE Density Plots', fontsize=16, y=0.95, fontweight='bold')

    # (panel title, y column, x column, log-transform y?, log-transform x?)
    plot_configs = [
        ('ROS vs ' + var, 'ros_p', var, False, False),
        ('ROS vs log(' + var + ')', 'ros_p', var, False, True),
        ('log(ROS) vs ' + var, 'ros_p', var, True, False),
        ('log(ROS) vs log(' + var + ')', 'ros_p', var, True, True)
    ]

    for idx, (title, y_col, x_col, log_y, log_x) in enumerate(plot_configs):
        ax = axes[idx // 2, idx % 2]

        # Prepare data: non-numeric entries become NaN and are dropped below.
        x_orig = pd.to_numeric(df_model[x_col], errors='coerce')
        y_orig = pd.to_numeric(df_model[y_col], errors='coerce')

        x = signed_log(x_orig) if log_x else x_orig
        y = signed_log(y_orig) if log_y else y_orig

        mask = x.notna() & y.notna()
        x_clean, y_clean = x[mask], y[mask]

        if len(x_clean) < 5:
            ax.text(0.5, 0.5, 'Dados insuficientes', ha='center', va='center', transform=ax.transAxes, fontsize=12)
            ax.set_title(title, fontsize=12, fontweight='bold')
            continue

        try:
            # Ordinary (unweighted) linear regression
            slope, intercept, r_value, p_value, _ = linregress(x_clean, y_clean)
            r2 = r_value**2

            # Continuous KDE evaluated on a 100x100 grid spanning the data range
            xy = np.vstack([x_clean, y_clean])
            kde = gaussian_kde(xy)
            xx, yy = np.mgrid[x_clean.min():x_clean.max():100j, y_clean.min():y_clean.max():100j]
            positions = np.vstack([xx.ravel(), yy.ravel()])
            z = np.reshape(kde(positions).T, xx.shape)
            cf = ax.contourf(xx, yy, z, levels=20, cmap='viridis', alpha=0.8)
            cbar = plt.colorbar(cf, ax=ax, label='Density', pad=0.1)
            cbar.ax.tick_params(labelsize=9)

            # Regression line
            x_fit = np.linspace(x_clean.min(), x_clean.max(), 100)
            y_fit = intercept + slope * x_fit
            ax.plot(x_fit, y_fit, 'r-', linewidth=3, alpha=0.8, label='Regression line')

            # Equation and statistics text box. The sign of the intercept is
            # written as the operator, so the magnitude is formatted separately;
            # values below 0.001 use scientific notation instead of collapsing
            # to 0.000.
            slope_str = f"{slope:.3f}" if abs(slope) >= 0.001 else f"{slope:.2e}"
            intercept_abs = abs(intercept)
            intercept_str = f"{intercept_abs:.3f}" if intercept_abs >= 0.001 else f"{intercept_abs:.2e}"
            sign = '+' if intercept >= 0 else '-'
            eq_text = f"y = {slope_str}x {sign} {intercept_str}"
            stats_text = f"r = {r_value:.3f}\nR² = {r2:.3f}\np = {p_value:.3f}"
            ax.text(0.98, 0.98, f"{eq_text}\n{stats_text}",
                    transform=ax.transAxes, fontsize=11,
                    verticalalignment='top', horizontalalignment='right',
                    bbox=dict(boxstyle='round', facecolor='white', alpha=0.9, edgecolor='gray'))

            # Labels and title
            ax.set_title(title, fontsize=13, fontweight='bold')
            ax.set_xlabel(f'log({var})' if log_x else var, fontsize=11)
            ax.set_ylabel('log(ROS) m/h' if log_y else 'ROS m/h', fontsize=11)
            ax.grid(True, linestyle='--', alpha=0.3)
            ax.legend(fontsize=10, loc='lower right')

            # Secondary Y axis (ROS in linear units) when y is log-transformed
            if log_y:
                ax2_y = ax.secondary_yaxis('right', functions=(signed_exp, signed_log))
                ax2_y.set_ylabel('ROS', fontsize=11, labelpad=8)
                ax2_y.tick_params(axis='y', labelsize=9)
                ax2_y.set_yticks(ticks)

            # Secondary X axis (variable in linear units) when x is log-transformed.
            # The full tick list is passed to set_xticks; previously only
            # range(0, 11, 2) was inside the call and the remaining ranges were
            # added to its return value and discarded.
            if log_x:
                ax2_x = ax.secondary_xaxis('top', functions=(signed_exp, signed_log))
                ax2_x.set_xlabel(f'{var}', fontsize=11, labelpad=8)
                ax2_x.tick_params(axis='x', labelsize=9)
                ax2_x.set_xticks(ticks)

        except Exception as e:
            # Best effort: show the failure inside the panel and keep going
            # with the remaining panels and variables.
            ax.text(0.5, 0.5, f'Erro: {str(e)}', ha='center', va='center', transform=ax.transAxes, fontsize=12)
            ax.set_title(title, fontsize=12, fontweight='bold')

    plt.tight_layout(pad=3.0)

    if save_pdf:
        pdf_kde.savefig(fig, bbox_inches='tight')
        # Close each figure once saved so figures do not accumulate in memory.
        plt.close(fig)
    else:
        plt.show()

if save_pdf and pdf_kde is not None:
    pdf_kde.close()
    print(f"Todos os KDE plots salvos em '{pdf_kde_filename}'")


Plotando KDE para duration_p...
Plotando KDE para elev_av...
Plotando KDE para aspect_sin...
Plotando KDE para aspect_cos...
Plotando KDE para landform...
Plotando KDE para land_use...
Plotando KDE para 1_3y_fir_p...
Plotando KDE para 3_8y_fir_p...
Plotando KDE para 8_ny_fir_p...
Plotando KDE para fuel_age...
Plotando KDE para fuel_model...
Plotando KDE para f_load_av...
Plotando KDE para sW_1m_av...
Plotando KDE para sW_3m_av...
Plotando KDE para sW_7_av...
Plotando KDE para sW_28_av...
Plotando KDE para sW_100_av...
Plotando KDE para sW_289_av...
Plotando KDE para t_2m_C_av...
Plotando KDE para d_2m_C_av...
Plotando KDE para rh_2m_av...
Plotando KDE para VPD_Pa_av...
Plotando KDE para sP_hPa_av...
Plotando KDE para gp_m2s2_av...
Plotando KDE para dfmc_av...
Plotando KDE para HDW_av...
Plotando KDE para Haines_av...
Plotando KDE para FWI_12h_av...
Plotando KDE para DC_12h_av...
Plotando KDE para FFMC_12h_a...
Plotando KDE para wv10_kh_av...
Plotando KDE para wsin10_av...
Plotando KDE 