In [2]:
import geopandas as gpd
import pandas as pd
import networkx as nx
import numpy as np
import pickle
import itertools
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import matplotlib.colors as mcolors
from matplotlib.patches import Polygon as MplPolygon
import geopandas as gpd
import chronnet_utils as cu 
import warnings
from scipy.linalg import eigvals
from scipy import optimize
from scipy.stats import pearsonr, spearmanr
from sklearn.linear_model import LinearRegression
from datetime import datetime
import networkx as nx
from collections import Counter
from sis_steady_state_and_eval import jsd_from_samples,nimfa_sis_steady_state_root_1,recognition_quality

from scipy.stats import spearmanr, kendalltau
from sklearn.metrics import ndcg_score, average_precision_score




from compute_y_true import compute_y_true_metrics as compute_y_true
# Timestamp string (format YYYYMMDD_HHMMSS, from datetime.now()) captured once
# when this cell is executed.
current_datetime = datetime.now().strftime("%Y%m%d_%H%M%S")




# Build Chronnet for each grid size 

In [49]:
# Build one chronnet per hexagon size and collect, for each size, the raw and
# pruned graphs plus summary statistics of the fire events.
# grid_sizes are the hex_size values passed to cu.create_hex_grid (1000-7000;
# presumably metres once the data is in EPSG:3857 -- TODO confirm).

min_weight = 2
dmax_scale = 2
grid_sizes = [1000, 2000, 2375, 2750, 3000, 4000, 5000, 6000, 7000]
# One d_max per grid size: hex_size * dmax_scale * sqrt(3).
dmax_vals = [x * dmax_scale * np.sqrt(3) for x in grid_sizes]
time_bin = '12h'  # time bin for grouping fire events
# Lag between consecutive time bins; derived from time_bin so that the
# "previous bin" lookup below cannot drift out of sync with the binning.
time_lag = pd.Timedelta(time_bin)

shapefile_path = "/Users/mabelhu/Desktop/Code/DL_FIRE_SV-C2_576237/fire_archive_SV-C2_576237.shp"
gdf = gpd.read_file(shapefile_path)
# keep only detections whose CONFIDENCE is 'h' or 'n'
gdf = gdf[gdf['CONFIDENCE'].isin(['h', 'n'])]
# transform to meter-based
if gdf.crs.to_string() == 'EPSG:4326':
    gdf = gdf.to_crs(epsg=3857)
# convert to datetime: ACQ_TIME is zero-padded to 4 digits (HHMM) and combined
# with ACQ_DATE. pd.to_datetime already yields Timestamps, so no further
# element-wise conversion is needed.
gdf['ACQ_TIME'] = gdf['ACQ_TIME'].astype(str).str.zfill(4)
gdf['acq_time'] = pd.to_datetime(
    gdf['ACQ_DATE'].astype(str) + ' ' + gdf['ACQ_TIME'],
    format='%Y-%m-%d %H%M'
)


chronnet_graph_list = []
chronnet_result_list = []

for hex_size, d_max in zip(grid_sizes, dmax_vals):

    # Assign every detection to the hexagon that contains it; detections that
    # fall in no hexagon are dropped.
    hex_grid = cu.create_hex_grid(gdf, hex_size)
    gdf_sjoined = gpd.sjoin(
        gdf,
        hex_grid[['cell', 'geometry']],
        how='left',
        predicate='within',
        rsuffix='_hex'
    ).dropna(subset=['cell'])

    gdf_sjoined['time_group'] = gdf_sjoined['acq_time'].dt.floor(time_bin)

    valid_cells = gdf_sjoined['cell'].unique()
    hex_grid_filtered = hex_grid[hex_grid['cell'].isin(valid_cells)]

    chronnet = cu.build_chronnet_freq(gdf_sjoined, dmax=d_max, freq=time_bin)
    chronnet_pruned = cu.prune_chronnet(chronnet, min_weight=min_weight)

    # describe the network before / after pruning
    total_nodes_before_pruning = chronnet.number_of_nodes()
    total_nodes = chronnet_pruned.number_of_nodes()
    total_edges_before_pruning = chronnet.number_of_edges()
    total_edges = chronnet_pruned.number_of_edges()
    total_strength = sum(dict(chronnet_pruned.degree(weight='weight')).values())
    total_strength_before_pruning = sum(dict(chronnet.degree(weight='weight')).values())

    # compute the largest strongly connected component (SCC)
    scc = sorted(nx.strongly_connected_components(chronnet_pruned), key=len, reverse=True)
    if scc:
        largest_scc = scc[0]
        largest_scc_nodes = len(largest_scc)
        largest_scc_ratio = largest_scc_nodes / total_nodes
    else:
        # Empty pruned graph: define every SCC variable explicitly so the
        # code below neither raises NameError nor silently reuses the SCC of
        # the previous grid size.
        largest_scc = set()
        largest_scc_nodes = 0
        largest_scc_ratio = 0

    # compute network-driven fire %
    df_fire = gdf_sjoined[gdf_sjoined['cell'].isin(valid_cells)].copy()
    df_fire = df_fire[df_fire['cell'].isin(chronnet_pruned.nodes())]  # remove cell not in chronnet_pruned

    # time bin -> set of cells burning in that bin
    burning_by_time = df_fire.groupby('time_group')['cell'].apply(set).to_dict()
    network_events = []
    sf_events = []
    self_loops_events = []
    for t, cells in burning_by_time.items():
        prev_cells = burning_by_time.get(t - time_lag, set())
        for node in cells:
            # NOTE(review): on a directed graph neighbors() yields successors.
            # If edges point from the earlier-burning cell to the later one,
            # predecessors() may be what is intended -- confirm against
            # cu.build_chronnet_freq.
            neis = set(chronnet_pruned.neighbors(node))
            if node in prev_cells:
                # the same cell was already burning in the previous bin
                self_loops_events.append((node, t))
            elif not prev_cells.intersection(neis):
                # no neighbour was burning in the previous bin
                sf_events.append((node, t))
            else:
                network_events.append((node, t))

    df_network_fire = pd.DataFrame(network_events, columns=['cell', 'time_group']).drop_duplicates()
    grouped_df_fire_unique = df_fire[['cell', 'time_group']].drop_duplicates()
    df_sf_fire = pd.DataFrame(sf_events, columns=['cell', 'time_group']).drop_duplicates()
    df_self_loops = pd.DataFrame(self_loops_events, columns=['cell', 'time_group']).drop_duplicates()

    # flag the events whose cell belongs to the largest SCC
    df_network_fire['in_largest_scc'] = df_network_fire['cell'].isin(largest_scc)
    df_sf_fire['in_largest_scc'] = df_sf_fire['cell'].isin(largest_scc)
    df_self_loops['in_largest_scc'] = df_self_loops['cell'].isin(largest_scc)

    chronnet_graph_list.append({
        'grid_size': hex_size,
        'd_max': d_max,
        'chronnet': chronnet,
        'chronnet_pruned': chronnet_pruned
    })

    chronnet_result_list.append({
        'grid_size': hex_size,
        'd_max': d_max,
        'gdf_sjoined': gdf_sjoined,
        'total_nodes_after_pruning': total_nodes,
        'total_nodes_before_pruning': total_nodes_before_pruning,
        'total_edges_after_pruning': total_edges,
        'total_edges_before_pruning': total_edges_before_pruning,
        'total_strength_after_pruning': total_strength,
        'total_strength_before_pruning': total_strength_before_pruning,

        'largest_scc_nodes': largest_scc_nodes,
        'largest_scc': largest_scc,
        'largest_scc_ratio': largest_scc_ratio,

        'grouped_df_fire_unique': grouped_df_fire_unique,
        'df_network_fire': df_network_fire,
        'df_sf_fire': df_sf_fire,
        'df_self_loops': df_self_loops,

        # raw event counts
        'n_all_fire-events': len(network_events) + len(sf_events) + len(self_loops_events),
        'n_network_events': len(network_events),
        'n_sf_events': len(sf_events),
        'n_self_loops_events': len(self_loops_events),

        # counts after drop_duplicates on (cell, time_group)
        'n_network_fire': len(df_network_fire),
        'n_sf_fire': len(df_sf_fire),
        'n_self_loops': len(df_self_loops),

        'n_network_fire_in_largest_scc': len(df_network_fire[df_network_fire['in_largest_scc']]),
        'n_sf_fire_in_largest_scc': len(df_sf_fire[df_sf_fire['in_largest_scc']]),
        'n_self_loops_in_largest_scc': len(df_self_loops[df_self_loops['in_largest_scc']]),

        'n_fire_cells_in_chronnet_pruned': len(df_fire['cell'].unique()),
        'n_fire_cells_in_largest_scc': len(df_fire[df_fire['cell'].isin(largest_scc)]['cell'].unique()),
    })


    

In [None]:
# Persist both per-grid-size lists as pickle files in the working directory.
for _pkl_name, _pkl_obj in (
    ('chronnet_graph_list.pkl', chronnet_graph_list),
    ('chronnet_result_list.pkl', chronnet_result_list),
):
    with open(_pkl_name, 'wb') as f:
        pickle.dump(_pkl_obj, f)

# Grid Size Sensitivity Analysis

In [None]:
# Summary table: one row per grid size, restricted to the scalar entries of
# chronnet_result_list, plus the share of each fire-event category.

keys = [
    'grid_size',
    'd_max',
    'total_nodes_before_pruning',
    'total_edges_before_pruning',
    'total_strength_before_pruning',
    'total_nodes_after_pruning',
    'total_edges_after_pruning',
    'total_strength_after_pruning',
    'largest_scc_nodes',
    'largest_scc_ratio',
    'n_all_fire-events',
    'n_network_events',
    'n_sf_events',
    'n_self_loops_events',
    'n_network_fire',
    'n_sf_fire',
    'n_self_loops',
    'n_network_fire_in_largest_scc',
    'n_sf_fire_in_largest_scc',
    'n_self_loops_in_largest_scc'
]
df = pd.DataFrame(chronnet_result_list)[keys]

# Shares among all raw events.
_n_events = df['n_all_fire-events']
df['ratio_net_events'] = df['n_network_events'] / _n_events
df['ratio_sf_events'] = df['n_sf_events'] / _n_events
df['ratio_self_loops_events'] = df['n_self_loops_events'] / _n_events

# Shares among the de-duplicated (cell, time_group) events.
_n_unique = df['n_network_fire'] + df['n_sf_fire'] + df['n_self_loops']
df['ratio_net'] = df['n_network_fire'] / _n_unique
df['ratio_sf'] = df['n_sf_fire'] / _n_unique
df['ratio_self_loops'] = df['n_self_loops'] / _n_unique

# Same shares, restricted to cells inside the largest SCC.
_n_in_scc = (
    df['n_network_fire_in_largest_scc']
    + df['n_sf_fire_in_largest_scc']
    + df['n_self_loops_in_largest_scc']
)
df['ratio_net_in_largest_scc'] = df['n_network_fire_in_largest_scc'] / _n_in_scc
df['ratio_sf_in_largest_scc'] = df['n_sf_fire_in_largest_scc'] / _n_in_scc
df['ratio_self_loops_in_largest_scc'] = df['n_self_loops_in_largest_scc'] / _n_in_scc

df

In [None]:
from adjustText import adjust_text
import numpy as np
import matplotlib.pyplot as plt

# Scatter of (largest-SCC node ratio, network + self-loop event ratio inside
# the largest SCC), one labelled point per grid size. Marker area scales with
# the number of nodes in the largest SCC.

min_size, max_size = 100, 200
nn = df['largest_scc_nodes']
nn_min, nn_max = nn.min(), nn.max()
nn_span = nn_max - nn_min
if nn_span > 0:
    df['size'] = (nn - nn_min) / nn_span * (max_size - min_size) + min_size
else:
    # All SCC sizes are equal (e.g. a single grid size): min-max scaling
    # would divide by zero and yield NaN marker sizes, so use the midpoint.
    df['size'] = (min_size + max_size) / 2

unique_sizes = np.sort(df['grid_size'].unique())
colors = plt.cm.viridis(np.linspace(0, 1, len(unique_sizes)))
color_map = dict(zip(unique_sizes, colors))

fig, ax = plt.subplots(figsize=(12, 6))
texts = []
for gs, sub in df.groupby('grid_size'):
    # y value = network share + self-loop share within the largest SCC
    y_vals = sub['ratio_net_in_largest_scc'] + sub['ratio_self_loops_in_largest_scc']
    ax.scatter(
        sub['largest_scc_ratio'],
        y_vals,
        color=color_map[gs],
        s=sub['size'],
        alpha=0.7,
        label=f'{gs}m'
    )
    for x, y in zip(sub['largest_scc_ratio'], y_vals):
        texts.append(ax.text(x, y, f'{gs}m', fontsize=18))

# Nudge the labels apart so that they do not overlap each other or the points.
adjust_text(
    texts,
    arrowprops=dict(arrowstyle='-', color='black', lw=0.5),
    expand_text=(1.1, 1.1),
    expand_points=(1.2, 1.2)
)


ax.set_xlabel('largest SCC node %', fontsize=18)
ax.set_ylabel('network events ratio in largest SCC', fontsize=18)


ax.tick_params(axis='both', labelsize=18)

plt.tight_layout()



# Network Topology Results

## Degree

In [None]:

grid_sizes = [2000, 3000, 5000]

network_list = dict(
    (entry['grid_size'], entry['chronnet_pruned'])
    for entry in chronnet_graph_list
)


def _node_degree_shares(g):
    """Return (sorted distinct degrees, share of g's nodes having each degree)."""
    tally = Counter(d for _, d in g.degree())
    n_nodes = g.number_of_nodes()
    distinct = sorted(tally)
    return distinct, [tally[d] / n_nodes for d in distinct]


fig, axes = plt.subplots(nrows=len(grid_sizes), ncols=3, figsize=(16, 12))

# One row per grid size: the full pruned graph in column 0, then its two
# largest SCCs in columns 1 and 2, all on log-log axes.
for i, size in enumerate(grid_sizes):
    graph = network_list.get(size)

    # Full graph degree distribution
    degs, props = _node_degree_shares(graph)
    ax = axes[i, 0]
    ax.scatter(degs, props)
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_title(f'Grid Size {size} - Full Graph')
    ax.set_xlabel('Degree')
    ax.set_ylabel('Proportion of Nodes')

    # Degree distributions of the two largest SCCs (degrees are counted
    # inside the SCC-induced subgraph)
    ranked_sccs = sorted(nx.strongly_connected_components(graph), key=len, reverse=True)
    for j, scc in enumerate(ranked_sccs[:2], start=1):
        deg_vals_scc, props_scc = _node_degree_shares(graph.subgraph(scc))

        ax = axes[i, j]
        ax.scatter(deg_vals_scc, props_scc)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_title(f'Grid Size {size} - SCC {j}')
        ax.set_xlabel('Degree')

fig.tight_layout()
plt.show()



In [None]:



grid_sizes = [2000, 3000, 5000]

network_list = dict(
    (entry['grid_size'], entry['chronnet_pruned'])
    for entry in chronnet_graph_list
)

fig, axes = plt.subplots(nrows=len(grid_sizes), ncols=3, figsize=(16, 12))

# One row per grid size, drawn as bar charts on linear axes: the full pruned
# graph in column 0, then its two largest SCCs in columns 1 and 2.
for i, size in enumerate(grid_sizes):
    graph = network_list.get(size)
    sccs = sorted(nx.strongly_connected_components(graph), key=len, reverse=True)[:2]

    # (panel title, graph to summarise) for each column of this row
    panels = [(f'Grid Size {size} - Full Graph', graph)]
    panels.extend(
        (f'Grid Size {size} - SCC {j}', graph.subgraph(scc))
        for j, scc in enumerate(sccs, start=1)
    )

    for col, (panel_title, panel_graph) in enumerate(panels):
        tally = Counter(d for _, d in panel_graph.degree())
        n_nodes = panel_graph.number_of_nodes()
        xs = sorted(tally)
        heights = [tally[x] / n_nodes for x in xs]

        ax = axes[i, col]
        ax.bar(xs, heights, width=0.8, color='skyblue', edgecolor='k')
        ax.set_title(panel_title)
        ax.set_xlabel('Degree')
        if col == 0:
            # only the first column carries the y-axis label
            ax.set_ylabel('Proportion of Nodes')

fig.tight_layout()
plt.show()



## Strength

In [None]:

grid_sizes = [2000, 3000, 5000]

network_list = dict(
    (entry['grid_size'], entry['chronnet_pruned'])
    for entry in chronnet_graph_list
)


def _node_strength_shares(g):
    """Return (sorted distinct strengths, share of g's nodes having each strength).

    Strength is the 'weight'-weighted degree of a node.
    """
    tally = Counter(s for _, s in g.degree(weight='weight'))
    n_nodes = g.number_of_nodes()
    distinct = sorted(tally)
    return distinct, [tally[s] / n_nodes for s in distinct]


fig, axes = plt.subplots(nrows=len(grid_sizes), ncols=3, figsize=(16, 12))

# One row per grid size: the full pruned graph in column 0, then its two
# largest SCCs in columns 1 and 2, all on log-log axes.
for i, size in enumerate(grid_sizes):
    graph = network_list.get(size)

    # Full graph strength distribution
    strs, props = _node_strength_shares(graph)
    ax = axes[i, 0]
    ax.scatter(strs, props)
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_title(f'Grid Size {size} - Full Graph Strength')
    ax.set_xlabel('Strength')
    ax.set_ylabel('Proportion of Nodes')

    # Top 2 SCCs strength distributions (strength is measured inside the
    # SCC-induced subgraph)
    ranked_sccs = sorted(nx.strongly_connected_components(graph), key=len, reverse=True)
    for j, scc in enumerate(ranked_sccs[:2], start=1):
        str_vals_scc, props_scc = _node_strength_shares(graph.subgraph(scc))

        ax = axes[i, j]
        ax.scatter(str_vals_scc, props_scc)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_title(f'Grid Size {size} - SCC {j} Strength')
        ax.set_xlabel('Strength ')

fig.tight_layout()
plt.show()




## Compute Topology metrics for each SCC

In [None]:
import networkx as nx

# Size, average shortest path length and average clustering of the two
# largest SCCs of each selected grid size.
grid_sizes = [2000, 3000, 5000]

network_list = dict(
    (entry['grid_size'], entry['chronnet_pruned'])
    for entry in chronnet_graph_list
)

results = []
for size in grid_sizes:
    graph = network_list.get(size)
    N_full, M_full = graph.number_of_nodes(), graph.number_of_edges()

    ranked_sccs = sorted(nx.strongly_connected_components(graph), key=len, reverse=True)
    for j, scc in enumerate(ranked_sccs[:2], start=1):
        subg = graph.subgraph(scc).copy()
        N_sub, M_sub = subg.number_of_nodes(), subg.number_of_edges()

        # A single-node component has no paths to average over.
        L = nx.average_shortest_path_length(subg) if N_sub > 1 else 0.0
        C = nx.average_clustering(subg)

        results.append(dict(
            grid_size=size,
            scc_rank=j,
            nodes=N_sub,
            edges=M_sub,
            prop_nodes=N_sub / N_full,
            prop_edges=M_sub / M_full,
            avg_path_length=L,
            # NOTE: this column holds nx.average_clustering, not nx.transitivity
            transitivity=C,
        ))

df = pd.DataFrame(results)

column_order = [
    'grid_size', 'scc_rank', 'nodes', 'edges',
    'prop_nodes', 'prop_edges',
    'avg_path_length', 'transitivity',
]
df = df.loc[:, column_order]

print(df)

In [None]:
import networkx as nx
import pandas as pd
import numpy as np

# Topology of the two largest SCCs of each selected grid size, plus a
# small-world coefficient sigma = (C / C_rand) / (L / L_rand), where C_rand
# and L_rand are averaged over directed G(n, m) random graphs with the same
# number of nodes and edges as the SCC.

grid_sizes = [2000, 3000, 5000]
NULL_MODEL_SAMPLES = 10  # random graphs drawn per SCC
NULL_MODEL_SEED = 42     # base seed so the null model is reproducible across runs

network_list = {
    entry['grid_size']: entry['chronnet_pruned']
    for entry in chronnet_graph_list
}

results = []
for size in grid_sizes:
    graph = network_list.get(size)
    N_full = graph.number_of_nodes()
    M_full = graph.number_of_edges()

    sccs = sorted(nx.strongly_connected_components(graph), key=len, reverse=True)[:2]
    for j, scc in enumerate(sccs, start=1):
        subg = graph.subgraph(scc).copy()
        N_sub = subg.number_of_nodes()
        M_sub = subg.number_of_edges()
        prop_nodes = N_sub / N_full
        prop_edges = M_sub / M_full

        # Observed metrics (a single-node component has no paths to average)
        if N_sub > 1:
            L = nx.average_shortest_path_length(subg)
        else:
            L = 0.0
        C = nx.average_clustering(subg)

        # Small-world coefficient: compare to null model
        L_rands = []
        C_rands = []
        for k in range(NULL_MODEL_SAMPLES):
            G_rand = nx.gnm_random_graph(
                n=N_sub, m=M_sub, seed=NULL_MODEL_SEED + k, directed=True
            )
            # compute metrics on largest SCC of random graph (the random graph
            # as a whole need not be strongly connected)
            comp = max(nx.strongly_connected_components(G_rand), key=len)
            Gc = G_rand.subgraph(comp)
            if Gc.number_of_nodes() > 1:
                L_rands.append(nx.average_shortest_path_length(Gc))
            else:
                L_rands.append(np.nan)
            C_rands.append(nx.average_clustering(Gc))

        # Average only the samples that produced a path length; avoids the
        # "mean of empty slice" warning when every sample degenerated.
        finite_L_rands = [v for v in L_rands if np.isfinite(v)]
        L_rand = float(np.mean(finite_L_rands)) if finite_L_rands else np.nan
        C_rand = float(np.mean(C_rands))

        # sigma is undefined when any normalising quantity is zero or
        # missing; report NaN instead of dividing by zero.
        if L > 0 and np.isfinite(L_rand) and L_rand > 0 and C_rand > 0:
            sigma = (C / C_rand) / (L / L_rand)
        else:
            sigma = np.nan

        results.append({
            'grid_size': size,
            'scc_rank': j,
            'nodes': N_sub,
            'edges': M_sub,
            'prop_nodes': prop_nodes,
            'prop_edges': prop_edges,
            'avg_path_length': L,
            # NOTE: this column holds nx.average_clustering, not nx.transitivity
            'transitivity': C,
            'small_worldness': sigma
        })

# Create DataFrame
df = pd.DataFrame(results)
# Reorder columns
df = df[[
    'grid_size', 'scc_rank', 'nodes', 'edges',
    'prop_nodes', 'prop_edges',
    'avg_path_length', 'transitivity', 'small_worldness'
]]

print(df)


# Compute Ground Truth and Node-Level Metrics

In [None]:

# For each selected grid size: rebuild the pruned chronnet, classify every
# (cell, time bin) fire event as network-driven or spontaneous ("sf"), turn
# the per-cell counts into per-time-bin frequencies (the y_true_* vectors)
# and collect per-node degree / strength metrics.

min_weight = 2
dmax_scale = 2
grid_sizes = [2000, 3000, 5000]
dmax_vals = [x * dmax_scale * np.sqrt(3) for x in grid_sizes]
time_bin = '12h'  # time bin for grouping fire events
# Lag between consecutive time bins; derived from time_bin so the "previous
# bin" lookup below always matches the binning.
time_lag = pd.Timedelta(time_bin)

shapefile_path = "/Users/mabelhu/Desktop/Code/DL_FIRE_SV-C2_576237/fire_archive_SV-C2_576237.shp"
gdf = gpd.read_file(shapefile_path)
gdf = gdf[gdf['CONFIDENCE'].isin(['h', 'n'])]
# transform to meter-based
if gdf.crs.to_string() == 'EPSG:4326':
    gdf = gdf.to_crs(epsg=3857)
# convert to datetime (pd.to_datetime already yields Timestamps, so no
# further element-wise conversion is needed)
gdf['ACQ_TIME'] = gdf['ACQ_TIME'].astype(str).str.zfill(4)
gdf['acq_time'] = pd.to_datetime(
    gdf['ACQ_DATE'].astype(str) + ' ' + gdf['ACQ_TIME'],
    format='%Y-%m-%d %H%M'
)


# grid size -> {cell: value}
all_y_true      = {}
all_y_true_sf   = {}
all_y_true_net  = {}
all_y_true_sis  = {}


burning_result_list = []
node_metrics_list = []

for hex_size, d_max in zip(grid_sizes, dmax_vals):

    hex_grid = cu.create_hex_grid(gdf, hex_size)
    gdf_sjoined = gpd.sjoin(
        gdf,
        hex_grid[['cell', 'geometry']],
        how='left',
        predicate='within',
        rsuffix='_hex'
    ).dropna(subset=['cell'])

    gdf_sjoined['time_group'] = gdf_sjoined['acq_time'].dt.floor(time_bin)

    valid_cells = gdf_sjoined['cell'].unique()
    # .copy(): columns are added to this frame below, so it must own its data
    # rather than be a filtered view of hex_grid (avoids SettingWithCopyWarning).
    hex_grid_filtered = hex_grid[hex_grid['cell'].isin(valid_cells)].copy()

    chronnet = cu.build_chronnet_freq(gdf_sjoined, dmax=d_max, freq=time_bin)
    subG = cu.prune_chronnet(chronnet, min_weight=min_weight)

    print(f"Grid size: {hex_size}, Number of nodes: {len(subG.nodes())}, Number of edges: {len(subG.edges())}")
    # from here on only cells that are nodes of the pruned chronnet count
    valid_cells = set(subG.nodes())
    df_fire = gdf_sjoined[gdf_sjoined['cell'].isin(valid_cells)].copy()

    # Number of distinct time bins with at least one fire; the common
    # denominator of every y_true_* frequency below.
    time_cnt = df_fire['time_group'].nunique()

    # time bin -> set of cells burning in that bin
    burning_by_time = df_fire.groupby('time_group')['cell'].apply(set).to_dict()
    network_events = []
    sf_events = []
    self_loops_events = []
    for t, cells in burning_by_time.items():
        prev_t = t - time_lag
        prev_cells = burning_by_time.get(prev_t, set())
        for node in cells:
            # NOTE(review): on a directed graph neighbors() yields successors;
            # confirm the edge direction produced by cu.build_chronnet_freq.
            neis = set(subG.neighbors(node))
            if prev_cells.intersection(neis):
                network_events.append((node, t))
            else:
                sf_events.append((node, t))

    cc_node_list = list(subG.nodes())

    #### Network events
    df_network_fire = pd.DataFrame(network_events, columns=['cell', 'time_group']).drop_duplicates()
    grouped_net = df_network_fire.groupby('cell').size().reset_index(name='burning_count_net')
    count_dict_net = dict(zip(grouped_net['cell'], grouped_net['burning_count_net']))
    y_true_network = np.array([count_dict_net.get(node, 0) for node in cc_node_list], dtype=float)
    y_true_network = y_true_network / time_cnt

    ##### SF events
    df_sf_fire = pd.DataFrame(sf_events, columns=['cell', 'time_group']).drop_duplicates()
    grouped_sf = df_sf_fire.groupby('cell').size().reset_index(name='burning_count_sf')
    count_dict_sf = dict(zip(grouped_sf['cell'], grouped_sf['burning_count_sf']))
    y_true_sf = np.array([count_dict_sf.get(node, 0) for node in cc_node_list], dtype=float)
    y_true_sf = y_true_sf / time_cnt

    ### all fire events
    grouped_df_fire_unique = df_fire[['cell', 'time_group']].drop_duplicates()
    grouped_burning_count_per_cell = grouped_df_fire_unique.groupby('cell').size().reset_index(name='burning_count')
    count_dict = dict(zip(grouped_burning_count_per_cell['cell'],
                          grouped_burning_count_per_cell['burning_count']))
    y_true = np.array([count_dict.get(node, 0)
                       for node in cc_node_list], dtype=float)
    y_true = y_true / time_cnt

    # y_true_sis = (y_true - y_true_sf) / (1 - y_true_sf).
    # Where y_true_sf == 1 the ratio is 0/0; leave NaN there explicitly
    # instead of triggering a division warning.
    sis_denominator = 1.0 - y_true_sf
    y_true_sis = np.full_like(y_true, np.nan)
    np.divide(y_true - y_true_sf, sis_denominator,
              out=y_true_sis, where=sis_denominator != 0)

    # dict: cell -> value, in cc_node_list order
    y_true_dict = dict(zip(cc_node_list, y_true))
    y_true_sf_dict = dict(zip(cc_node_list, y_true_sf))
    y_true_network_dict = dict(zip(cc_node_list, y_true_network))
    y_true_sis_dict = dict(zip(cc_node_list, y_true_sis))

    all_y_true[hex_size]     = y_true_dict
    all_y_true_sf[hex_size]  = y_true_sf_dict
    all_y_true_net[hex_size] = y_true_network_dict
    all_y_true_sis[hex_size] = y_true_sis_dict

    burning_result_list.append({
        'grid_size': hex_size,
        'y_true': y_true_dict,
        'y_true_sf': y_true_sf_dict,
        'y_true_network': y_true_network_dict,
        'y_true_sis': y_true_sis_dict}
    )

    # Per-hexagon frequencies, ready for mapping with
    # hex_grid_filtered.plot(column='network_p' / 'sf_p', cmap='hot', legend=True).
    hex_grid_filtered['network_p'] = hex_grid_filtered['cell'].map(y_true_network_dict).fillna(0)
    hex_grid_filtered['sf_p'] = hex_grid_filtered['cell'].map(y_true_sf_dict).fillna(0)

    ######### compute node metrics
    deg_dict = dict(subG.degree())
    str_dict = dict(subG.degree(weight='weight'))
    in_deg_dict = dict(subG.in_degree())
    out_deg_dict = dict(subG.out_degree())
    in_str_dict = dict(subG.in_degree(weight='weight'))
    out_str_dict = dict(subG.out_degree(weight='weight'))

    deg_df = pd.DataFrame({
        'cell':         cc_node_list,
        'degree':       [deg_dict[cell] for cell in cc_node_list],
        'strength':     [str_dict[cell] for cell in cc_node_list],
        'in_degree':    [in_deg_dict[cell] for cell in cc_node_list],
        'out_degree':   [out_deg_dict[cell] for cell in cc_node_list],
        'in_strength':  [in_str_dict[cell] for cell in cc_node_list],
        'out_strength': [out_str_dict[cell] for cell in cc_node_list],
        'v_true_sis':   [y_true_sis_dict[cell] for cell in cc_node_list],
        'v_true_sf':    [y_true_sf_dict[cell] for cell in cc_node_list],
        'v_true_net':   [y_true_network_dict[cell] for cell in cc_node_list],
        'v_true':       [y_true_dict[cell] for cell in cc_node_list]
    })

    # merge the degree dataframe with the burning count dataframe
    node_metrics = (grouped_burning_count_per_cell.merge(deg_df, on='cell', how='left')
                 .merge(grouped_sf, on='cell', how='left')
                 .merge(grouped_net, on='cell', how='left')
    )
    node_metrics_list.append({
        'grid_size': hex_size,
        'node_metrics': node_metrics
    })



In [None]:
# Persist the node metrics, the per-grid-size burning results and the four
# y_true lookup dicts as pickle files in the working directory.
_pickle_targets = (
    ('node_metrics_list.pkl', node_metrics_list),
    ('burning_result_list.pkl', burning_result_list),
    ('all_y_true.pkl', all_y_true),
    ('all_y_true_sf.pkl', all_y_true_sf),
    ('all_y_true_net.pkl', all_y_true_net),
    ('all_y_true_sis.pkl', all_y_true_sis),
)
for _pkl_name, _pkl_obj in _pickle_targets:
    with open(_pkl_name, 'wb') as f:
        pickle.dump(_pkl_obj, f)



# Compute the node-level metrics in SCCs 

In [None]:
# Reload the per-grid-size burning results and rebuild the four
# grid size -> {cell: value} lookups from them.
# (pickle.load executes arbitrary code: only open files this notebook wrote.)
with open('burning_result_list.pkl', 'rb') as f:
    burning_result_list = pickle.load(f)

all_y_true_sis = {}
all_y_true_sf = {}
all_y_true_net = {}
all_y_true = {}
for item in burning_result_list:
    _gs = item['grid_size']
    all_y_true_sis[_gs] = item['y_true_sis']
    all_y_true_sf[_gs] = item['y_true_sf']
    all_y_true_net[_gs] = item['y_true_network']
    all_y_true[_gs] = item['y_true']

In [None]:
def _read_pickle(path):
    """Unpickle and return the single object stored in the file at ``path``."""
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# chronnet graphs (raw and pruned) per grid size
filename = 'chronnet_graph_list.pkl'
chronnet_graph_list = _read_pickle(filename)

grid_sizes = [2000, 3000, 5000]

# grid size -> pruned chronnet
network_list = dict(
    (entry['grid_size'], entry['chronnet_pruned'])
    for entry in chronnet_graph_list
)

# node_metrics_list
node_metrics_list = _read_pickle('node_metrics_list.pkl')

# chronnet_result_list
chronnet_result_list = _read_pickle('chronnet_result_list.pkl')

In [None]:
# grid size -> spatially joined fire records stored under 'gdf_sjoined'
size_to_gdf = dict(
    (entry['grid_size'], entry['gdf_sjoined'])
    for entry in chronnet_result_list
)
size_to_gdf[5000]

In [None]:
# For the two largest SCCs of each selected grid size, compute per-node
# degree/strength, clustering, eigenvector, betweenness, closeness and
# PageRank scores (unweighted and weighted) and pair them with the observed
# burning frequencies (graph-wide and recomputed inside the SCC).

scc_node_result = []
grid_sizes = [2000, 3000, 5000]

# grid size -> spatially joined fire records; loop-invariant, so built once.
size_to_gdf = {
    entry['grid_size']: entry['gdf_sjoined']
    for entry in chronnet_result_list
}

for grid_size in grid_sizes:

    G = network_list.get(grid_size)
    sccs = list(nx.strongly_connected_components(G))
    top2_sccs = sorted(sccs, key=len, reverse=True)[:2]

    # graph-wide ground truth ({cell: value}); empty dict if the size is missing
    y_true_sis = all_y_true_sis.get(grid_size, {})
    y_true_sf = all_y_true_sf.get(grid_size, {})
    y_true_network = all_y_true_net.get(grid_size, {})
    y_true = all_y_true.get(grid_size, {})

    gdf_sjoined = size_to_gdf.get(grid_size, pd.DataFrame())

    for rank, scc_nodes in enumerate(top2_sccs, start=1):
        subG = G.subgraph(scc_nodes).copy()
        cc_node_list = list(subG.nodes())

        # Compute node metrics (all measured inside the SCC subgraph)
        in_deg_dict = dict(subG.in_degree())
        out_deg_dict = dict(subG.out_degree())
        in_str_dict = dict(subG.in_degree(weight='weight'))
        out_str_dict = dict(subG.out_degree(weight='weight'))
        deg_dict = dict(subG.degree())
        str_dict = dict(subG.degree(weight='weight'))

        # Ground truth recomputed with the SCC subgraph as the network
        scc_y_true_list = compute_y_true(
            gdf_sjoined,
            subG,
            cell_col='cell',
            time_col='time_group',
            lag_hours=12
        )
        scc_y_true_dict = scc_y_true_list['y_true_dict']
        scc_y_true_sf_dict = scc_y_true_list['y_true_sf_dict']
        scc_y_true_network_dict = scc_y_true_list['y_true_network_dict']
        scc_y_true_sis_dict = scc_y_true_list['y_true_sis_dict']

        # Undirected copy, used only for the clustering coefficients.
        # Caveat: where both u->v and v->u exist, a single undirected edge is
        # kept with an arbitrary choice of which edge data to use.
        subG_undir = subG.to_undirected()

        ev_dict = nx.eigenvector_centrality(
            subG,
            max_iter=1000,
            tol=1e-6
        )
        ev_dict_w = nx.eigenvector_centrality(
            subG,
            max_iter=1000,
            tol=1e-6,
            weight='weight'
        )
        betweeness_dict = nx.betweenness_centrality(
            subG,
            normalized=True
        )
        # NOTE(review): betweenness and closeness interpret the edge
        # attribute as a path *length*, so heavier chronnet edges count as
        # longer. Confirm that this is intended, or supply an inverse-weight
        # attribute instead.
        betweeness_dict_w = nx.betweenness_centrality(
            subG,
            normalized=True,
            weight='weight'
        )
        closeness_dict = nx.closeness_centrality(
            subG
        )
        closeness_dict_w = nx.closeness_centrality(
            subG,
            distance='weight'
        )
        clustering_coeffs = nx.clustering(subG_undir)
        clastering_coeffs_w = nx.clustering(subG_undir, weight='weight')

        # NetworkX's PageRank already uses a row‑stochastic transition matrix (out‑degree normalised).
        pagerank_dict = nx.pagerank(subG, alpha=0.85, max_iter=100, tol=1e-6, weight=None)
        pagerank_dict_w = nx.pagerank(subG, alpha=0.85, max_iter=100, tol=1e-6, weight='weight')

        scc_node_metrics = pd.DataFrame({
            'cell': cc_node_list,
            'degree': [deg_dict[cell] for cell in cc_node_list],
            'strength': [str_dict[cell] for cell in cc_node_list],
            'in_degree': [in_deg_dict[cell] for cell in cc_node_list],
            'out_degree': [out_deg_dict[cell] for cell in cc_node_list],
            'in_strength': [in_str_dict[cell] for cell in cc_node_list],
            'out_strength': [out_str_dict[cell] for cell in cc_node_list],
            'clustering_coefficient': [clustering_coeffs[cell] for cell in cc_node_list],
            'eigenvector_centrality': [ev_dict[cell] for cell in cc_node_list],
            'betweenness_centrality': [betweeness_dict[cell] for cell in cc_node_list],
            'closeness_centrality': [closeness_dict[cell] for cell in cc_node_list],
            'eigenvector_centrality_w': [ev_dict_w[cell] for cell in cc_node_list],
            'betweenness_centrality_w': [betweeness_dict_w[cell] for cell in cc_node_list],
            'closeness_centrality_w': [closeness_dict_w[cell] for cell in cc_node_list],
            'clustering_coefficient_w': [clastering_coeffs_w.get(cell, 0) for cell in cc_node_list],
            'pagerank': [pagerank_dict[c] for c in cc_node_list],
            'pagerank_w': [pagerank_dict_w[c] for c in cc_node_list],

            # ground truth from the whole pruned graph (0 when the cell is absent)
            'v_true_sis': [y_true_sis.get(cell, 0) for cell in cc_node_list],
            'v_true_net': [y_true_network.get(cell, 0) for cell in cc_node_list],
            'v_true_sf': [y_true_sf.get(cell, 0) for cell in cc_node_list],
            'v_true': [y_true.get(cell, 0) for cell in cc_node_list],

            # ground truth recomputed inside the SCC
            'v_true_scc': [scc_y_true_dict.get(cell, 0) for cell in cc_node_list],
            'v_true_scc_net': [scc_y_true_network_dict.get(cell, 0) for cell in cc_node_list],
            'v_true_scc_sf': [scc_y_true_sf_dict.get(cell, 0) for cell in cc_node_list],
            'v_true_scc_sis': [scc_y_true_sis_dict.get(cell, 0) for cell in cc_node_list]

        })

        scc_node_result.append({
            'grid_size': grid_size,
            'scc_rank': rank,
            'node_metrics': scc_node_metrics
        })






In [None]:
# Persist the per-SCC node metric tables as a pickle file in the working directory.
with open('scc_node_result.pkl', mode='wb') as f:
    pickle.dump(scc_node_result, file=f)

In [None]:
# One header line plus a describe() summary table per (grid size, SCC rank).

for rec in scc_node_result:
    metrics_frame = rec['node_metrics']
    header = f"grid_size={rec['grid_size']}, scc_rank={rec['scc_rank']}, rows={len(metrics_frame)}"
    print(header)
    display(metrics_frame.describe())

## scc_eval_list

In [None]:
# Reload the per-SCC node metric tables from scc_node_result.pkl.
# (pickle.load executes arbitrary code: only open files this notebook wrote.)
import pickle
with open('scc_node_result.pkl', mode='rb') as f:
    scc_node_result = pickle.load(f)

In [None]:
# Score every centrality baseline against the ground-truth column v_true_sis
# of each SCC record: JSD, recognition quality, Spearman / Kendall rank
# correlation, and NDCG at absolute and percentage cut-offs.

# Centrality columns that are evaluated as predictors of v_true_sis.
BASELINE_METRICS = ['degree','strength','eigenvector_centrality','eigenvector_centrality_w', 'betweenness_centrality', 'closeness_centrality','pagerank', 'pagerank_w']
# Absolute top-k cut-offs -> result columns ndcg_100, ndcg_300, ...
NDCG_TOP_K = (100, 300, 500, 1000)
# Relative cut-offs (fraction of the SCC size) -> result columns ndcg_1p, ndcg_5p, ...
NDCG_TOP_FRACTION = {'1p': 0.01, '5p': 0.05, '10p': 0.1, '20p': 0.2, '50p': 0.5}

scc_eval_list = []

for rec in scc_node_result:
    node_metrics = rec['node_metrics']
    v_true_sis = node_metrics['v_true_sis'].values

    for metric in BASELINE_METRICS:
        v_metric = node_metrics[metric].values

        eval_row = {
            'grid_size': rec['grid_size'],
            'scc_rank': rec['scc_rank'],
            'metric': metric,
            'JSD': jsd_from_samples(v_metric, v_true_sis, scale='minmax'),
            'RQ': recognition_quality(v_metric, v_true_sis),
            'rho': spearmanr(v_metric, v_true_sis).correlation,
            'ken': kendalltau(v_metric, v_true_sis).correlation,
        }

        # ndcg_score(y_true, y_score): the ground truth must come FIRST and the
        # ranking being judged second. The previous call order was swapped,
        # which scored the ground truth as if it were the prediction and made
        # these numbers incomparable with the SIS sweep (which uses
        # ndcg_score([y_true_sis], [v_sis])).
        for k in NDCG_TOP_K:
            eval_row[f'ndcg_{k}'] = ndcg_score([v_true_sis], [v_metric], k=k)

        for label, fraction in NDCG_TOP_FRACTION.items():
            # Keep at least one node so that a small SCC never produces k=0
            # (NOTE(review): recent scikit-learn releases appear to reject k < 1).
            k = max(1, int(len(v_metric) * fraction))
            eval_row[f'ndcg_{label}'] = ndcg_score([v_true_sis], [v_metric], k=k)

        scc_eval_list.append(eval_row)


scc_eval_df = pd.DataFrame(scc_eval_list)
scc_eval_df

In [None]:
# Write the baseline evaluation table to CSV (row index omitted) so the
# plotting cells can read it back.
scc_eval_df.to_csv(path_or_buf='scc_eval_results.csv', index=False)

# SIS modeling

## Sweep Tau experiment

In [None]:
# Load the burning results and index each ground-truth variant by grid size.
with open('burning_result_list.pkl', mode='rb') as pkl_in:
    burning_result_list = pickle.loads(pkl_in.read())

all_y_true_sis = {}
all_y_true_sf = {}
all_y_true_net = {}
all_y_true = {}
for item in burning_result_list:
    size_key = item['grid_size']
    all_y_true_sis[size_key] = item['y_true_sis']
    all_y_true_sf[size_key] = item['y_true_sf']
    all_y_true_net[size_key] = item['y_true_network']
    all_y_true[size_key] = item['y_true']

In [None]:
import numpy as np
import pandas as pd
import networkx as nx

# Sweep the effective infection rate tau for the two largest strongly
# connected components (SCCs) of each pruned chronnet and compare the NIMFA SIS
# steady state with the ground-truth columns loaded into all_y_true*.
grid_sizes = [2000, 3000, 5000]


network_list = {
    entry['grid_size']: entry['chronnet_pruned']
    for entry in chronnet_graph_list
}

gamma = 1.0
tau_c = 1.0
tau_vals = np.linspace(1.1 * tau_c, 100 * tau_c, num=100)

# Absolute and relative NDCG cut-offs (result columns ndcg_50 ... ndcg_50p).
SIS_NDCG_TOP_K = (50, 100, 300, 500, 1000)
SIS_NDCG_TOP_FRACTION = {'1p': 0.01, '5p': 0.05, '10p': 0.1, '20p': 0.2, '50p': 0.5}


sis_results_list = []

for grid_size in grid_sizes:
    G = network_list.get(grid_size)
    if G is None:
        # Without this guard networkx fails with an opaque error on None.
        print(f"[grid {grid_size}] no pruned chronnet available, skip")
        continue

    sccs = list(nx.strongly_connected_components(G))
    top2_sccs = sorted(sccs, key=len, reverse=True)[:2]

    for rank, scc_nodes in enumerate(top2_sccs, start=1):
        subG = G.subgraph(scc_nodes).copy()
        cc_node_list = list(subG.nodes())
        W = nx.to_numpy_array(subG, nodelist=cc_node_list, weight='weight', dtype=float)
        lambda_max = np.max(np.abs(np.linalg.eigvals(W)))
        if lambda_max == 0:
            # A degenerate SCC (e.g. one node without a self-loop) cannot be normalised.
            print(f"[grid {grid_size} | SCC {rank}] zero spectral radius, skip")
            continue
        # Normalise by the spectral radius of W.
        W_norm = W / lambda_max

        # The ground-truth vectors depend only on the SCC, not on tau, so build
        # them once per SCC. The same arrays are therefore referenced by every
        # result record of this SCC; treat them as read-only.
        y_true = np.array([all_y_true[grid_size].get(node, 0) for node in cc_node_list], dtype=float)
        y_true_sf = np.array([all_y_true_sf[grid_size].get(node, 0) for node in cc_node_list], dtype=float)
        y_true_sis = np.array([all_y_true_sis[grid_size].get(node, 0) for node in cc_node_list], dtype=float)
        y_true_network = np.array([all_y_true_net[grid_size].get(node, 0) for node in cc_node_list], dtype=float)

        for tau in tau_vals:
            beta = tau * gamma
            print(f"[grid {grid_size} | SCC {rank}] tau={tau:.3f}, beta={beta:.3f}")

            v_sis = nimfa_sis_steady_state_root_1(W_norm, beta, gamma)
            if np.all(v_sis == 0):
                print(f"[grid {grid_size} | SCC {rank}] tau={tau:.3f} not converged, skip")
                continue

            # Hybrid estimate: y_true_sf plus the SIS steady state weighted by (1 - y_true_sf).
            v_hybrid = y_true_sf + (1 - y_true_sf) * v_sis

            # hybrid estimate vs. y_true
            rq_val         = recognition_quality(v_hybrid, y_true)
            jsd_val        = jsd_from_samples(v_hybrid, y_true, scale='minmax')
            mse            = np.mean((v_hybrid - y_true) ** 2)
            rmse           = np.sqrt(mse)

            # SIS steady state vs. y_true_sis
            rq_val_net     = recognition_quality(v_sis, y_true_sis)
            mse_net        = np.mean((v_sis - y_true_sis) ** 2)
            rmse_net       = np.sqrt(mse_net)
            jsd_val_net_nm = jsd_from_samples(v_sis, y_true_sis, scale='minmax')

            # Spearman rank correlation: compute once, read both fields.
            rank_corr = spearmanr(v_sis, y_true_sis)
            rho = rank_corr.statistic
            p_value = rank_corr.pvalue

            # NDCG with the ground truth as y_true and the SIS state as the score.
            ndcg_abs = {
                f'ndcg_{k}': ndcg_score([y_true_sis], [v_sis], k=k)
                for k in SIS_NDCG_TOP_K
            }
            # Percentage cut-offs; keep k >= 1 so a small SCC never yields k=0.
            ndcg_pct = {
                f'ndcg_{label}': ndcg_score(
                    [y_true_sis], [v_sis], k=max(1, int(len(v_sis) * fraction))
                )
                for label, fraction in SIS_NDCG_TOP_FRACTION.items()
            }

            sis_results_list.append({
                'grid_size': grid_size,
                'scc_rank':      rank,
                'tau':           tau,
                'beta':          beta,
                'gamma':         gamma,
                'cc_node_list': cc_node_list,

                'RQ':            rq_val,
                'JSD':           jsd_val,
                'MSE':           mse,
                'RMSE':          rmse,
                'RQ_net':        rq_val_net,
                'MSE_net':       mse_net,
                'RMSE_net':      rmse_net,
                'JSD_net_norm':  jsd_val_net_nm,

                'rho':           rho,
                'p_value':       p_value,
                **ndcg_abs,
                **ndcg_pct,

                'v_sis':         v_sis,
                'v_hybrid':      v_hybrid,

                'y_true':     y_true,
                'y_true_sis': y_true_sis,
                'y_true_sf':  y_true_sf,
                'y_true_net': y_true_network

            })

 


In [None]:
# Persist the complete tau-sweep records (including the per-node arrays) as a pickle.
with open('sis_results_list.pkl', mode='wb') as pkl_out:
    pkl_out.write(pickle.dumps(sis_results_list))

## Select the optimal tau: best_tau_df 

In [None]:
import pandas as pd
import numpy as np

FILE = "sis_results_tau.csv"          # ← adjust if the file lives elsewhere

# Column names of the sweep table used by the tau selection.
RQ_COL  = "RQ_net"
JSD_COL = "JSD_net_norm"
TAU_COL = "tau"

def pick_tau_turning_point(df,
                           rq_plateau_thresh=2e-3,   # |ΔRQ| below this ⇒ flat
                           plateau_window=2,         # how many flat steps in a row
                           rq_plateau_frac=0.95):    # fallback: ≥95 % of max RQ
    """
    Select one tau for a single (grid_size, scc_rank) sub-DataFrame.

    Selection steps:
      1. Average RQ and JSD over repeated runs that share the same tau.
      2. Walk over tau in ascending order and take the first "turning point":
         JSD has just started to increase while the last ``plateau_window``
         RQ steps each changed by less than ``rq_plateau_thresh``.
      3. If there is no turning point, fall back to the lowest-JSD row among
         those whose RQ is at least ``rq_plateau_frac`` times the maximum RQ.
      4. If some other tau has both a strictly lower JSD and a strictly higher
         RQ than the candidate, return the highest-RQ one of those instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns named by TAU_COL, RQ_COL and JSD_COL.
    rq_plateau_thresh : float
        Absolute RQ change below which a step counts as flat.
    plateau_window : int
        Number of consecutive flat RQ steps required at the turning point.
    rq_plateau_frac : float
        Fraction of the maximum RQ that defines the fallback plateau.

    Returns
    -------
    pandas.Series
        The selected row, holding at least TAU_COL, RQ_COL and JSD_COL.
    """
    # 1. average repeated runs at the same τ
    agg = (df.groupby(TAU_COL)
             .agg({RQ_COL: "mean", JSD_COL: "mean"})
             .sort_index()
             .reset_index())

    # If we only have a couple of τ values, just grab the “best” one.
    if len(agg) < 3:
        return agg.sort_values([RQ_COL, JSD_COL], ascending=[False, True]).iloc[0]

    # 2. finite differences
    agg["dJSD"] = agg[JSD_COL].diff()
    agg["dRQ"]  = agg[RQ_COL].diff()

    best = None
    # 3. turning point: JSD just turned upward and RQ has been flat.
    #    Start at index >= 1 because the test looks back at row i - 1.
    for i in range(max(plateau_window, 1), len(agg)):
        jsd_turn = agg.loc[i - 1, "dJSD"] <= 0 and agg.loc[i, "dJSD"] > 0
        rq_flat  = (agg.loc[i - plateau_window + 1 : i, "dRQ"]
                      .abs()
                      .lt(rq_plateau_thresh)
                      .all())
        if jsd_turn and rq_flat:
            best = agg.loc[i, [TAU_COL, RQ_COL, JSD_COL]]
            break

    # 4. fallback. This must run AFTER the loop: when it lived inside the loop
    #    body it never executed if the loop had no iterations
    #    (plateau_window >= len(agg)), leaving `best` as None and crashing below.
    if best is None:
        max_rq = agg[RQ_COL].max()
        plateau = agg[agg[RQ_COL] >= rq_plateau_frac * max_rq]
        best = plateau.nsmallest(1, JSD_COL).iloc[0]

    # 5. prefer any tau that is strictly better on both criteria.
    better = agg[
        (agg[JSD_COL] < best[JSD_COL]) &
        (agg[RQ_COL] > best[RQ_COL])
    ]
    if not better.empty:
        best = better.sort_values(RQ_COL, ascending=False).iloc[0]

    return best[[TAU_COL, RQ_COL, JSD_COL]]



# ------------------------------------------------------------------ main driver
# Build the sweep table directly from the in-memory results and select one tau
# per (grid_size, scc_rank) group.
df = pd.DataFrame(data=sis_results_list)

best_rows = []
for group_key, sub in df.groupby(["grid_size", "scc_rank"]):
    gsize, rank = group_key
    best = pick_tau_turning_point(sub)
    summary_row = dict(grid_size=gsize, scc_rank=rank, tau_opt=best[TAU_COL])
    summary_row[RQ_COL] = best[RQ_COL]
    summary_row[JSD_COL] = best[JSD_COL]
    best_rows.append(summary_row)

results = pd.DataFrame(best_rows).sort_values(by=["grid_size", "scc_rank"])
print(results.to_string(index=False))

# Keep the table under a descriptive name and also write it to disk.
best_tau_df = results
results.to_csv("best_tau_by_grid_scc.csv", index=False)


 grid_size  scc_rank   tau_opt   RQ_net  JSD_net_norm
      2000         1 16.084848 0.673588      0.624126
      2000         2 25.075758 0.754363      0.531153
      3000         1 48.052525 0.744169      0.382687
      3000         2 17.083838 0.773537      0.156139
      5000         1 29.071717 0.761696      0.217230
      5000         2 23.077778 0.818132      0.178870


## Scatter plot with baseline

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# RQ-vs-JSD scatter of the tau sweep, one panel per (grid_size, scc_rank),
# with the selected tau highlighted and the PageRank baseline drawn as
# dashed reference lines.
best_tau_df = pd.read_csv('best_tau_by_grid_scc.csv')
scc_eval_df = pd.read_csv('scc_eval_results.csv')
sis_results_df = pd.DataFrame(sis_results_list)

# NOTE: despite its historical name, `degree_df` holds the weighted-PageRank
# ('pagerank_w') rows of scc_eval_df; the legend below labels it accordingly.
degree_df = scc_eval_df[scc_eval_df['metric'] == 'pagerank_w']

# Size the panel grid from the data instead of assuming 3 grid sizes x 2 SCCs,
# so a different sweep does not raise IndexError. For 3 x 2 the figure size is
# the same (12, 18) as before.
n_grid_rows = sis_results_df['grid_size'].nunique()
n_scc_cols = sis_results_df['scc_rank'].nunique()
fig, axes = plt.subplots(
    nrows=n_grid_rows, ncols=n_scc_cols,
    figsize=(6 * n_scc_cols, 6 * n_grid_rows),
    squeeze=False,   # always a 2-D array, even for a single row / column
    gridspec_kw={'wspace': 0.3, 'hspace': 0.4, 'right': 0.85}
)

# Loop over grid_size (rows) and scc_rank (cols)
for i, (grid_size, sub_df) in enumerate(sis_results_df.groupby('grid_size')):
    for j, (scc_rank, group) in enumerate(sub_df.groupby('scc_rank')):
        ax = axes[i, j]

        scatter = ax.scatter(
            group['JSD_net_norm'],
            group['RQ_net'],
            c=group['tau'],
            cmap='viridis',
            s=30,
            alpha=0.7,
            label='SIS steady-state infection rate'
        )

        # --- highlight the selected tau ---
        best_row = best_tau_df[(best_tau_df['grid_size'] == grid_size) & (best_tau_df['scc_rank']  == scc_rank)]

        if not best_row.empty:
            x = best_row['JSD_net_norm'].values[0]
            y = best_row['RQ_net'].values[0]
            tau_label = f"τ={best_row['tau_opt'].values[0]:.2f}"

            ax.scatter(x, y, color='red', s=80, marker='*', label='Optimal τ')
            ax.annotate(tau_label, (x, y),
                        textcoords="offset points", xytext=(5, -10),
                        ha='left', fontsize=9, color='red')

        # --- PageRank baseline: vertical line at its JSD, horizontal at its RQ ---
        baseline_rows = degree_df[
            (degree_df['grid_size'] == grid_size) &
            (degree_df['scc_rank']  == scc_rank)
        ]
        if baseline_rows.empty:
            # Previously `.iloc[0]` raised IndexError here; skip the lines instead.
            print(f"Warning: no PageRank baseline for grid={grid_size}, rank={scc_rank}")
        else:
            baseline = baseline_rows.iloc[0]
            ax.axvline(baseline['JSD'], linestyle='--', label='Node PageRank baseline')
            ax.axhline(baseline['RQ'],  linestyle='--')

        ax.set_xlabel('JSD')
        ax.set_ylabel('Average Recall (AR)')
        ax.set_title(f'grid_size={grid_size}, scc_rank={scc_rank}')
        ax.margins(x=0.05, y=0.05)
        ax.legend(loc='lower left', bbox_to_anchor=(0.02, 0.02))

    # One colour bar per row, driven by the last scatter drawn in that row.
    fig.colorbar(scatter, ax=axes[i, :],
            label='τ value',
            orientation='vertical',
            fraction=0.025,
            pad=0.02   )

plt.show()


fig.savefig('sis_results_tau.pdf', format='pdf', bbox_inches='tight')

In [None]:
# Map each grid size to its node-metric table.
metrics_by_size = dict(
    (entry['grid_size'], entry['node_metrics'])
    for entry in node_metrics_list
)

In [None]:
# Flatten the tau-sweep records into a table and write it out as CSV.
# NOTE(review): the array-valued entries (v_sis, y_true, ...) end up as text in
# the CSV; reload the pickle when the numeric arrays are needed.
sis_df = pd.DataFrame(data=sis_results_list)
sis_df.to_csv(path_or_buf='sis_results.csv', index=False)

In [None]:
import pandas as pd

# Look at the sweep rows that belong to grid size 5000 only.
df = pd.DataFrame(data=sis_results_list)
result_df = df.loc[df['grid_size'].eq(5000)]
print(result_df)

## NDCG: SIS (with the optimal tau) vs PageRank

In [None]:
import pandas as pd
import pickle



# Reload the sweep records and keep, for every (grid_size, scc_rank), the
# numeric metrics of the row whose tau equals the selected tau_opt.
with open('sis_results_list_100.pkl', mode='rb') as pkl_in:
    sis_results_list = pickle.loads(pkl_in.read())

scc_df = pd.DataFrame(data=sis_results_list)

best_tau_df = pd.read_csv('best_tau_by_grid_scc.csv')
merged_rows = []

for _, row in best_tau_df.iterrows():
    g = row['grid_size']
    r = row['scc_rank']
    tau_opt = row['tau_opt']

    # tau went through a CSV round trip, so compare after rounding to 6 decimals.
    same_scc = (scc_df['grid_size'] == g) & (scc_df['scc_rank'] == r)
    same_tau = scc_df['tau'].round(6) == round(tau_opt, 6)
    match = scc_df[same_scc & same_tau]

    if match.empty:
        print(f"Warning: no match found for grid={g}, rank={r}, tau={tau_opt}")
        continue

    # Average the numeric columns of the matching rows, then stamp the keys.
    merged = match.mean(numeric_only=True).to_dict()
    merged.update(grid_size=g, scc_rank=r, tau_opt=tau_opt)
    merged_rows.append(merged)


final_df = pd.DataFrame(merged_rows).sort_values(['grid_size', 'scc_rank'])

final_df



Unnamed: 0,grid_size,scc_rank,tau,beta,gamma,RQ,JSD,MSE,RMSE,RQ_net,...,ndcg_100,ndcg_300,ndcg_500,ndcg_1000,ndcg_1p,ndcg_5p,ndcg_10p,ndcg_20p,ndcg_50p,tau_opt
0,2000.0,1.0,16.084848,16.084848,1.0,0.447073,0.433602,0.212329,0.460792,0.673588,...,0.518537,0.656073,0.726481,0.801077,0.398057,0.550256,0.636761,0.735868,0.827745,16.084848
1,2000.0,2.0,25.075758,25.075758,1.0,0.498715,0.464845,0.223227,0.472469,0.754363,...,0.807354,0.843784,0.883469,0.917787,0.667798,0.780181,0.824938,0.840175,0.894838,25.075758
2,3000.0,1.0,48.052525,48.052525,1.0,0.531685,0.324928,0.276043,0.525398,0.744169,...,0.570478,0.678075,0.730133,0.794098,0.537202,0.669103,0.742679,0.806082,0.882797,48.052525
3,3000.0,2.0,17.083838,17.083838,1.0,0.660871,0.312646,0.046128,0.214775,0.773537,...,0.742701,0.77369,0.787421,0.809069,0.603978,0.734376,0.753121,0.775593,0.804727,17.083838
4,5000.0,1.0,29.071717,29.071717,1.0,0.60896,0.407102,0.161796,0.402239,0.761696,...,0.663033,0.750054,0.781374,0.827452,0.616333,0.706,0.758263,0.793197,0.888892,29.071717
5,5000.0,2.0,23.077778,23.077778,1.0,0.667864,0.327543,0.081528,0.285531,0.818132,...,0.62991,0.730102,0.753207,0.767546,0.509998,0.622755,0.667259,0.735256,0.76774,23.077778


In [None]:
# Store the best-tau metric table (row index omitted) for the comparison plots.
final_df.to_csv(path_or_buf='sis_best_tau_results.csv', index=False)

In [None]:
# NDCG@k of the SIS model (at the selected tau) against the weighted-PageRank
# baseline, one panel per (grid_size, scc_rank).
scc_eval_df = pd.read_csv('scc_eval_results.csv')
pagerank_df = scc_eval_df.loc[scc_eval_df['metric'] == 'pagerank_w']  # weighted-PageRank baseline rows
ndcg_cols = ['ndcg_100', 'ndcg_300', 'ndcg_500', 'ndcg_1000']
final_df = pd.read_csv('sis_best_tau_results.csv')


combinations = (
    final_df[['grid_size', 'scc_rank']]
    .drop_duplicates()
    .sort_values(['grid_size', 'scc_rank'])
    .values
)

n_plots = len(combinations)
n_cols = 2
n_rows = -(-n_plots // n_cols)   # ceiling division

fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 4 * n_rows), squeeze=False)

# (table, marker, legend label), drawn in this order on every panel.
series_specs = [
    (pagerank_df, 's', 'PageRank baseline'),
    (final_df, 'o', 'SIS model'),
]

for idx, (grid_size, scc_rank) in enumerate(combinations):
    ax = axes[divmod(idx, n_cols)]

    for table, marker, label in series_specs:
        rows = table[(table['grid_size'] == grid_size) & (table['scc_rank'] == scc_rank)]
        if rows.empty:
            continue
        first_row = rows.iloc[0]
        ax.plot(ndcg_cols, [first_row[col] for col in ndcg_cols], marker=marker, label=label)

    ax.set_title(f"grid_size={int(grid_size)}, scc_rank={int(scc_rank)}")
    ax.set_ylim(0.3, 1)
    ax.set_ylabel("NDCG")
    ax.set_xlabel("Top-k")
    ax.legend()

# Remove the panels that were left without data.
for unused_ax in axes.flat[n_plots:]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.savefig('sis_results_ndcg_by_grid_scc.pdf', format='pdf', bbox_inches='tight')
plt.show()

In [None]:
# Export the SIS columns used in the ablation table.
colmns = [
    'grid_size', 'scc_rank', 'tau_opt', 'RQ_net', 'JSD_net_norm',
    'ndcg_100', 'ndcg_300', 'ndcg_500', 'ndcg_1000',
]
final_df1 = final_df.loc[:, colmns]
final_df1.to_csv(path_or_buf='ablation_study_sis.csv', index=False)

In [None]:
# Export the matching weighted-PageRank baseline columns for the ablation table.
colmns = [
    'grid_size', 'scc_rank', 'RQ', 'JSD',
    'ndcg_100', 'ndcg_300', 'ndcg_500', 'ndcg_1000',
]
pagerank_df = scc_eval_df.loc[scc_eval_df['metric'] == 'pagerank_w']
pagerank_df1 = pagerank_df.loc[:, colmns]
pagerank_df1.to_csv(path_or_buf='ablation_study_pagerank.csv', index=False)

# Correlation: Node Centrality vs Fire risk

In [65]:
# Pick the node-metric table of grid size 5000 and list its columns.
grid_size = 5000
node_metrics = None
for rec in node_metrics_list:
    if rec['grid_size'] == grid_size:
        # first matching record wins
        node_metrics = rec['node_metrics']
        break
node_metrics.columns


Index(['cell', 'burning_count', 'degree', 'strength', 'in_degree',
       'out_degree', 'in_strength', 'out_strength', 'v_true_sis', 'v_true_sf',
       'v_true_net', 'v_true', 'burning_count_sf', 'burning_count_net'],
      dtype='object')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
import numpy as np

# Scatter grid: one row per SCC record, one column per centrality metric, each
# panel annotated with the Spearman correlation against v_true_scc_sis.
n_rows = len(scc_node_result)
metrics = [
    'degree',
    'strength',
    'pagerank',
    'eigenvector_centrality',
    'betweenness_centrality',
    'closeness_centrality',
    'clustering_coefficient'
]
spearman_results = []

n_cols = len(metrics)
# one 30-inch-wide row per SCC record, 5 inches tall each
fig, axes = plt.subplots(n_rows, n_cols, figsize=(30, 5 * n_rows), squeeze=False)

# Shared look of the annotation box in every panel.
note_box = dict(facecolor='white', alpha=0.6, edgecolor='gray')

for row_idx, rec in enumerate(scc_node_result):
    node_metrics = rec['node_metrics']
    v_true_sis = node_metrics['v_true_scc_sis'].values
    grid_size, scc_rank = rec['grid_size'], rec['scc_rank']

    for col_idx, metric in enumerate(metrics):
        ax = axes[row_idx][col_idx]
        v_metric = node_metrics[metric].values

        sns.scatterplot(x=v_true_sis, y=v_metric, ax=ax)

        # x axis is always logarithmic; y axis only for 'strength'
        ax.set_xscale('log')
        if metric == 'strength':
            ax.set_yscale('log')

        ax.set(
            xlabel='Ground Truth (true network-driven fire probability)',
            ylabel=metric,
            title=f'Grid {grid_size}, Rank {scc_rank}\n{metric}',
        )

        try:
            spearman_r, spearman_p = spearmanr(v_true_sis, v_metric)

            # anchor the annotation at the largest data values of the panel
            x_pos = np.nanmax(v_true_sis)
            y_pos = np.nanmax(v_metric)

            ax.text(x_pos, y_pos,
                    f'Spearman r={spearman_r:.3f}, p={spearman_p:.3g}',
                    ha='right', va='top', fontsize=10,
                    bbox=note_box)
            spearman_results.append(dict(
                grid_size=grid_size,
                scc_rank=scc_rank,
                metric=metric,
                spearman_r=spearman_r,
                spearman_p=spearman_p,
            ))
        except Exception as e:
            # show the failure inside the panel instead of aborting the figure
            ax.text(0.95, 0.95, f'Error:\n{e}', transform=ax.transAxes,
                    ha='right', va='top', fontsize=10,
                    bbox=note_box)

plt.tight_layout()
plt.show()

fig.savefig('/Users/mabelhu/Desktop/figure/scc_node.pdf', format='pdf')


In [13]:
# Tabulate the per-panel Spearman statistics and write them to CSV.
df_spearman = pd.DataFrame(data=spearman_results)
df_spearman.to_csv(path_or_buf='scc_node_spearman_results.csv', index=False)

In [56]:
# JSD / RQ of the weighted-PageRank baseline, ordered by grid size and SCC rank.
columns = ['grid_size', 'scc_rank', 'JSD', 'RQ']
a = (
    scc_eval_df
    .loc[scc_eval_df['metric'] == 'pagerank_w', columns]
    .sort_values(['grid_size', 'scc_rank'])
)
a.to_csv(path_or_buf='pagerank_eval.csv', index=False)


In [60]:
# Show only the Spearman rho of the SIS model per (grid_size, scc_rank).
columns_to_keep = ['grid_size', 'scc_rank', 'rho']
final_df.loc[:, columns_to_keep]

Unnamed: 0,grid_size,scc_rank,rho
0,2000.0,1.0,0.578604
1,2000.0,2.0,0.74268
2,3000.0,1.0,0.755673
3,3000.0,2.0,0.707319
4,5000.0,1.0,0.800925
5,5000.0,2.0,0.790563


# Method Validation: Spontaneous fire vs network driven fire 

In [None]:

# Reload the per-grid node metric tables and display them.
with open('node_metrics_list.pkl', mode='rb') as pkl_in:
    node_metrics_list = pickle.loads(pkl_in.read())

node_metrics_list


In [None]:
# Build node_geo_df: for every grid size, the two ground-truth columns of each
# cell joined with the geometry of that cell in the hex grid.
shapefile_path = "/Users/mabelhu/Desktop/Code/DL_FIRE_SV-C2_576237/fire_archive_SV-C2_576237.shp"
gdf = gpd.read_file(shapefile_path)
confidence_ok = gdf['CONFIDENCE'].isin(['h', 'n'])
gdf = gdf[confidence_ok]

# Reproject lon/lat data to the metre-based EPSG:3857
if gdf.crs.to_string() == 'EPSG:4326':
    gdf = gdf.to_crs(epsg=3857)

# Combine ACQ_DATE and the zero-padded HHMM ACQ_TIME into one timestamp column
gdf['ACQ_TIME'] = gdf['ACQ_TIME'].astype(str).str.zfill(4)
timestamp_text = gdf['ACQ_DATE'].astype(str) + ' ' + gdf['ACQ_TIME']
gdf['acq_time'] = pd.to_datetime(timestamp_text, format='%Y-%m-%d %H%M')
gdf['acq_time'] = gdf['acq_time'].apply(pd.Timestamp)


with open('node_metrics_list.pkl', mode='rb') as pkl_in:
    node_metrics_list = pickle.loads(pkl_in.read())

node_geo_list = []

for item in node_metrics_list:
    grid_size = item['grid_size']
    node_metrics = item['node_metrics'][['cell', 'v_true_sis', 'v_true_sf']]

    # expected to return a table with 'cell' and 'geometry' columns
    hex_grid = cu.create_hex_grid(gdf, grid_size)

    # make sure the 'cell' join key exists
    if 'cell' not in hex_grid.columns:
        raise ValueError(f"'cell' column missing in hex_grid for grid_size={grid_size}")

    merged_df = node_metrics.merge(hex_grid[['cell', 'geometry']], on='cell', how='left')

    # One record per node: grid size first, then cell, v_true_sis, v_true_sf, geometry.
    node_geo_list.extend(
        {'grid_size': grid_size, **record}
        for record in merged_df.to_dict('records')
    )

node_geo_df = pd.DataFrame(node_geo_list)

# Promote to a GeoDataFrame that carries the CRS of the fire detections.
node_geo_df = gpd.GeoDataFrame(
    node_geo_df,
    geometry='geometry',
    crs=gdf.crs
)



## Compute the Spearman correlation between v_true_sis and v_true_sf for each grid size

In [None]:
# Spearman correlation between v_true_sis and v_true_sf, one row per grid size
# (grid sizes in order of first appearance).
spearman_results = []
for grid_size, subset in node_geo_df.groupby('grid_size', sort=False):
    spearman_r, spearman_p = spearmanr(subset['v_true_sis'], subset['v_true_sf'])
    spearman_results.append(dict(
        grid_size=grid_size,
        spearman_r=spearman_r,
        spearman_p=spearman_p,
    ))

spearman_df = pd.DataFrame(spearman_results)
print(spearman_df)

# Spatial Distribution

In [None]:
# Build scc_node_geo_df: v_true_sis of every SCC node joined with the geometry
# of its cell in the hex grid, tagged with grid size and SCC rank.
shapefile_path = "/Users/mabelhu/Desktop/Code/DL_FIRE_SV-C2_576237/fire_archive_SV-C2_576237.shp"
gdf = gpd.read_file(shapefile_path)
confidence_ok = gdf['CONFIDENCE'].isin(['h', 'n'])
gdf = gdf[confidence_ok]

# Reproject lon/lat data to the metre-based EPSG:3857
if gdf.crs.to_string() == 'EPSG:4326':
    gdf = gdf.to_crs(epsg=3857)

# Combine ACQ_DATE and the zero-padded HHMM ACQ_TIME into one timestamp column
gdf['ACQ_TIME'] = gdf['ACQ_TIME'].astype(str).str.zfill(4)
timestamp_text = gdf['ACQ_DATE'].astype(str) + ' ' + gdf['ACQ_TIME']
gdf['acq_time'] = pd.to_datetime(timestamp_text, format='%Y-%m-%d %H%M')
gdf['acq_time'] = gdf['acq_time'].apply(pd.Timestamp)

with open('scc_node_result.pkl', mode='rb') as pkl_in:
    scc_node_result = pickle.loads(pkl_in.read())

scc_node_geo_list = []

for item in scc_node_result:
    grid_size = item['grid_size']
    scc_rank = item['scc_rank']
    node_metrics = item['node_metrics'][['cell', 'v_true_sis']]

    hex_grid = cu.create_hex_grid(gdf, grid_size)

    # make sure the 'cell' join key exists
    if 'cell' not in hex_grid.columns:
        raise ValueError(f"'cell' column missing in hex_grid for grid_size={grid_size}")

    merged_df = node_metrics.merge(hex_grid[['cell', 'geometry']], on='cell', how='left')

    # One record per node: grid size and rank first, then cell, v_true_sis, geometry.
    scc_node_geo_list.extend(
        {'grid_size': grid_size, 'scc_rank': scc_rank, **record}
        for record in merged_df.to_dict('records')
    )

scc_node_geo_df = pd.DataFrame(scc_node_geo_list)

print(scc_node_geo_df.head())
print(len(scc_node_geo_df))


In [None]:
# Attach the SIS steady state (v_sis) at the selected tau to every SCC node in
# scc_node_geo_df: look up the sweep record whose rounded tau matches each row
# of sis_best_tau_results.csv, then join its per-node values on 'cell'.
# NOTE(review): this cell reads sis_results_list.pkl, while the cell that wrote
# sis_best_tau_results.csv read sis_results_list_100.pkl - confirm that both
# files hold the same sweep, otherwise no tau will match below.
with open('sis_results_list.pkl', 'rb') as f:
    sis_results_list = pickle.load(f)
sis_df = pd.DataFrame(sis_results_list)
final_df= pd.read_csv('sis_best_tau_results.csv')

# tau went through a CSV round trip, so join on a value rounded to 6 decimals.
final_df['tau_round'] = final_df['tau'].round(6)
sis_df['tau_round'] = sis_df['tau'].round(6)

merged_df = final_df.merge(
    sis_df[['grid_size', 'scc_rank', 'tau_round','cc_node_list' ,'v_sis']],
    on=['grid_size', 'scc_rank', 'tau_round'],
    how='left'
)

print(merged_df[['grid_size', 'scc_rank', 'tau_round','cc_node_list' ,'v_sis']])



# match scc_node_geo_df with merged_df with v_sis
scc_node_geo_df['v_sis'] = np.nan

for _, row in merged_df.iterrows():
    grid_size = row['grid_size']
    scc_rank = row['scc_rank']
    cc_nodes = row['cc_node_list']  # list of cell ids
    v_sis_array = row['v_sis']      # array of same length

    # The left merge leaves NaN in both columns when no sweep record matched.
    # Building a DataFrame from those scalars used to fail with an unrelated
    # "must pass an index" error, so report the miss and move on.
    if not isinstance(cc_nodes, (list, tuple, np.ndarray)):
        print(f"Warning: no SIS result found for grid={grid_size}, rank={scc_rank}, tau={row['tau_round']}")
        continue

    tmp_df = pd.DataFrame({
        'cell': cc_nodes,
        'v_sis': v_sis_array
    })

    mask = (
        (scc_node_geo_df['grid_size'] == grid_size) &
        (scc_node_geo_df['scc_rank'] == scc_rank)
    )
    # Join only on the 'cell' column so the result has a single, unsuffixed
    # 'v_sis' column; the left join keeps the row order of the masked frame.
    scc_node_geo_df.loc[mask, 'v_sis'] = (
        scc_node_geo_df.loc[mask, ['cell']]
        .merge(tmp_df, on='cell', how='left')['v_sis']
        .values
    )


In [None]:
import geopandas as gpd
from shapely.geometry import Polygon
import matplotlib.pyplot as plt


# Turn scc_node_geo_df into a GeoDataFrame, parsing WKT strings first if the
# geometry column holds text instead of geometry objects.
first_geometry = scc_node_geo_df.loc[0, 'geometry']
print(type(first_geometry))

if isinstance(first_geometry, str):
    from shapely import wkt
    scc_node_geo_df['geometry'] = scc_node_geo_df['geometry'].map(wkt.loads)

scc_node_geo_gdf = gpd.GeoDataFrame(scc_node_geo_df, geometry='geometry', crs=gdf.crs)

print(type(scc_node_geo_gdf))



<class 'shapely.geometry.polygon.Polygon'>
<class 'geopandas.geodataframe.GeoDataFrame'>


In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.patheffects as pe
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

# --- helper: load countries reliably across environments ---
def load_countries():
    """Read the Natural Earth 1:110m country polygons from the public S3 URL.

    Returns
    -------
    geopandas.GeoDataFrame
        The table returned by ``gpd.read_file`` for the countries archive.

    Raises
    ------
    RuntimeError
        If the archive cannot be read; the original exception is chained.
    """
    url = "https://naturalearth.s3.amazonaws.com/110m_cultural/ne_110m_admin_0_countries.zip"
    try:
        return gpd.read_file(url)
    except Exception as e:
        raise RuntimeError(
            "Could not load Natural Earth countries. "
            "Install `geodatasets` (pip install geodatasets) or `cartopy`, "
            "or ensure internet access for the direct URL."
        ) from e

# --- nodes of the 5000 grid ---
filtered_gdf = node_geo_df[(node_geo_df['grid_size'] == 5000)]

# --- load countries and match CRS ---
countries = load_countries()

# If the data has no CRS it is treated as lon/lat WGS84.
if filtered_gdf.crs is None:
    filtered_gdf = filtered_gdf.set_crs(4326)

if countries.crs != filtered_gdf.crs:
    countries = countries.to_crs(filtered_gdf.crs)

# --- choose a country name column robustly ---
name_col = next(col for col in ['NAME_EN', 'NAME', 'ADMIN', 'SOVEREIGNT'] if col in countries.columns)

# --- get Colombia geometry and neighbors (land-touching) ---
col_mask = countries[name_col].str.lower().eq('colombia')
if not col_mask.any():
    raise ValueError("Colombia not found in Natural Earth attributes.")

geom_series = countries.loc[col_mask, 'geometry']
# union_all() where the installed geopandas provides it, unary_union otherwise
col_geom = geom_series.union_all() if hasattr(geom_series, "union_all") else geom_series.unary_union
colombia = gpd.GeoDataFrame({name_col: ['Colombia'], 'geometry': [col_geom]}, crs=countries.crs)

# neighbors that touch Colombia
neighbors = countries[countries.touches(col_geom)].copy()

def shared_border(g):
    """Return the part of ``g`` shared with Colombia, or None if there is none.

    An areal intersection is reduced to its boundary so the result can serve
    as a label anchor.
    """
    inter = g.intersection(col_geom)
    if inter.is_empty:
        return None
    if inter.geom_type in ('Polygon', 'MultiPolygon'):
        inter = inter.boundary
    return inter

# Keep only neighbors with a non-empty shared border.
neighbors['shared'] = neighbors['geometry'].apply(shared_border)
neighbors = neighbors[neighbors['shared'].notnull() & (~neighbors['shared'].is_empty)]

# --- map extent around Colombia only (computed but not applied to the axes below) ---
minx, miny, maxx, maxy = colombia.total_bounds
padx = (maxx - minx) * 0.25
pady = (maxy - miny) * 0.25
extent = (minx - padx, maxx + padx, miny - pady, maxy + pady)

# --- labeling helper: place text near shared border, nudged outward from Colombia ---
cx, cy = col_geom.representative_point().coords[0]

def label_neighbor_on_border(ax, border_geom, text):
    """Annotate ``ax`` with ``text`` at a point on ``border_geom``.

    The label is offset by 3 points away from Colombia's representative point
    (cx, cy) so that it sits on the outer side of the border.
    """
    # anchor on the shared border; fall back to the centroid if that fails
    try:
        anchor = border_geom.representative_point()
    except Exception:
        anchor = border_geom.centroid
    x, y = anchor.coords[0]

    # offset direction away from Colombia to push the label to the outer side of the border
    dx, dy = x - cx, y - cy
    ha = 'left' if dx >= 0 else 'right'
    va = 'bottom' if dy >= 0 else 'top'
    ox = 3 if dx >= 0 else -3
    oy = 3 if dy >= 0 else -3

    ax.annotate(
        text, (x, y), xytext=(ox, oy), textcoords='offset points',
        ha=ha, va=va, fontsize=7, zorder=30,
        path_effects=[pe.withStroke(linewidth=1, foreground="white")]
    )

def draw_risk_panel(ax, column, norm, xlim=None, ylim=None):
    """Draw one choropleth panel of ``filtered_gdf[column]`` with country outlines.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    column : name of the value column; also used as the panel title.
    norm : colour normalisation passed to the plot.
    xlim, ylim : axis limits to apply. When omitted, the limits produced by the
        choropleth itself (read before the country outlines are added) are used.

    Returns
    -------
    (xlim, ylim) actually applied, so a second panel can share the same view.
    """
    filtered_gdf.plot(
        column=column,
        cmap='OrRd',
        linewidth=0.2,
        edgecolor='black',
        legend=True,
        ax=ax,
        norm=norm
    )
    # Capture the data-driven view before the world-wide outlines widen it.
    if xlim is None:
        xlim = ax.get_xlim()
    if ylim is None:
        ylim = ax.get_ylim()

    countries.plot(ax=ax, facecolor='none', edgecolor='dimgray', linewidth=0.6, zorder=10)

    # neighbor labels placed near the shared border, not drawing neighbor polygons
    for _, row in neighbors.iterrows():
        label_neighbor_on_border(ax, row['shared'], row[name_col])

    ax.annotate(
        "Colombia", (cx, cy),
        ha='center', va='center', fontsize=12, fontweight='bold',
        path_effects=[pe.withStroke(linewidth=1, foreground="white")]
    )

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_title(column, fontsize=12)
    ax.set_axis_off()
    return xlim, ylim

# --- draw ---
fig, axes = plt.subplots(1, 2, figsize=(16, 8), dpi=300)

# Log colour scales; the small offset keeps vmin positive when the minimum is 0.
norm1 = colors.LogNorm(vmin=filtered_gdf['v_true_sis'].min() + 1e-4,
                       vmax=filtered_gdf['v_true_sis'].max())
norm2 = colors.LogNorm(vmin=filtered_gdf['v_true_sf'].min() + 1e-3,
                       vmax=filtered_gdf['v_true_sf'].max())

# ---- left plot ----
ax0 = axes[0]
xlim0, ylim0 = draw_risk_panel(ax0, 'v_true_sis', norm1)

# Union of the data view and Colombia's bounds (kept for reference; the panels use xlim0 / ylim0).
minx, miny, maxx, maxy = colombia.total_bounds
final_xlim = (min(xlim0[0], minx), max(xlim0[1], maxx))
final_ylim = (min(ylim0[0], miny), max(ylim0[1], maxy))

# ---- right plot (same view as the left one) ----
ax1 = axes[1]
draw_risk_panel(ax1, 'v_true_sf', norm2, xlim=xlim0, ylim=ylim0)

plt.tight_layout()
# plt.savefig("spatial_distribution_5000_1_compare.pdf", format="pdf", dpi=300, bbox_inches='tight')
plt.show()
