# 州累积曲线图绘制

## 0 需求文档


核心需求：绘制全美各个州按照优化方案部署PV的效益累积曲线，主要包含计算模块、绘图模块两个部分


各个州的累积曲线；典型州的累积曲线（分成2部分绘制）；
Row: 各个不同的方案、Column: 各个不同的效益维度；
因此，一共需要出 2 次图：一次绘制全美各州，一次只绘制代表州（target states）。其中，target_states = ['California', 'Texas', 'Georgia', 'Indiana', 'New York']

输入数据结构

```python
# Most reliable approach: identify the project root by the presence of both
# a `data` and a `function` entry.
def find_project_root(start_path=None):
    """Return the nearest ancestor holding both `data` and `function`.

    The search starts at ``start_path`` (the current working directory when
    omitted) and inspects at most five levels, the start directory included.
    If none of them contains both entries, the parent of the current working
    directory is returned as a fallback.
    """
    origin = Path.cwd() if start_path is None else start_path
    candidate = Path(origin).resolve()

    levels_left = 5  # never look at more than five levels
    while levels_left > 0:
        levels_left -= 1
        if (candidate / 'data').exists() and (candidate / 'function').exists():
            return candidate
        if candidate.parent == candidate:  # reached the filesystem root
            break
        candidate = candidate.parent

    # Nothing matched: assume the working directory's parent is the root.
    return Path.cwd().parent

# Resolve the project root once and make project-local modules importable.
project_root = find_project_root()

if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

DATA_PATH = project_root / 'data'

print(f"项目根目录: {project_root}")
print(f"数据路径: {DATA_PATH}")

# Build every input path from DATA_PATH so the loads work regardless of the
# current working directory and of the OS path separator (the previous
# hard-coded 'data\\...' strings only resolved on Windows from the root).
df_pixel_optimized_data = pd.read_csv(DATA_PATH / 'US_data' / 'df_pixel_optimized_data.csv')
us_nation = gpd.read_file(DATA_PATH / 'US_data' / 'cb_2018_us_nation_5m.shp')
us_states = gpd.read_file(DATA_PATH / 'cb_2018_us_state_500k.shp')

```

首先是绘图所需必要计算数据的参考
```python
import numpy as np
import matplotlib.pyplot as plt
def calculate_state_integration_analysis_simplified(pixel_optimized_data, us_states_4326, df_net_benefit, 
                                                   plot_curves=True, output_dir='data/US_data/US_analysis_reslut'):
    """
    Simplified version: per-state cumulative benefit curves and their integrals.

    For each of four ranking schemes the pixels are split into 200 bins using
    the 0.5-percentile steps of the scheme's sort value (computed over all
    pixels, highest values first). Within every state the area-weighted
    benefit of each dimension is summed per bin, accumulated from the top bin
    downwards, and integrated over the 0-1 axis with the trapezoidal rule.

    Parameters
    ----------
    pixel_optimized_data : DataFrame
        Must provide 'lat', 'lon', 'predicted_prob',
        'Expectation_net_benefit', 'avg_npv' and 'ccd_optimized'.
    us_states_4326 : GeoDataFrame
        State polygons with a 'NAME' column, joined to the pixels as EPSG:4326.
    df_net_benefit : DataFrame
        Provides 'lat', 'lon' and 'area_m2'; inner-merged on lat/lon.
    plot_curves : bool
        When True, ``create_state_cumulative_curves_simplified`` is called.
    output_dir : str
        Directory handed to the plotting function.

    Returns
    -------
    results_df : DataFrame
        One row per (state, scheme) with the three integrals rounded to 6 dp.
    state_curves_data : dict
        ``[solution_type][var_label][state_name]`` -> dict with
        'cumulative_sum', 'cumulative_percentage' and 'data_count'.
    """
    print("=== 开始州级别累积积分统计分析（简化版）===")

    # 1. Benefit dimensions as (column, label) pairs and the ranking schemes.
    variables = [
        ('predicted_prob', 'Environmental_sustainability'),
        ('Expectation_net_benefit', 'Emission_mitigation_ability'),
        ('avg_npv', 'Economic_feasibility')
    ]
    solution_types = ['Environmental', 'Emission_mitigation', 'Economic', 'WCCD']
    # Column whose area-weighted value drives each single-dimension ranking;
    # 'WCCD' ranks by the raw 'ccd_optimized' score instead.
    sort_columns = {
        'Environmental': 'predicted_prob',
        'Emission_mitigation': 'Expectation_net_benefit',
        'Economic': 'avg_npv',
    }
    result_columns = ['State_name', 'Solution_Type'] + [label for _, label in variables]

    # 2. Attach the pixel area (divided by 10000; presumably m2 -> ha, confirm).
    print("正在合并数据...")
    area_data = df_net_benefit[['lat', 'lon', 'area_m2']].copy()
    merged_data = pixel_optimized_data.merge(area_data, on=['lat', 'lon'], how='inner')
    area_values = merged_data['area_m2'].values / 10000

    # 3. Label every pixel with the state polygon that contains it.
    print("正在添加州标签...")
    geometry = [Point(xy) for xy in zip(merged_data['lon'], merged_data['lat'])]
    pixel_gdf = gpd.GeoDataFrame(merged_data, geometry=geometry, crs='EPSG:4326')
    pixel_with_states = gpd.sjoin(pixel_gdf, us_states_4326, how='left', predicate='within')
    data_with_states = pixel_with_states.drop(columns=['geometry']).copy()

    # 4. Helpers
    def calculate_integral(x, y):
        """Trapezoidal integral of y over x (x is flipped if descending)."""
        if len(x) < 2:
            return 0.0
        if x[0] > x[-1]:
            x = x[::-1]
            y = y[::-1]
        return float(np.sum(np.diff(x) * (y[:-1] + y[1:]) / 2.0))

    def assign_bins(values, edges_desc):
        """Map each value to exactly one bin of the descending edge list.

        Bin i spans (edges_desc[i + 1], edges_desc[i]]; only the last bin also
        includes its lower edge. The former doubly-inclusive mask counted a
        value sitting on an interior edge in two adjacent bins.
        Returns (bin index per value, mask of values inside the edge range).
        """
        edges_asc = edges_desc[::-1]
        last_bin = len(edges_desc) - 2
        in_range = (values >= edges_asc[0]) & (values <= edges_asc[-1])
        slot = np.searchsorted(edges_asc, values, side='left') - 1
        slot = np.clip(slot, 0, last_bin)
        return last_bin - slot, in_range

    # 5. States present in the data and the row positions of their pixels.
    #    The merge result carries a default 0..n-1 index that sjoin(how='left')
    #    keeps, so index labels double as positions into the NumPy arrays.
    state_labels = data_with_states['NAME']
    all_states = state_labels.dropna().unique()
    print(f"发现 {len(all_states)} 个州")
    state_rows = {
        state_name: data_with_states.index[(state_labels == state_name).to_numpy()].to_numpy()
        for state_name in all_states
    }

    # 6. Result containers and the x-axis shared by every curve.
    results = []
    state_curves_data = {}
    fine_percentiles = np.arange(100, -0.5, -0.5)
    n_bins = len(fine_percentiles) - 1
    curve_percentage = np.arange(n_bins) / (n_bins - 1) * 100
    curve_x01 = curve_percentage / 100.0

    # 7. One pass per ranking scheme.
    for solution_type in solution_types:
        print(f"正在处理排序方案: {solution_type}")

        # 7.1 Value that ranks the pixels under this scheme.
        if solution_type == 'WCCD':
            sort_values = merged_data['ccd_optimized'].values
        else:
            sort_values = merged_data[sort_columns[solution_type]].values * area_values

        # 7.2 Percentile edges over all pixels; bin membership is the same
        #     for every dimension, so it is computed once per scheme.
        fine_bins = np.percentile(sort_values, fine_percentiles)
        bin_index, in_range = assign_bins(sort_values, fine_bins)

        # 7.3 Cumulative curves per dimension and state.
        solution_curves = {}
        state_integrals = {}  # state -> {dimension label -> integral}

        for var_name, var_label in variables:
            print(f"  处理维度: {var_label}")

            benefit_total_values = merged_data[var_name].values * area_values
            state_cumulative_data = {}

            # 7.4 Per-state curve: bin sums accumulated from the top bin down.
            for state_name, rows in state_rows.items():
                keep = in_range[rows]
                per_bin = np.bincount(bin_index[rows][keep],
                                      weights=benefit_total_values[rows][keep],
                                      minlength=n_bins)
                state_cumulative_sum = np.cumsum(per_bin)
                integral_value = calculate_integral(curve_x01, state_cumulative_sum)

                state_integrals.setdefault(state_name, {})[var_label] = integral_value
                state_cumulative_data[state_name] = {
                    'cumulative_sum': state_cumulative_sum,
                    # copy so callers mutating one curve cannot affect others
                    'cumulative_percentage': curve_percentage.copy(),
                    'data_count': len(rows)
                }

            solution_curves[var_label] = state_cumulative_data

        # 7.5 One result row per state for this scheme.
        for state_name in all_states:
            if state_name in state_integrals:
                result_row = {'State_name': state_name, 'Solution_Type': solution_type}
                for _, var_label in variables:
                    result_row[var_label] = state_integrals[state_name].get(var_label, 0)
                results.append(result_row)

        state_curves_data[solution_type] = solution_curves

    # 8. Result table; explicit columns keep the sort below valid when empty.
    results_df = pd.DataFrame(results, columns=result_columns)

    # 9. Ordering and rounding.
    print("正在格式化结果...")
    results_df = results_df.sort_values(['State_name', 'Solution_Type']).reset_index(drop=True)
    numeric_columns = ['Environmental_sustainability', 'Emission_mitigation_ability', 'Economic_feasibility']
    for col in numeric_columns:
        results_df[col] = results_df[col].round(6)

    # 10. Optional plotting of all state curves.
    if plot_curves:
        print("正在绘制各州累积曲线...")
        create_state_cumulative_curves_simplified(state_curves_data, variables, solution_types, output_dir)

    # 11. Summary
    print(f"\n=== 州级别累积积分统计完成 ===")
    print(f"总州数: {len(all_states)}")
    print(f"总记录数: {len(results_df)}")
    print(f"每个州有 {len(solution_types)} 种排序方案")

    # Show the WCCD rows of the first ten states.
    wccd_results = results_df[results_df['Solution_Type'] == 'WCCD'].head(10)
    print(f"\n前10个州的WCCD方案结果:")
    print(wccd_results[['State_name', 'Environmental_sustainability', 
                       'Emission_mitigation_ability', 'Economic_feasibility']].to_string(index=False))

    return results_df, state_curves_data

```
——————————————————————————————————————————————————————————————————————————————————————————————

其次是绘图函数以及绘图样式的参考。绘图函数需要你重新优化组织逻辑，要求：
（1）绘制逻辑上参考create_state_cumulative_curves_simplified，但要以 solution 为循环， 创建子图
    for solution_type in solution_types:
        在该循环中创建子图（每次输出的 subplot 个数应与 len(variables) 相同）
        for var_idx, (var_name, var_label) in enumerate(variables):
            获取该维度下的州级数据
            state_data = state_curves_data[solution_type][var_label]
            for state_name, data in state_data.items():
                绘制该州的累积曲线

（2）在样式选择上，参照样式示例 plot_training_loss，大致要求是：
    for each subplot in each solution type:
         如果是 target states 的绘制场景，为 5 个州分配 5 种差异较大的颜色
         整体 figsize 不超过 180 mm × 60 mm，每个 subplot 的宽度和高度均不超过 60 mm
         加上 arrow patch（FancyArrowPatch 坐标轴箭头）
    

```python
def create_state_cumulative_curves_simplified(state_curves_data, variables, solution_types, output_dir):
    """
    Simplified version: draw the cumulative curve of every state.

    One figure is produced per entry of ``variables``; it holds one panel per
    entry of ``solution_types`` and every state's curve is drawn in that
    scheme's colour. Each figure is written to ``output_dir`` as a PNG and
    then shown.
    """
    import os
    os.makedirs(output_dir, exist_ok=True)

    # Line colour per ranking scheme
    solution_colors = {
        'WCCD': '#1f77b4',      # dark blue
        'Environmental': '#2ca02c',  # dark green
        'Emission_mitigation': '#d62728',  # dark red
        'Economic': '#ff7f0e'   # orange
    }

    panel_count = len(solution_types)

    # One figure per benefit dimension
    for var_name, var_label in variables:
        # squeeze=False always yields a 2-D grid, so a single panel needs no
        # special casing.
        fig, axes_grid = plt.subplots(1, panel_count, figsize=(20, 6), squeeze=False)

        for ax, solution_type in zip(axes_grid[0], solution_types):
            # State-level curves of this scheme/dimension (empty if absent)
            curves = state_curves_data.get(solution_type, {}).get(var_label, {})

            # Draw every state without down-sampling
            for data in curves.values():
                ax.plot(data['cumulative_percentage'], data['cumulative_sum'],
                        color=solution_colors[solution_type],
                        linewidth=1.0, alpha=0.6)

            # Panel cosmetics
            ax.set_xlim(0, 100)
            ax.set_xlabel('Cumulative Percentage (%)', fontsize=10)
            ax.set_ylabel(f'{var_label} (Cumulative)', fontsize=10)
            ax.set_title(f'{solution_type}: {var_label}', fontsize=12, fontweight='bold')
            ax.grid(True, alpha=0.3)

        plt.tight_layout()

        # Save the figure
        output_path = f'{output_dir}/state_cumulative_curves_{var_name}_simplified.png'
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"已保存: {output_path}")

        plt.show()
```

```python
def plot_training_loss(main_model_file, save_dir='Supplymentary_figure'):
    """
    Plot the training and validation loss curves and save them as a PNG.

    Parameters:
    -----------
    main_model_file : str
        Path of the main model file
    save_dir : str
        Directory the figure is written to
    """
    history = load_model_data(main_model_file)['config']['training_history']
    train_loss = np.array(history['loss'])
    val_loss = np.array(history['val_loss'])
    epochs = range(1, len(train_loss) + 1)

    # Square figure
    fig, ax = plt.subplots(figsize=(figsize_inches, figsize_inches))

    # Training curve first, validation curve second
    for series, colour, label in (
        (train_loss, '#1F78B4', 'Training Loss'),
        (val_loss, '#E31A1C', 'Validation Loss'),
    ):
        ax.plot(epochs, series, color=colour, linewidth=1.5, label=label)

    ax.set_xlabel('Epochs', fontweight='bold')
    ax.set_ylabel('Loss', fontweight='bold')
    ax.set_title('Training Loss', fontweight='bold')
    ax.legend(frameon=False, loc='best')
    ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    def _attach(patch):
        # Arrow heads sit just outside the axes, so clipping must be off.
        patch.set_clip_on(False)
        ax.add_patch(patch)

    # Arrow head at the end of the x axis
    arrow_x = FancyArrowPatch(
        posA=(ax.get_xlim()[1] * 1.0, 0),
        posB=(ax.get_xlim()[1] * 1.03, 0),
        transform=ax.get_xaxis_transform(),
        arrowstyle='simple',
        color='black', linewidth=0, mutation_scale=8, zorder=20
    )
    _attach(arrow_x)

    # Arrow head at the end of the y axis; the limit is read only after the
    # x arrow has been added, as in the reference ordering.
    y_top = ax.get_ylim()[1]
    arrow_y = FancyArrowPatch(
        posA=(0, y_top * 1.0),
        posB=(0, y_top * 1.03),
        transform=ax.get_yaxis_transform(),
        arrowstyle='simple',
        color='black', linewidth=0, mutation_scale=8, zorder=20
    )
    _attach(arrow_y)

    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, which='major', length=2.5, width=0.5, pad=2, labelsize=5)
    plt.tight_layout()

    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, 'Figure_training_loss.png')
    fig.savefig(save_path, dpi=300, format='png')
    plt.close()
```



## 1 Load data

In [6]:
import sys
import os
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from matplotlib.patches import FancyArrowPatch
def find_project_root(start_path=None):
    """Locate the project root: the closest directory holding `data` and `function`.

    ``start_path`` defaults to the current working directory. The start
    directory and up to four of its ancestors are checked in order; if none
    contains both entries the parent of the current working directory is
    returned instead.
    """
    start = Path(Path.cwd() if start_path is None else start_path).resolve()

    # The start directory followed by its ancestors, capped at five levels.
    for candidate in (start, *start.parents)[:5]:
        if all((candidate / marker).exists() for marker in ('data', 'function')):
            return candidate

    # Fallback: assume the working directory's parent is the project root.
    return Path.cwd().parent

# Resolve the project root and put it on sys.path (once) so that
# project-local modules can be imported from this notebook.
project_root = find_project_root()

if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# All input files are addressed relative to <project_root>/data.
DATA_PATH = project_root / 'data'

print(f"项目根目录: {project_root}")
print(f"数据路径: {DATA_PATH}")

# Load the merged analysis table and the state boundaries; the boundaries are
# reprojected to EPSG:4326 for the later join with the lon/lat pixels.
merged_data_for_analysis = pd.read_csv(DATA_PATH / 'US_data/df_merged_data_for_analysis.csv')
us_states = gpd.read_file(DATA_PATH / 'cb_2018_us_state_500k.shp')
us_states_4326 = us_states.to_crs('EPSG:4326')

# Quick sanity check of the loaded table (shape, columns, first rows).
print(f"merged_data_for_analysis shape: {merged_data_for_analysis.shape}")
print(f"merged_data_for_analysis columns: {merged_data_for_analysis.columns.tolist()}")
merged_data_for_analysis.head()

项目根目录: C:\Dev\Landuse_Zhong_clean
数据路径: C:\Dev\Landuse_Zhong_clean\data
merged_data_for_analysis shape: (70337, 13)
merged_data_for_analysis columns: ['lat', 'lon', 'predicted_prob', 'gmm_density', 'sample_type', 'LNCS_expect', 'net_npv_usd', 'area_m2', 'E_yr_pixel', 'pv_potential_dens', 'power_generation_kwha', 'Expectation_net_benefit', 'ccd_optimized']


Unnamed: 0,lat,lon,predicted_prob,gmm_density,sample_type,LNCS_expect,net_npv_usd,area_m2,E_yr_pixel,pv_potential_dens,power_generation_kwha,Expectation_net_benefit,ccd_optimized
0,25.295834,-80.2875,0.902568,2.806133e+19,prediction,811.854595,411724.028993,776295.361002,195812700.0,5568.344207,75671990.0,4756.489612,0.940305
1,25.4375,-80.5375,0.997976,5.399806e+21,prediction,1348.189022,287052.220262,775385.854124,191327300.0,5447.174926,74025340.0,4098.985904,0.828012
2,25.4375,-80.495834,0.995833,1.508036e+20,prediction,971.186274,278019.855286,775385.854124,191021500.0,5438.469092,73907030.0,4467.282818,0.882208
3,25.445833,-80.45417,0.990886,1.800785e+16,prediction,972.811005,272893.696918,775332.207938,190832000.0,5433.449375,73838810.0,4460.63837,0.880366
4,25.445833,-80.40417,0.996591,6.792866e+22,prediction,968.544647,294800.434504,775332.207938,191589400.0,5455.012162,74131840.0,4486.467516,0.888518


In [14]:
# Re-read the merged analysis table from disk (replacing the in-memory copy)
# and display it as the cell output.
merged_data_for_analysis = pd.read_csv(DATA_PATH / 'US_data/df_merged_data_for_analysis.csv')
merged_data_for_analysis

Unnamed: 0,lat,lon,predicted_prob,gmm_density,sample_type,LNCS_expect,net_npv_usd,area_m2,E_yr_pixel,pv_potential_dens,power_generation_kwha,Expectation_net_benefit,ccd_optimized
0,25.295834,-80.287500,0.902568,2.806133e+19,prediction,811.854595,411724.028993,776295.361002,1.958127e+08,5568.344207,7.567199e+07,4756.489612,0.940305
1,25.437500,-80.537500,0.997976,5.399806e+21,prediction,1348.189022,287052.220262,775385.854124,1.913273e+08,5447.174926,7.402534e+07,4098.985904,0.828012
2,25.437500,-80.495834,0.995833,1.508036e+20,prediction,971.186274,278019.855286,775385.854124,1.910215e+08,5438.469092,7.390703e+07,4467.282818,0.882208
3,25.445833,-80.454170,0.990886,1.800785e+16,prediction,972.811005,272893.696918,775332.207938,1.908320e+08,5433.449375,7.383881e+07,4460.638370,0.880366
4,25.445833,-80.404170,0.996591,6.792866e+22,prediction,968.544647,294800.434504,775332.207938,1.915894e+08,5455.012162,7.413184e+07,4486.467516,0.888518
...,...,...,...,...,...,...,...,...,...,...,...,...,...
70332,48.995834,-99.995834,0.914557,5.692829e+14,prediction,377.830800,-556486.868714,563356.382622,1.180457e+08,4625.717362,6.286200e+07,4247.886562,0.790359
70333,48.995834,-99.987500,0.921603,1.413612e+14,prediction,307.516600,-562376.609379,563356.382622,1.178997e+08,4619.997412,6.278426e+07,4312.480812,0.800732
70334,48.995834,-99.895836,0.872176,3.161397e+13,prediction,324.674320,-598319.346882,563356.382622,1.170071e+08,4585.017928,6.230891e+07,4260.343608,0.757385
70335,48.995834,-99.887500,0.799749,6.767585e+13,prediction,323.374660,-594934.043507,563356.382622,1.170906e+08,4588.290785,6.235338e+07,4264.916125,0.691242


## 2. Calculation function


In [211]:
def calculate_state_integration_analysis_simplified(merged_data_for_analysis, us_states_4326, 
                                                   plot_curves=False, output_dir='data/US_data/US_analysis_reslut'):
    """
    Simplified version: per-state cumulative benefit curves and their integrals.

    For each of four ranking schemes the pixels are split into 200 bins using
    the 0.5-percentile steps of the scheme's sort value (computed over all
    pixels, highest values first). Within every state the area-weighted
    benefit of each dimension is summed per bin, accumulated from the top bin
    downwards, and integrated over the 0-1 axis with the trapezoidal rule.

    Parameters
    ----------
    merged_data_for_analysis : DataFrame
        Pixel table with 'lat', 'lon', 'area_m2', 'predicted_prob',
        'Expectation_net_benefit', 'net_npv_usd' and 'ccd_optimized'.
        It is copied and re-indexed; the caller's frame is not modified.
    us_states_4326 : GeoDataFrame
        State polygons with a 'NAME' column, joined to the pixels as EPSG:4326.
    plot_curves : bool
        Kept for backward compatibility; only a progress message is printed,
        no figure is drawn by this function.
    output_dir : str
        Kept for backward compatibility; not used by this function.

    Returns
    -------
    results_df : DataFrame
        One row per (state, scheme) with the three integrals rounded to 6 dp.
    state_curves_data : dict
        ``[solution_type][var_label][state_name]`` -> dict with
        'cumulative_sum', 'cumulative_percentage' and 'data_count'.

    Raises
    ------
    ValueError
        If a required column is missing.
    """
    print("=== 开始州级别累积积分统计分析（简化版）===")

    # 1. Benefit dimensions as (column, label) pairs and the ranking schemes.
    variables = [
        ('predicted_prob', 'Environmental_sustainability'),
        ('Expectation_net_benefit', 'Emission_mitigation_ability'),
        ('net_npv_usd', 'Economic_feasibility')
    ]
    solution_types = ['Environmental suitability', 'Emission mitigation ability', 'Economic viability', '3E-synergy']
    # Column whose area-weighted value drives each single-dimension ranking;
    # '3E-synergy' ranks by the raw 'ccd_optimized' score instead.
    sort_columns = {
        'Environmental suitability': 'predicted_prob',
        'Emission mitigation ability': 'Expectation_net_benefit',
        'Economic viability': 'net_npv_usd',
    }
    result_columns = ['State_name', 'Solution_Type'] + [label for _, label in variables]

    # 2. Pre-processing
    print("正在处理数据...")
    # Work on a copy with a fresh 0..n-1 index: the state row labels obtained
    # after the spatial join are used as positions into NumPy arrays below,
    # which is only valid for a default index (e.g. not for a filtered frame).
    merged_data = merged_data_for_analysis.copy().reset_index(drop=True)

    # Fail early and clearly. 'net_npv_usd' is required as well: it was only
    # warned about before (under the wrong name) and then raised a KeyError.
    required_columns = ['lat', 'lon', 'area_m2', 'predicted_prob', 'Expectation_net_benefit',
                        'net_npv_usd', 'ccd_optimized']
    missing_columns = [col for col in required_columns if col not in merged_data.columns]
    if missing_columns:
        raise ValueError(f"缺少必需的列: {missing_columns}")

    # Area divided by 10000 (presumably m2 -> ha; confirm the unit).
    area_values = merged_data['area_m2'].values / 10000

    # 3. Label every pixel with the state polygon that contains it.
    print("正在添加州标签...")
    geometry = [Point(xy) for xy in zip(merged_data['lon'], merged_data['lat'])]
    pixel_gdf = gpd.GeoDataFrame(merged_data, geometry=geometry, crs='EPSG:4326')
    pixel_with_states = gpd.sjoin(pixel_gdf, us_states_4326, how='left', predicate='within')
    data_with_states = pixel_with_states.drop(columns=['geometry']).copy()

    # 4. Helpers
    def calculate_integral(x, y):
        """Trapezoidal integral of y over x (x is flipped if descending)."""
        if len(x) < 2:
            return 0.0
        if x[0] > x[-1]:
            x = x[::-1]
            y = y[::-1]
        return float(np.sum(np.diff(x) * (y[:-1] + y[1:]) / 2.0))

    def assign_bins(values, edges_desc):
        """Map each value to exactly one bin of the descending edge list.

        Bin i spans (edges_desc[i + 1], edges_desc[i]]; only the last bin also
        includes its lower edge. The former doubly-inclusive mask counted a
        value sitting on an interior edge in two adjacent bins.
        Returns (bin index per value, mask of values inside the edge range).
        """
        edges_asc = edges_desc[::-1]
        last_bin = len(edges_desc) - 2
        in_range = (values >= edges_asc[0]) & (values <= edges_asc[-1])
        slot = np.searchsorted(edges_asc, values, side='left') - 1
        slot = np.clip(slot, 0, last_bin)
        return last_bin - slot, in_range

    # 5. States present in the data and the row positions of their pixels
    #    (computed once instead of once per scheme and dimension).
    state_labels = data_with_states['NAME']
    all_states = state_labels.dropna().unique()
    print(f"发现 {len(all_states)} 个州")
    state_rows = {
        state_name: data_with_states.index[(state_labels == state_name).to_numpy()].to_numpy()
        for state_name in all_states
    }

    # 6. Result containers and the x-axis shared by every curve.
    results = []
    state_curves_data = {}
    fine_percentiles = np.arange(100, -0.5, -0.5)
    n_bins = len(fine_percentiles) - 1
    curve_percentage = np.arange(n_bins) / (n_bins - 1) * 100
    curve_x01 = curve_percentage / 100.0

    # 7. One pass per ranking scheme.
    for solution_type in solution_types:
        print(f"正在处理排序方案: {solution_type}")

        # 7.1 Value that ranks the pixels under this scheme.
        if solution_type == '3E-synergy':
            sort_values = merged_data['ccd_optimized'].values
        elif solution_type in sort_columns:
            sort_values = merged_data[sort_columns[solution_type]].values * area_values
        else:
            raise ValueError(f"未知的排序方案: {solution_type}")

        # 7.2 Percentile edges over all pixels; bin membership is the same
        #     for every dimension, so it is computed once per scheme.
        fine_bins = np.percentile(sort_values, fine_percentiles)
        bin_index, in_range = assign_bins(sort_values, fine_bins)

        # 7.3 Cumulative curves per dimension and state.
        solution_curves = {}
        state_integrals = {}  # state -> {dimension label -> integral}

        for var_name, var_label in variables:
            print(f"  处理维度: {var_label}")

            benefit_total_values = merged_data[var_name].values * area_values
            state_cumulative_data = {}

            # 7.4 Per-state curve: bin sums accumulated from the top bin down.
            for state_name, rows in state_rows.items():
                keep = in_range[rows]
                per_bin = np.bincount(bin_index[rows][keep],
                                      weights=benefit_total_values[rows][keep],
                                      minlength=n_bins)
                state_cumulative_sum = np.cumsum(per_bin)
                integral_value = calculate_integral(curve_x01, state_cumulative_sum)

                state_integrals.setdefault(state_name, {})[var_label] = integral_value
                state_cumulative_data[state_name] = {
                    'cumulative_sum': state_cumulative_sum,
                    # copy so callers mutating one curve cannot affect others
                    'cumulative_percentage': curve_percentage.copy(),
                    'data_count': len(rows)
                }

            solution_curves[var_label] = state_cumulative_data

        # 7.5 One result row per state for this scheme.
        for state_name in all_states:
            if state_name in state_integrals:
                result_row = {'State_name': state_name, 'Solution_Type': solution_type}
                for _, var_label in variables:
                    result_row[var_label] = state_integrals[state_name].get(var_label, 0)
                results.append(result_row)

        state_curves_data[solution_type] = solution_curves

    # 8. Result table; explicit columns keep the sort below valid when empty.
    results_df = pd.DataFrame(results, columns=result_columns)

    # 9. Ordering and rounding.
    print("正在格式化结果...")
    results_df = results_df.sort_values(['State_name', 'Solution_Type']).reset_index(drop=True)
    numeric_columns = ['Environmental_sustainability', 'Emission_mitigation_ability', 'Economic_feasibility']
    for col in numeric_columns:
        results_df[col] = results_df[col].round(6)

    # 10. Plotting hook: intentionally a no-op here; figures are produced by
    #     a separate plotting function fed with the returned curve data.
    if plot_curves:
        print("正在绘制各州累积曲线...")

    # 11. Summary
    print(f"\n=== 州级别累积积分统计完成 ===")
    print(f"总州数: {len(all_states)}")
    print(f"总记录数: {len(results_df)}")
    print(f"每个州有 {len(solution_types)} 种排序方案")

    # Show the '3E-synergy' rows of the first ten states.
    wccd_results = results_df[results_df['Solution_Type'] == '3E-synergy'].head(10)
    print(f"\n前10个州的WCCD方案结果:")
    print(wccd_results[['State_name', 'Environmental_sustainability', 
                       'Emission_mitigation_ability', 'Economic_feasibility']].to_string(index=False))

    return results_df, state_curves_data


In [212]:
import matplotlib.pyplot as plt

# Set the matplotlib defaults used by every figure below (following
# S2_LNCS_carbon.ipynb): 5 pt Arial for all text elements.
plt.rcParams.update({
    'font.size': 5, 
    'axes.titlesize': 5, 
    'axes.labelsize': 5,
    'xtick.labelsize': 5, 
    'ytick.labelsize': 5, 
    'legend.fontsize': 5,
    'font.family': 'Arial'
})

print("Matplotlib默认参数设置完成")


Matplotlib默认参数设置完成


## 3. Publication-quality plotting function


In [221]:
def _state_curve_value_range(state_data):
    """Return ``(minimum, maximum)`` over all states' ``'cumulative_sum'`` curves.

    The extrema are taken curve by curve, so the curves never have to be
    flattened into one large Python list. Returns ``(0, 0)`` when no state
    holds any value.
    """
    lows = []
    highs = []
    for data in state_data.values():
        values = np.asarray(data['cumulative_sum'])
        if values.size:
            lows.append(np.min(values))
            highs.append(np.max(values))
    if not highs:
        return 0, 0
    return np.min(lows), np.max(highs)


def _format_value_axis(ax, var_name, var_label, curves_min, curves_max):
    """Set y-limits, ticks, tick formatter and y-label of ``ax`` for one variable."""
    from matplotlib.ticker import FuncFormatter

    # Variables whose axis starts at zero: (divisor for tick labels, y-label).
    zero_based_axes = {
        'predicted_prob': (1e6, 'cumulative score (Unitless, $\\times 10^6$)'),
        'Expectation_net_benefit': (1e9, 'cumulative mitigation (Gt CO$_2$)'),
    }

    if var_name in zero_based_axes:
        scale, ylabel = zero_based_axes[var_name]
        # A zero maximum (e.g. a panel without data) would request identical
        # lower and upper limits; fall back to a unit range instead.
        upper = curves_max if curves_max != 0 else 1.0
        ax.set_ylim(0, upper * 1.1)
        ax.yaxis.set_major_formatter(FuncFormatter(lambda x, pos: f'{x / scale:.1f}'))
        ax.set_ylabel(ylabel, fontsize=6, fontweight='bold')
        ax.set_yticks(np.linspace(0, upper * 1.03, 5))
    elif var_name == 'net_npv_usd':
        # Values may be negative, so pad the axis on both sides of the range.
        y_range = curves_max - curves_min
        if y_range == 0:
            y_range = abs(curves_max) * 0.1 if curves_max != 0 else 1
        y_min = curves_min - y_range * 0.1
        y_max = curves_max + y_range * 0.1
        # Extra head-room is added with abs() so that a negative upper bound
        # is still moved upwards (multiplying by 1.1 would move it down).
        ax.set_ylim(y_min, y_max + abs(y_max) * 0.1)
        ax.set_ylabel('cumulative revenue (Billion USD)', fontsize=6, fontweight='bold', labelpad=1.2)

        # Build the ticks and make sure zero is labelled when it lies inside them.
        yticks = np.linspace(y_min, y_max + abs(y_max) * 0.03, 5)
        if not np.isclose(yticks, 0).any() and yticks.min() < 0 < yticks.max():
            yticks = np.sort(np.append(yticks, 0))
        ax.set_yticks(yticks)

        def billion_formatter(x, pos):
            if np.isclose(x, 0):
                return '0'
            return f'{x/1e9:.1f}'
        ax.yaxis.set_major_formatter(FuncFormatter(billion_formatter))
    else:
        # Unknown variable: keep matplotlib's default scaling, label only.
        ax.set_ylabel(f'{var_label}', fontweight='bold', fontsize=8)


def _add_axis_arrows(ax):
    """Draw small arrow heads extending the bottom and left axes of ``ax``."""
    from matplotlib.patches import FancyArrowPatch

    # Positions are axes fractions (0-1), so both arrows keep the same length
    # and direction whatever the data limits are, including negative ones.
    arrow_ends = (
        ((1.0, 0.0), (1.03, 0.0)),  # x axis: extends past the right end
        ((0.0, 1.0), (0.0, 1.03)),  # y axis: extends past the top end
    )
    for tail, head in arrow_ends:
        arrow = FancyArrowPatch(
            posA=tail,
            posB=head,
            transform=ax.transAxes,
            arrowstyle='simple',
            color='black', linewidth=0, mutation_scale=8, zorder=20
        )
        arrow.set_clip_on(False)
        ax.add_patch(arrow)


def create_state_cumulative_curves_publication(state_curves_data, variables, solution_types,
                                                target_states=None, output_dir='Supplymentary/Supplymentary_figure'):
    """
    Draw publication-quality state-level cumulative curves, one PNG per ranking scheme.

    For every entry of ``solution_types`` a 1 x len(variables) figure is
    created (one panel per variable) and written to ``output_dir`` as
    ``Figure_state_cumulative_all_<solution>.png`` or, when ``target_states``
    is given, ``Figure_state_cumulative_target_<solution>.png``.

    Relies on ``os`` and ``np`` being available at module level.

    Parameters:
    -----------
    state_curves_data : dict
        Nested mapping ``{solution_type: {var_label: {state_name: data}}}``
        where ``data`` provides ``'cumulative_percentage'`` (x values) and
        ``'cumulative_sum'`` (y values).
    variables : list
        List of ``(var_name, var_label)`` tuples. ``var_name`` selects the
        axis formatting, ``var_label`` is the key into ``state_curves_data``.
    solution_types : list
        Ranking schemes to plot; each one produces its own figure.
    target_states : list, optional
        When given, these states are highlighted with individual colours and
        listed in a legend, while all remaining states are drawn as thin grey
        background curves. When ``None`` every state is drawn in the colour
        of the panel's dimension.
    output_dir : str
        Directory the PNG files are written to (created if missing).
    """
    import matplotlib.pyplot as plt

    os.makedirs(output_dir, exist_ok=True)

    # Main colour of each benefit dimension; keys match ``var_label``.
    dimension_colors = {
        'Environmental_sustainability': '#7b1fa2',  # purple
        'Emission_mitigation_ability': '#005824',   # green
        'Economic_feasibility': '#a36d1c'           # brown
    }

    # Per-scheme colours, only used as a fallback when a ``var_label`` has no
    # entry in ``dimension_colors``.
    solution_colors = {
        '3E-synergy': '#1f77b4',
        'Environmental suitability': dimension_colors['Environmental_sustainability'],
        'Emission mitigation ability': dimension_colors['Emission_mitigation_ability'],
        'Economic viability': dimension_colors['Economic_feasibility']
    }

    # Distinct colours for highlighted states; reused cyclically when more
    # target states than colours are supplied.
    target_state_colors = [
        '#1f77b4',
        '#d62728',
        '#2ca02c',
        '#ff7f0e',
        '#9467bd'
    ]

    # Line style per ranking scheme (follows 6.9 Figure4_Cumulative_pirority.ipynb).
    solution_linestyles = {
        'Environmental suitability': ':',      # dotted
        'Emission mitigation ability': '--',   # dashed
        'Economic viability': '-',             # solid
        '3E-synergy': '-.'                     # dash-dot
    }

    # Figure size is specified in millimetres and converted to inches.
    fig_width_mm = 175
    fig_height_mm = 50
    figsize = (fig_width_mm / 25.4, fig_height_mm / 25.4)

    for sol_idx, solution_type in enumerate(solution_types):
        print(f"正在绘制排序方案: {solution_type}")

        # One row, one panel per variable.
        fig, axes = plt.subplots(1, len(variables), figsize=figsize)
        if len(variables) == 1:
            axes = [axes]

        # Legend entries, filled only in the target-state scenario.
        legend_handles = []
        legend_labels = []

        solution_curves = state_curves_data.get(solution_type, {})

        for var_idx, (var_name, var_label) in enumerate(variables):
            ax = axes[var_idx]
            state_data = solution_curves.get(var_label)

            if state_data is None:
                # No curves for this scheme/dimension: leave the panel empty.
                all_curves_min, all_curves_max = 0, 0
            else:
                all_curves_min, all_curves_max = _state_curve_value_range(state_data)

                if target_states is not None:
                    # Colour follows the position in ``target_states`` so a
                    # state keeps its colour across panels and figures.
                    state_color_map = {
                        state: target_state_colors[idx % len(target_state_colors)]
                        for idx, state in enumerate(target_states)
                        if state in state_data
                    }

                    # Background first: every non-target state in light grey.
                    for state_name, data in state_data.items():
                        if state_name not in state_color_map:
                            ax.plot(data['cumulative_percentage'], data['cumulative_sum'],
                                    color='#cccccc', linewidth=0.5, alpha=0.3)

                    linestyle = solution_linestyles.get(solution_type, '-')

                    for state_name in target_states:
                        if state_name not in state_data:
                            continue
                        data = state_data[state_name]
                        line, = ax.plot(data['cumulative_percentage'], data['cumulative_sum'],
                                        color=state_color_map[state_name],
                                        linestyle=linestyle,
                                        linewidth=1.5, alpha=0.8, label=state_name)
                        # Take the legend handle from the first panel that
                        # actually draws the state, so the legend survives a
                        # first panel without data.
                        if state_name not in legend_labels:
                            legend_handles.append(line)
                            legend_labels.append(state_name)
                else:
                    dimension_color = dimension_colors.get(
                        var_label, solution_colors.get(solution_type, '#1f77b4'))
                    for data in state_data.values():
                        ax.plot(data['cumulative_percentage'], data['cumulative_sum'],
                                color=dimension_color, linewidth=1.0, alpha=0.6)

            ax.set_xlim(0, 100)
            ax.set_xlabel('Priority order in nation level (%)', fontweight='bold', fontsize=5)
            ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)
            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)

            _format_value_axis(ax, var_name, var_label, all_curves_min, all_curves_max)
            _add_axis_arrows(ax)

            ax.tick_params(axis='x', which='major', length=2.5, width=0.5, pad=2, labelsize=5)
            ax.tick_params(axis='y', which='major', length=2.5, width=0.5, pad=2, labelsize=5)

        fig.tight_layout()

        # Figure-level panel label: a, b, c, ... in the order of ``solution_types``.
        panel_label = chr(97 + sol_idx)  # 97 is the code point of 'a'
        fig.text(0.01, 0.99, panel_label, ha='left', va='top', fontsize=7, fontweight='bold',
                 transform=fig.transFigure,
                 bbox=dict(facecolor='white', edgecolor='none', alpha=0.7, pad=0.2, lw=0),
                 zorder=100)

        if target_states is not None and len(legend_handles) > 0:
            fig.legend(legend_handles, legend_labels,
                       loc='upper center',
                       bbox_to_anchor=(0.5, 1.0),
                       ncol=len(legend_labels),
                       frameon=False,
                       fontsize=5,
                       columnspacing=1.0,
                       handlelength=3.0,
                       handletextpad=0.5)
            # Make room for the legend above the panels.
            fig.subplots_adjust(top=0.88)
            # The legend occupies the top strip, so the caption sits slightly lower.
            fig.text(0.98, 0.95, f'by {solution_type}', fontsize=5, fontweight='bold',
                     va='top', ha='right', transform=fig.transFigure, zorder=100)
        else:
            fig.text(0.98, 0.98, f'by {solution_type}', fontsize=5, fontweight='bold',
                     va='top', ha='right', transform=fig.transFigure, zorder=100)

        if target_states is not None:
            output_path = os.path.join(output_dir, f'Figure_state_cumulative_target_{solution_type}.png')
        else:
            output_path = os.path.join(output_dir, f'Figure_state_cumulative_all_{solution_type}.png')

        fig.savefig(output_path, dpi=300, format='png')
        print(f"已保存: {output_path}")
        # Close this specific figure rather than whichever one is current.
        plt.close(fig)

    print(f"所有图形已保存到: {output_dir}")


In [222]:
# Benefit dimensions to plot, as (pixel-level column, dimension label) pairs;
# the label is the key under which the curves are looked up when plotting.
variables = list({
    'predicted_prob': 'Environmental_sustainability',
    'Expectation_net_benefit': 'Emission_mitigation_ability',
    'net_npv_usd': 'Economic_feasibility',
}.items())

# Ranking schemes; each one yields its own figure.
solution_types = [
    'Environmental suitability',
    'Emission mitigation ability',
    'Economic viability',
    '3E-synergy',
]

# Representative states highlighted in the second set of figures.
target_states = [
    'California',
    'Texas',
    'Georgia',
    'Indiana',
    'New York',
]

# Run the calculation only; the curves are drawn afterwards with the
# publication plotting function, so the built-in plotting is switched off.
print("开始计算州级累积曲线...")
results_df, state_curves_data = calculate_state_integration_analysis_simplified(
    merged_data_for_analysis, us_states_4326, plot_curves=False
)


开始计算州级累积曲线...
=== 开始州级别累积积分统计分析（简化版）===
正在处理数据...
正在添加州标签...
发现 48 个州
正在处理排序方案: Environmental suitability
  处理维度: Environmental_sustainability
  处理维度: Emission_mitigation_ability
  处理维度: Economic_feasibility
正在处理排序方案: Emission mitigation ability
  处理维度: Environmental_sustainability
  处理维度: Emission_mitigation_ability
  处理维度: Economic_feasibility
正在处理排序方案: Economic viability
  处理维度: Environmental_sustainability
  处理维度: Emission_mitigation_ability
  处理维度: Economic_feasibility
正在处理排序方案: 3E-synergy
  处理维度: Environmental_sustainability
  处理维度: Emission_mitigation_ability
  处理维度: Economic_feasibility
正在格式化结果...

=== 州级别累积积分统计完成 ===
总州数: 48
总记录数: 192
每个州有 4 种排序方案

前10个州的WCCD方案结果:
 State_name  Environmental_sustainability  Emission_mitigation_ability  Economic_feasibility
    Alabama                  52292.254042                 2.749749e+08         -3.922319e+09
    Arizona                  26106.905653                 1.797899e+08          3.114122e+10
   Arkansas                  40071.5333

In [223]:
# Figure set 1: cumulative curves of every state (no highlighted states).
print("\n=== 绘制所有州的累积曲线 ===")
create_state_cumulative_curves_publication(
    state_curves_data=state_curves_data,
    variables=variables,
    solution_types=solution_types,
    target_states=None,
    output_dir='Supplymentary_figure',
)



=== 绘制所有州的累积曲线 ===
正在绘制排序方案: Environmental suitability
已保存: Supplymentary_figure\Figure_state_cumulative_all_Environmental suitability.png
正在绘制排序方案: Emission mitigation ability
已保存: Supplymentary_figure\Figure_state_cumulative_all_Emission mitigation ability.png
正在绘制排序方案: Economic viability
已保存: Supplymentary_figure\Figure_state_cumulative_all_Economic viability.png
正在绘制排序方案: 3E-synergy
已保存: Supplymentary_figure\Figure_state_cumulative_all_3E-synergy.png
所有图形已保存到: Supplymentary_figure


In [224]:
# Figure set 2: cumulative curves with the representative states highlighted.
print("\n=== 绘制目标州的累积曲线 ===")
create_state_cumulative_curves_publication(
    state_curves_data=state_curves_data,
    variables=variables,
    solution_types=solution_types,
    target_states=target_states,
    output_dir='Supplymentary_figure',
)



=== 绘制目标州的累积曲线 ===
正在绘制排序方案: Environmental suitability
已保存: Supplymentary_figure\Figure_state_cumulative_target_Environmental suitability.png
正在绘制排序方案: Emission mitigation ability
已保存: Supplymentary_figure\Figure_state_cumulative_target_Emission mitigation ability.png
正在绘制排序方案: Economic viability
已保存: Supplymentary_figure\Figure_state_cumulative_target_Economic viability.png
正在绘制排序方案: 3E-synergy
已保存: Supplymentary_figure\Figure_state_cumulative_target_3E-synergy.png
所有图形已保存到: Supplymentary_figure
