# 0 Load data

In [109]:
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd

# all the dataframes are clipped to the US states 


df_economic = pd.read_csv('data/US_data/df_economic.csv')
df_weight = pd.read_csv('data/US_data/df_weight.csv')
df_strategies = pd.read_csv('data/US_data/df_strategies.csv')
df_net_benefit = pd.read_csv('data/US_data/df_net_benefit.csv')
df_pv_npv = pd.read_csv('data/US_data/df_pv_npv.csv')
df_agricultural_npv = pd.read_csv('data/US_data/df_agricultural_npv.csv')
df_afforestation_npv = pd.read_csv('data/US_data/df_afforestation_npv.csv')
df_natural_npv = pd.read_csv('data/US_data/df_natural_npv.csv')
df_pixel_optimized_data = pd.read_csv('data/US_data/df_pixel_optimized_data.csv')
df_mlp_scores = pd.read_csv('data/US_data/df_mlp_scores.csv')

us_nation = gpd.read_file(r'data\US_data\cb_2018_us_nation_5m.shp')
us_states = gpd.read_file(r'data\cb_2018_us_state_500k.shp')
us_counties = gpd.read_file('data/cb_2018_us_county_500k.shp')


us_nation_4326 = us_nation.to_crs('EPSG:4326')
us_states_4326 = us_states.to_crs('EPSG:4326')
us_counties_4326 = us_counties.to_crs('EPSG:4326')

# 1 Generate csv for US state 

Logic is: 

to generate cvs for US state, this csv is like 

对于pv installed， Environmental suitability /ha ， Emission mitigation / ha ，Economic NPV /ha ，Power generation / ha。

first, group state  'area_m2' use sum , this is variable [abandoned land (ha) ] for each state 

second, for each state and use 'predicted_prob' * 'area_m2' in every row, and sum them to calculate cumulative , 

finally, next use this sum/[abandoned land (ha) ] to caculate averge effect in each state 



| State name | abandoned land (ha) | pv installed  | Environmental suitability /ha | Emission mitigation / ha | Economic NPV /ha| Power generation /ha  | CCD Mean improvement |
|------------|---------------------|--------------|--------------------|-----------------|--------------|--------------|--------------|
|   Row1     |   Row1              |   Row1       |   Row1             |   Row1          |   Row1       |   Row1       |   Row1       |
|   Row2     |   Row2              |   Row2       |   Row2             |   Row2          |   Row2       |   Row2       |   Row2       |


## 1.1 Adjust PV carbon density (Important)

In [76]:
import numpy as np

# 这里所合并的信息在inital的版本较为全面，如果考虑到后期呈现问题，直接Drop即可

emission_intensity = pd.DataFrame({
    'year': ['2018',  '2030', '2035', '2040'],
    'CO2_g_per_kWh': [400.3,  302,  279.2, 261.1]
})



df_weight['LNCS_expect'] = (
    df_weight['final_forest'] * df_weight['weighted_density_Forest'] +
    df_weight['final_agro'] * df_weight['weighted_density_Agricultural'] +
    df_strategies['final_veg'] * df_weight['weighted_density_Vegetation']
)

# 数据处理，df_net_benefit提取sites的面积信息、发电量（PIXEL为单位），pv_potential_dens是转化为碳的计算方式
# df_weight 提取环境变量 predicted_prob与Expectation_net_benefit
# df_economic 提取经济变量 net_npv_usd
power_data = df_net_benefit[['lat', 'lon', 'area_m2','E_yr_pixel']].copy()
env_data = df_weight[['lat', 'lon', 'predicted_prob','gmm_density','sample_type']].copy()
emission_data = df_weight[['lat', 'lon', 'Expectation_net_benefit','pv_potential_dens','LNCS_expect']].copy()
economic_2050 = df_economic[df_economic['analysis_year'] == 2050]
avg_npv = economic_2050.groupby(['lat', 'lon'])['net_npv_usd'].mean().reset_index()

merged_data_for_analysis = env_data.merge(emission_data, on=['lat', 'lon'], how='outer') \
                      .merge(avg_npv, on=['lat', 'lon'], how='outer') \
                      .merge(power_data, on=['lat', 'lon'], how='outer')


# Transform pv_transform (2020 emission factor) to 2030, 2040, 2050 using emission_intensity
pv_transform_2020 = 352  
# Interpolate emission intensity for 2020, 2030, 2040, 2050
years_target = [2020, 2030, 2040, 2050]
em_factors = np.interp(
    years_target,
    emission_intensity['year'].astype(int),
    emission_intensity['CO2_g_per_kWh']
)

# Calculate scaling factors relative to 2020
scaling_factors = em_factors / em_factors[0]

# 生成一个pv_transfer的dataframe
pv_transfer = pd.DataFrame({
    'year': [2020, 2030, 2040, 2050],
    'pv_transform': [pv_transform_2020] + list(pv_transform_2020 * scaling_factors[1:])
})

# 计算每10年区间的累计光伏转化能力，并最终汇总到2050
years = [2020, 2030, 2040, 2050]

for idx in range(len(years)-1):
    y_start, y_end = years[idx], years[idx+1]
    pv_start = pv_transfer.loc[pv_transfer['year'] == y_start, 'pv_transform'].values[0]
    pv_end = pv_transfer.loc[pv_transfer['year'] == y_end, 'pv_transform'].values[0]
    # 线性插值每年pv_transform，累计10年
    pv_sum = 0
    for y in range(y_start, y_end):
        pv_y = pv_start + (pv_end - pv_start) * (y - y_start) / (y_end - y_start)
        pv_sum += merged_data_for_analysis['E_yr_pixel'] * pv_y / 1000 / 1000 * 0.27 / (merged_data_for_analysis['area_m2'] / 10000)
    merged_data_for_analysis[f'pv_potential_{y_start}_{y_end-1}_sum'] = pv_sum

# 汇总2020-2050累计值，汇总site的累计光伏减排能力(t C)
merged_data_for_analysis['pv_potential_total(t)'] = sum(
    merged_data_for_analysis[f'pv_potential_{years[i]}_{years[i+1]-1}_sum'] for i in range(len(years)-1)
)

merged_data_for_analysis['power_generation_kwha'] = merged_data_for_analysis['E_yr_pixel'] * 30 / merged_data_for_analysis['area_m2'] * 10000

In [77]:
merged_data_for_analysis['Expectation_net_benefit_adjust'] = merged_data_for_analysis['pv_potential_total(t)'] - merged_data_for_analysis['LNCS_expect']
merged_data_for_analysis.drop(columns=['pv_potential_2040_2049_sum','pv_potential_2030_2039_sum','pv_potential_2020_2029_sum'], inplace=True)

In [None]:
# merged_data_for_analysis.to_csv('data/US_data/df_merged_data_for_analysis.csv', index=False)

In [None]:
df_pixel_optimized_data

In [96]:
merged_data_for_analysis

Unnamed: 0,lat,lon,predicted_prob,gmm_density,sample_type,Expectation_net_benefit,pv_potential_dens,LNCS_expect,net_npv_usd,area_m2,E_yr_pixel,pv_potential_total(t),power_generation_kwha,Expectation_net_benefit_adjust
0,25.295834,-80.287500,0.000000,3.874315e+16,negative_sample,6382.303639,7191.865919,809.562299,407275.912577,776295.361002,1.958127e+08,5568.344207,7.567199e+07,4758.781908
1,25.437500,-80.537500,0.046746,2.054387e+18,prediction,5690.624063,7035.368191,1344.744161,283953.326746,775385.854124,1.913273e+08,5447.174926,7.402534e+07,4102.430766
2,25.437500,-80.495834,0.000000,4.247702e+15,negative_sample,6055.300591,7024.124059,968.823440,275019.106094,775385.854124,1.910215e+08,5438.469092,7.390703e+07,4469.645652
3,25.445833,-80.454170,0.000000,1.117299e+13,negative_sample,6048.645138,7017.640780,968.995616,269947.631894,775332.207938,1.908320e+08,5433.449375,7.383881e+07,4464.453759
4,25.445833,-80.404170,0.017465,4.463459e+17,prediction,6081.010322,7045.490473,964.480123,291620.314126,775332.207938,1.915894e+08,5455.012162,7.413184e+07,4490.532039
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64977,48.995834,-111.970830,0.026153,1.493778e+17,prediction,6210.923349,6392.674772,181.751426,-220868.055076,563356.382622,1.263101e+08,4949.565792,6.726299e+07,4767.814366
64978,48.995834,-110.345830,0.068266,4.120084e+17,prediction,6206.053157,6395.853979,189.800818,-219393.096487,563356.382622,1.263729e+08,4952.027311,6.729644e+07,4762.226494
64979,48.995834,-106.020836,0.065097,3.232082e+17,prediction,6204.952188,6412.037900,207.085706,-205704.827949,563356.382622,1.266927e+08,4964.557807,6.746673e+07,4757.472100
64980,48.995834,-104.087500,0.109234,6.627032e+21,prediction,6077.421747,6292.888713,215.466966,-300314.323514,563356.382622,1.243385e+08,4872.305853,6.621305e+07,4656.838886


In [92]:
df_weight.drop(columns=['Expectation_net_benefit'], inplace=True)
df_weight.drop(columns=['pv_potential_dens'], inplace=True)


In [None]:
# df_weight.to_csv('data/US_data/df_weight.csv', index=False)

## 1.2 Statistic for State-level 

In [None]:
merged_data_for_analysis


Unnamed: 0,lat,lon,predicted_prob,gmm_density,sample_type,Expectation_net_benefit,pv_potential_dens,LNCS_expect,net_npv_usd,area_m2,E_yr_pixel,pv_potential_total(t),power_generation_kwha,Expectation_net_benefit_adjust
0,25.295834,-80.287500,0.000000,3.874315e+16,negative_sample,6382.303639,7191.865919,809.562299,407275.912577,776295.361002,1.958127e+08,5568.344207,7.567199e+07,4758.781908
1,25.437500,-80.537500,0.046746,2.054387e+18,prediction,5690.624063,7035.368191,1344.744161,283953.326746,775385.854124,1.913273e+08,5447.174926,7.402534e+07,4102.430766
2,25.437500,-80.495834,0.000000,4.247702e+15,negative_sample,6055.300591,7024.124059,968.823440,275019.106094,775385.854124,1.910215e+08,5438.469092,7.390703e+07,4469.645652
3,25.445833,-80.454170,0.000000,1.117299e+13,negative_sample,6048.645138,7017.640780,968.995616,269947.631894,775332.207938,1.908320e+08,5433.449375,7.383881e+07,4464.453759
4,25.445833,-80.404170,0.017465,4.463459e+17,prediction,6081.010322,7045.490473,964.480123,291620.314126,775332.207938,1.915894e+08,5455.012162,7.413184e+07,4490.532039
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64977,48.995834,-111.970830,0.026153,1.493778e+17,prediction,6210.923349,6392.674772,181.751426,-220868.055076,563356.382622,1.263101e+08,4949.565792,6.726299e+07,4767.814366
64978,48.995834,-110.345830,0.068266,4.120084e+17,prediction,6206.053157,6395.853979,189.800818,-219393.096487,563356.382622,1.263729e+08,4952.027311,6.729644e+07,4762.226494
64979,48.995834,-106.020836,0.065097,3.232082e+17,prediction,6204.952188,6412.037900,207.085706,-205704.827949,563356.382622,1.266927e+08,4964.557807,6.746673e+07,4757.472100
64980,48.995834,-104.087500,0.109234,6.627032e+21,prediction,6077.421747,6292.888713,215.466966,-300314.323514,563356.382622,1.243385e+08,4872.305853,6.621305e+07,4656.838886


In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import numpy as np

def generate_state_level_csv(merged_data_for_analysis, us_states_4326, output_path='data/US_data/state_level_analysis.csv'):
    """
    生成州级统计CSV，按照指定逻辑计算各指标
    
    逻辑：
    1. 按州分组，计算总废弃土地面积 (abandoned land ha)
    2. 对每个州，计算各指标的加权平均 (密度 × 面积，然后除以总面积)
    3. 输出标准化的州级统计表
    """
    
    # 1. 添加州标签
    print("正在添加州标签...")
    geometry = [Point(xy) for xy in zip(merged_data_for_analysis['lon'], merged_data_for_analysis['lat'])]
    pixel_gdf = gpd.GeoDataFrame(merged_data_for_analysis, geometry=geometry, crs='EPSG:4326')
    pixel_with_states = gpd.sjoin(pixel_gdf, us_states_4326, how='left', predicate='within')
    
    # 移除几何列，保留数据
    data_with_states = pixel_with_states.drop(columns=['geometry']).copy()
    
    # 2. 按州分组计算统计
    print("正在计算州级统计...")
    
    def calculate_state_metrics(group):
        """计算单个州的指标"""
        # 基础信息
        state_name = group['NAME'].iloc[0] if not group['NAME'].isna().all() else 'Unknown'
        
        # 先转换面积单位：m² -> ha
        area_ha = group['area_m2'] / 10000
        total_area_ha = area_ha.sum()
        
        # 各指标的加权平均计算
        # 使用密度 × 面积(ha)，然后除以总面积(ha)得到加权平均
        
        metrics = {
            'State_name': state_name,
            'abandoned_land_ha': total_area_ha,
        }
        
        # Environmental suitability /ha (predicted_prob的加权平均)
        if 'predicted_prob' in group.columns:
            env_weighted_sum = (group['predicted_prob'] * area_ha).sum()
            metrics['Environmental_suitability_per_ha'] = env_weighted_sum / total_area_ha if total_area_ha > 0 else 0
        else:
            metrics['Environmental_suitability_per_ha'] = 0
        
        # Emission mitigation /ha (Expectation_net_benefit的加权平均)
        if 'Expectation_net_benefit' in group.columns:
            emission_weighted_sum = (group['Expectation_net_benefit'] * area_ha).sum()
            metrics['Emission_mitigation_per_ha'] = emission_weighted_sum / total_area_ha if total_area_ha > 0 else 0
        else:
            metrics['Emission_mitigation_per_ha'] = 0
        
        # Economic NPV /ha (avg_npv的加权平均)
        if 'net_npv_usd' in group.columns:
            economic_weighted_sum = (group['net_npv_usd'] * area_ha).sum()
            metrics['Economic_NPV_per_ha'] = economic_weighted_sum / total_area_ha if total_area_ha > 0 else 0
        else:
            metrics['Economic_NPV_per_ha'] = 0
        
        # Power generation /ha (E_yr_pixel的加权平均)
        if 'E_yr_pixel' in group.columns:
            power_weighted_sum = (group['E_yr_pixel'] * area_ha).sum()
            metrics['Power_generation_per_ha'] = power_weighted_sum / total_area_ha if total_area_ha > 0 else 0
        else:
            metrics['Power_generation_per_ha'] = 0
        
        # CCD Mean improvement (如果有ccd_optimized列)
        if 'ccd_optimized' in group.columns:
            ccd_weighted_sum = (group['ccd_optimized'] * area_ha).sum()
            metrics['CCD_Mean_improvement'] = ccd_weighted_sum / total_area_ha if total_area_ha > 0 else 0
        else:
            metrics['CCD_Mean_improvement'] = 0
        
        # # 额外统计信息
        # metrics['pixel_count'] = len(group)
        # metrics['avg_pixel_area_ha'] = area_ha.mean()        
        return pd.Series(metrics)
    
    # 按州分组并计算指标
    state_stats = data_with_states.groupby('NAME', observed=True).apply(calculate_state_metrics).reset_index(drop=True)
    
    # 3. 数据清理和格式化
    print("正在格式化数据...")
    
    # 移除未知州
    state_stats = state_stats[state_stats['State_name'] != 'Unknown'].copy()
    
    # 按废弃土地面积排序
    state_stats = state_stats.sort_values('abandoned_land_ha', ascending=False).reset_index(drop=True)
    
    # 数值格式化
    numeric_columns = [
        'abandoned_land_ha', 'Environmental_suitability_per_ha', 'Emission_mitigation_per_ha',
        'Economic_NPV_per_ha', 'Power_generation_per_ha', 'CCD_Mean_improvement', 'avg_pixel_area_ha'
    ]
    
    for col in numeric_columns:
        if col in state_stats.columns:
            state_stats[col] = state_stats[col].round(4)
    
    # 4. 保存CSV
    print(f"正在保存到: {output_path}")
    state_stats.to_csv(output_path, index=False)
    
    # 5. 输出摘要
    print(f"\n州级统计完成！")
    print(f"总州数: {len(state_stats)}")
    print(f"总废弃土地面积: {state_stats['abandoned_land_ha'].sum():.2f} 公顷")
    print(f"平均每州废弃土地面积: {state_stats['abandoned_land_ha'].mean():.2f} 公顷")
    
    # 显示前5个州
    print(f"\n前5个州（按废弃土地面积排序）:")
    display_cols = ['State_name', 'abandoned_land_ha', 'Environmental_suitability_per_ha', 
                   'Emission_mitigation_per_ha', 'Economic_NPV_per_ha']
    print(state_stats[display_cols].head().to_string(index=False))
    
    return state_stats

# 使用示例
if __name__ == "__main__":
    # 假设 merged_data_for_analysis 和 us_states_4326 已经定义
    state_stats_df = generate_state_level_csv(merged_data_for_analysis, us_states_4326)

正在添加州标签...
正在计算州级统计...
正在格式化数据...
正在保存到: data/US_data/state_level_analysis.csv

州级统计完成！
总州数: 48
总废弃土地面积: 4376972.78 公顷
平均每州废弃土地面积: 91186.93 公顷

前5个州（按废弃土地面积排序）:
State_name  abandoned_land_ha  Environmental_suitability_per_ha  Emission_mitigation_per_ha  Economic_NPV_per_ha
     Texas        515459.2186                            0.7860                   6722.3459          262293.9622
  Illinois        291941.0631                            0.8573                   5763.0992         -527071.3078
California        271672.0489                            0.5379                   7481.2128          770496.9652
   Georgia        244587.2796                            0.9510                   6426.1686          -45315.8133
   Indiana        219739.8319                            0.8795                   5644.3213         -605635.6691


  state_stats = data_with_states.groupby('NAME', observed=True).apply(calculate_state_metrics).reset_index(drop=True)


In [106]:
state_stats_df

Unnamed: 0,State_name,abandoned_land_ha,Environmental_suitability_per_ha,Emission_mitigation_per_ha,Economic_NPV_per_ha,Power_generation_per_ha,CCD_Mean_improvement
0,Texas,515459.2186,0.786,6722.3459,262294.0,180696200.0,0
1,Illinois,291941.0631,0.8573,5763.0992,-527071.3,137820800.0,0
2,California,271672.0489,0.5379,7481.2128,770497.0,185249900.0,0
3,Georgia,244587.2796,0.951,6426.1686,-45315.81,168911300.0,0
4,Indiana,219739.8319,0.8795,5644.3213,-605635.7,135997100.0,0
5,Michigan,169962.5463,0.9372,5429.4125,-750991.0,126439700.0,0
6,North Carolina,167767.8295,0.9584,6181.1629,-203370.5,157271000.0,0
7,Wisconsin,156027.1466,0.8202,5484.7251,-735375.9,124775700.0,0
8,Ohio,152707.5281,0.8538,5611.4598,-610152.5,135115500.0,0
9,Florida,151815.8627,0.405,6098.5117,1205.917,177793000.0,0


## 1.3 Statitic for County-level

In [108]:
def generate_county_level_csv(merged_data_for_analysis, us_counties_4326, output_path='data/US_data/county_level_analysis.csv'):
    """
    生成县级统计CSV，按照与州级相同的逻辑计算各指标
    """
    print("正在添加县标签...")
    geometry = [Point(xy) for xy in zip(merged_data_for_analysis['lon'], merged_data_for_analysis['lat'])]
    pixel_gdf = gpd.GeoDataFrame(merged_data_for_analysis, geometry=geometry, crs='EPSG:4326')
    pixel_with_counties = gpd.sjoin(pixel_gdf, us_counties_4326, how='left', predicate='within')
    data_with_counties = pixel_with_counties.drop(columns=['geometry']).copy()

    print("正在计算县级统计...")

    def calculate_county_metrics(group):
        county_name = group['NAME'].iloc[0] if not group['NAME'].isna().all() else 'Unknown'
        statefp = group['STATEFP'].iloc[0] if 'STATEFP' in group.columns else ''
        countyfp = group['COUNTYFP'].iloc[0] if 'COUNTYFP' in group.columns else ''
        area_ha = group['area_m2'] / 10000
        total_area_ha = area_ha.sum()
        metrics = {
            'County_name': county_name,
            'STATEFP': statefp,
            'COUNTYFP': countyfp,
            'abandoned_land_ha': total_area_ha,
        }
        if 'predicted_prob' in group.columns:
            env_weighted_sum = (group['predicted_prob'] * area_ha).sum()
            metrics['Environmental_suitability_per_ha'] = env_weighted_sum / total_area_ha if total_area_ha > 0 else 0
        else:
            metrics['Environmental_suitability_per_ha'] = 0
        if 'Expectation_net_benefit' in group.columns:
            emission_weighted_sum = (group['Expectation_net_benefit'] * area_ha).sum()
            metrics['Emission_mitigation_per_ha'] = emission_weighted_sum / total_area_ha if total_area_ha > 0 else 0
        else:
            metrics['Emission_mitigation_per_ha'] = 0
        if 'net_npv_usd' in group.columns:
            economic_weighted_sum = (group['net_npv_usd'] * area_ha).sum()
            metrics['Economic_NPV_per_ha'] = economic_weighted_sum / total_area_ha if total_area_ha > 0 else 0
        else:
            metrics['Economic_NPV_per_ha'] = 0
        if 'E_yr_pixel' in group.columns:
            power_weighted_sum = (group['E_yr_pixel'] * area_ha).sum()
            metrics['Power_generation_per_ha'] = power_weighted_sum / total_area_ha if total_area_ha > 0 else 0
        else:
            metrics['Power_generation_per_ha'] = 0
        if 'ccd_optimized' in group.columns:
            ccd_weighted_sum = (group['ccd_optimized'] * area_ha).sum()
            metrics['CCD_Mean_improvement'] = ccd_weighted_sum / total_area_ha if total_area_ha > 0 else 0
        else:
            metrics['CCD_Mean_improvement'] = 0
        return pd.Series(metrics)

    county_stats = data_with_counties.groupby(['STATEFP', 'COUNTYFP', 'NAME'], observed=True).apply(calculate_county_metrics).reset_index(drop=True)
    county_stats = county_stats[county_stats['County_name'] != 'Unknown'].copy()
    county_stats = county_stats.sort_values('abandoned_land_ha', ascending=False).reset_index(drop=True)
    numeric_columns = [
        'abandoned_land_ha', 'Environmental_suitability_per_ha', 'Emission_mitigation_per_ha',
        'Economic_NPV_per_ha', 'Power_generation_per_ha', 'CCD_Mean_improvement'
    ]
    for col in numeric_columns:
        if col in county_stats.columns:
            county_stats[col] = county_stats[col].round(4)
    print(f"正在保存到: {output_path}")
    county_stats.to_csv(output_path, index=False)
    print(f"\n县级统计完成！")
    print(f"总县数: {len(county_stats)}")
    print(f"总废弃土地面积: {county_stats['abandoned_land_ha'].sum():.2f} 公顷")
    print(f"平均每县废弃土地面积: {county_stats['abandoned_land_ha'].mean():.2f} 公顷")
    print(f"\n前5个县（按废弃土地面积排序）:")
    display_cols = ['County_name', 'STATEFP', 'COUNTYFP', 'abandoned_land_ha', 'Environmental_suitability_per_ha', 
                   'Emission_mitigation_per_ha', 'Economic_NPV_per_ha']
    print(county_stats[display_cols].head().to_string(index=False))
    return county_stats

# 使用示例
county_stats_df = generate_county_level_csv(merged_data_for_analysis, us_counties_4326)
county_stats_df


正在添加县标签...
正在计算县级统计...
正在保存到: data/US_data/county_level_analysis.csv

县级统计完成！
总县数: 2440
总废弃土地面积: 4376972.78 公顷
平均每县废弃土地面积: 1793.84 公顷

前5个县（按废弃土地面积排序）:
County_name STATEFP COUNTYFP  abandoned_land_ha  Environmental_suitability_per_ha  Emission_mitigation_per_ha  Economic_NPV_per_ha
     Fresno      06      019         38974.1038                            0.5998                   7493.0897          749981.7193
    Hidalgo      48      215         38052.8845                            0.1821                   6713.6434          191699.4433
     Tulare      06      107         33121.2112                            0.4548                   7547.2596          785671.8751
       Will      17      197         31175.2304                            0.7299                   5577.6903         -630879.8341
     Madera      06      039         24980.3094                            0.5779                   7486.6939          774396.9547


  county_stats = data_with_counties.groupby(['STATEFP', 'COUNTYFP', 'NAME'], observed=True).apply(calculate_county_metrics).reset_index(drop=True)


Unnamed: 0,County_name,STATEFP,COUNTYFP,abandoned_land_ha,Environmental_suitability_per_ha,Emission_mitigation_per_ha,Economic_NPV_per_ha,Power_generation_per_ha,CCD_Mean_improvement
0,Fresno,06,019,38974.1038,0.5998,7493.0897,749981.7193,1.842272e+08,0
1,Hidalgo,48,215,38052.8845,0.1821,6713.6434,191699.4433,1.868898e+08,0
2,Tulare,06,107,33121.2112,0.4548,7547.2596,785671.8751,1.865164e+08,0
3,Will,17,197,31175.2304,0.7299,5577.6903,-630879.8341,1.324124e+08,0
4,Madera,06,039,24980.3094,0.5779,7486.6939,774396.9547,1.838907e+08,0
...,...,...,...,...,...,...,...,...,...
2435,Red Lake,27,125,57.6248,0.7776,5498.0444,-699671.8740,1.169154e+08,0
2436,Lincoln,30,053,57.1044,0.0051,6490.9617,30097.2262,1.344336e+08,0
2437,Ramsey,38,071,57.0951,0.1975,5768.0864,-486375.2171,1.212798e+08,0
2438,Boundary,16,021,56.7210,0.0955,5812.4662,-506724.3453,1.199618e+08,0
