# Demand adjustment to Economic

In [None]:
import pandas as pd
import geopandas as gpd
# Read the compressed CSV file into a DataFrame
energy_df = pd.read_csv('data/US_data/US_electricity/NREL/energy.csv.gzip', compression='gzip')
us_states = gpd.read_file('data/cb_2018_us_state_500k.shp')
us_nation = gpd.read_file('data/US_data/cb_2018_us_nation_5m.shp')
df_net_benefit_1=pd.read_csv('data/5.1_net_expected_benefit_results.csv')


In [32]:
energy_df['STATE'].unique()

array(['ALABAMA', 'ALASKA', 'ARIZONA', 'ARKANSAS', 'CALIFORNIA',
       'COLORADO', 'CONNECTICUT', 'DELAWARE', 'DISTRICT OF COLUMBIA',
       'FLORIDA', 'GEORGIA', 'HAWAII', 'IDAHO', 'ILLINOIS', 'INDIANA',
       'IOWA', 'KANSAS', 'KENTUCKY', 'LOUISIANA', 'MAINE', 'MARYLAND',
       'MASSACHUSETTS', 'MICHIGAN', 'MINNESOTA', 'MISSISSIPPI',
       'MISSOURI', 'MONTANA', 'NEBRASKA', 'NEVADA', 'NEW HAMPSHIRE',
       'NEW JERSEY', 'NEW MEXICO', 'NEW YORK', 'NORTH CAROLINA',
       'NORTH DAKOTA', 'OHIO', 'OKLAHOMA', 'OREGON', 'PENNSYLVANIA',
       'RHODE ISLAND', 'SOUTH CAROLINA', 'SOUTH DAKOTA', 'TENNESSEE',
       'TEXAS', 'UTAH', 'VERMONT', 'VIRGINIA', 'WASHINGTON',
       'WEST VIRGINIA', 'WISCONSIN', 'WYOMING'], dtype=object)

In [None]:
# For each scenario, print the length of the filtered DataFrame using the same filter except for SCENARIO
for scenario in energy_df['SCENARIO'].unique():
    filtered_df = energy_df[
        (energy_df['STATE'] == 'CALIFORNIA') &
        (energy_df['YEAR'] == 2050) &
        (energy_df['SECTOR'] == 'RESIDENTIAL') &
        (energy_df['SCENARIO'] == scenario) &
        (energy_df['FINAL_ENERGY'] == 'ELECTRICITY')
    ]
    print(f"Scenario: {scenario}, Number of rows: {len(filtered_df)}")

filtered_df['MMBTU'].sum()

Scenario: HIGH ELECTRIFICATION - MODERATE TECHNOLOGY ADVANCEMENT, Number of rows: 78
Scenario: MEDIUM ELECTRIFICATION - MODERATE TECHNOLOGY ADVANCEMENT, Number of rows: 78
Scenario: REFERENCE ELECTRIFICATION - MODERATE TECHNOLOGY ADVANCEMENT, Number of rows: 78
Scenario: LOW ELECTRICITY GROWTH - MODERATE TECHNOLOGY ADVANCEMENT, Number of rows: 83
Scenario: ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE TECHNOLOGY ADVANCEMENT, Number of rows: 78


In [48]:
# Create summary dataframe for each state, scenario, sector, and year
# Filter for ELECTRICITY only
energy_electricity = energy_df[energy_df['FINAL_ENERGY'] == 'ELECTRICITY'].copy()

# Filter for valid years (2017-2050)
valid_years = list(range(2017, 2051))
energy_electricity = energy_electricity[energy_electricity['YEAR'].isin(valid_years)].copy()

# Group by STATE, SCENARIO, SECTOR, and YEAR, then sum MMBTU
energy_summary = energy_electricity.groupby(['STATE', 'SCENARIO', 'SECTOR', 'YEAR'])['MMBTU'].sum().reset_index()

# Convert STATE name to match us_states format (title case)
energy_summary['STATE_TITLE'] = energy_summary['STATE'].str.title()

# Merge with us_states geodataframe
energy_summary_gdf = pd.merge(
    energy_summary,
    us_states[['NAME', 'geometry']],
    left_on='STATE_TITLE',
    right_on='NAME',
    how='left'
)

# Convert to GeoDataFrame
energy_summary_gdf = gpd.GeoDataFrame(energy_summary_gdf, geometry='geometry', crs=us_states.crs)

# Display summary
print(f"Total rows in summary: {len(energy_summary_gdf)}")
print(f"States covered: {energy_summary_gdf['NAME'].nunique()}")
print(f"Scenarios: {energy_summary_gdf['SCENARIO'].nunique()}")
print(f"Sectors: {energy_summary_gdf['SECTOR'].nunique()}")
print(f"Years: {energy_summary_gdf['YEAR'].nunique()} ({energy_summary_gdf['YEAR'].min()}-{energy_summary_gdf['YEAR'].max()})")
print("\nFirst few rows:")
energy_summary_gdf


Total rows in summary: 34680
States covered: 50
Scenarios: 5
Sectors: 4
Years: 34 (2017-2050)

First few rows:


Unnamed: 0,STATE,SCENARIO,SECTOR,YEAR,MMBTU,STATE_TITLE,NAME,geometry
0,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,COMMERCIAL,2017,7.063012e+07,Alabama,Alabama,"MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ..."
1,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,COMMERCIAL,2018,7.142212e+07,Alabama,Alabama,"MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ..."
2,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,COMMERCIAL,2019,7.224714e+07,Alabama,Alabama,"MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ..."
3,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,COMMERCIAL,2020,7.315417e+07,Alabama,Alabama,"MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ..."
4,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,COMMERCIAL,2021,7.401815e+07,Alabama,Alabama,"MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ..."
...,...,...,...,...,...,...,...,...
34675,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,TRANSPORTATION,2046,8.069778e+05,Wyoming,Wyoming,"POLYGON ((-111.05456 45.00096, -111.04507 45.0..."
34676,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,TRANSPORTATION,2047,8.153339e+05,Wyoming,Wyoming,"POLYGON ((-111.05456 45.00096, -111.04507 45.0..."
34677,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,TRANSPORTATION,2048,8.236091e+05,Wyoming,Wyoming,"POLYGON ((-111.05456 45.00096, -111.04507 45.0..."
34678,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,TRANSPORTATION,2049,8.318235e+05,Wyoming,Wyoming,"POLYGON ((-111.05456 45.00096, -111.04507 45.0..."


In [44]:
# Alternative: Summary by STATE, SCENARIO, and YEAR (sum across all sectors)
energy_summary_by_state_scenario_year = energy_electricity.groupby(['STATE', 'SCENARIO', 'YEAR'])['MMBTU'].sum().reset_index()
energy_summary_by_state_scenario_year['STATE_TITLE'] = energy_summary_by_state_scenario_year['STATE'].str.title()

# Merge with us_states
energy_summary_by_state_scenario_year_gdf = pd.merge(
    energy_summary_by_state_scenario_year,
    us_states[['NAME', 'geometry']],
    left_on='STATE_TITLE',
    right_on='NAME',
    how='left'
)
energy_summary_by_state_scenario_year_gdf = gpd.GeoDataFrame(
    energy_summary_by_state_scenario_year_gdf, 
    geometry='geometry', 
    crs=us_states.crs
)

print("Summary by STATE, SCENARIO, and YEAR (all sectors combined):")
print(f"Total rows: {len(energy_summary_by_state_scenario_year_gdf)}")
energy_summary_by_state_scenario_year_gdf


Summary by STATE, SCENARIO, and YEAR (all sectors combined):
Total rows: 8670


Unnamed: 0,STATE,SCENARIO,YEAR,MMBTU,STATE_TITLE,NAME,geometry
0,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,2017,2.457755e+08,Alabama,Alabama,"MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ..."
1,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,2018,2.692848e+08,Alabama,Alabama,"MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ..."
2,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,2019,2.920597e+08,Alabama,Alabama,"MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ..."
3,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,2020,3.133971e+08,Alabama,Alabama,"MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ..."
4,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,2021,3.330328e+08,Alabama,Alabama,"MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ..."
...,...,...,...,...,...,...,...
8665,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,2046,3.816260e+07,Wyoming,Wyoming,"POLYGON ((-111.05456 45.00096, -111.04507 45.0..."
8666,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,2047,3.846013e+07,Wyoming,Wyoming,"POLYGON ((-111.05456 45.00096, -111.04507 45.0..."
8667,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,2048,3.874785e+07,Wyoming,Wyoming,"POLYGON ((-111.05456 45.00096, -111.04507 45.0..."
8668,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,2049,3.905937e+07,Wyoming,Wyoming,"POLYGON ((-111.05456 45.00096, -111.04507 45.0..."


In [None]:
# 修复美国某些州（如DISTRICT OF COLUMBIA）无法匹配geometry的问题
unmatched = energy_summary_gdf[energy_summary_gdf['geometry'].isna()]
if len(unmatched) > 0:
    print("Warning: Some states could not be matched with geometry:\n")
    print(unmatched[['STATE', 'STATE_TITLE', 'NAME']].drop_duplicates())
    print("\n尝试修复匹配：")
    dc_idx = (energy_summary_gdf['STATE_TITLE'] == 'District Of Columbia') & (energy_summary_gdf['geometry'].isna())
    dc_geom_row = us_states[us_states['NAME'].str.upper() == 'DISTRICT OF COLUMBIA']
    if not dc_geom_row.empty and dc_idx.any():
        energy_summary_gdf.loc[dc_idx, 'geometry'] = dc_geom_row.iloc[0]['geometry']
        energy_summary_gdf.loc[dc_idx, 'NAME'] = dc_geom_row.iloc[0]['NAME']
        print("✔ DISTRICT OF COLUMBIA geometry已修复匹配。")
    else:
        print("× DISTRICT OF COLUMBIA匹配失败。")
    unmatched2 = energy_summary_gdf[energy_summary_gdf['geometry'].isna()]
    if len(unmatched2) == 0:
        print("✓ 所有states已成功匹配geometry！")
    else:
        print("仍有以下states无法匹配geometry：")
        print(unmatched2[['STATE', 'STATE_TITLE', 'NAME']].drop_duplicates())
else:
    print("✓ All states successfully matched with geometry!")



                     STATE           STATE_TITLE NAME
5440  DISTRICT OF COLUMBIA  District Of Columbia  NaN

尝试修复匹配：
✔ DISTRICT OF COLUMBIA geometry已修复匹配。
✓ 所有states已成功匹配geometry！


In [59]:
# Example: HIGH ELECTRIFICATION scenario
scenario_name = 'HIGH ELECTRIFICATION - MODERATE TECHNOLOGY ADVANCEMENT'
energy_pivot_state_year = energy_summary_by_state_scenario_year[
    energy_summary_by_state_scenario_year['SCENARIO'] == scenario_name
].pivot_table(
    index='STATE',
    columns='YEAR',
    values='MMBTU',
    aggfunc='sum'
)

print(f"Pivot table: STATE (rows) x YEAR (columns) for scenario: {scenario_name}")
print(f"Shape: {energy_pivot_state_year.shape}")
energy_pivot_state_year


Pivot table: STATE (rows) x YEAR (columns) for scenario: HIGH ELECTRIFICATION - MODERATE TECHNOLOGY ADVANCEMENT
Shape: (51, 34)


YEAR,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
STATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ALABAMA,245880500.0,247036500.0,248984000.0,250479900.0,252764000.0,256218100.0,258979000.0,261676300.0,264062600.0,266645700.0,...,359158300.0,365627100.0,372024900.0,378208400.0,384226400.0,390315100.0,396260900.0,402206700.0,408277900.0,414420000.0
ALASKA,25524030.0,25864760.0,26088020.0,26255810.0,26513660.0,26787380.0,27081690.0,27390820.0,27687290.0,28028990.0,...,38860040.0,39612170.0,40376010.0,41111680.0,41826650.0,42541290.0,43228230.0,43964340.0,44714960.0,45467950.0
ARIZONA,228209500.0,228681600.0,229330700.0,229839800.0,231008500.0,232160300.0,233806700.0,235734800.0,237476200.0,239946900.0,...,332215200.0,338603500.0,344940500.0,351002700.0,356813900.0,362597500.0,368003200.0,373215700.0,378391700.0,383477400.0
ARKANSAS,141757500.0,143828500.0,145530100.0,146676600.0,148179700.0,150239900.0,152149300.0,153998000.0,155670000.0,157447600.0,...,215105200.0,219477200.0,223801000.0,227859600.0,231685700.0,235446900.0,238993800.0,242464000.0,245960200.0,249450600.0
CALIFORNIA,1126599000.0,1140039000.0,1148611000.0,1157148000.0,1168768000.0,1180188000.0,1192776000.0,1206184000.0,1218330000.0,1233616000.0,...,1767194000.0,1806432000.0,1845696000.0,1883568000.0,1920321000.0,1957277000.0,1992466000.0,2030076000.0,2068386000.0,2106597000.0
COLORADO,175244700.0,177094800.0,178952200.0,180572800.0,182613200.0,184667900.0,187064400.0,189540400.0,191786600.0,194463400.0,...,271675500.0,276872300.0,282038400.0,287015400.0,291812700.0,296613700.0,301136000.0,305504900.0,309840300.0,314118800.0
CONNECTICUT,107514600.0,108752700.0,109608400.0,110557500.0,111668400.0,112806200.0,113941100.0,115095900.0,116204200.0,117608800.0,...,168624200.0,172189600.0,175702600.0,179095300.0,182405100.0,185781000.0,188966500.0,192125600.0,195262500.0,198324400.0
DELAWARE,46150270.0,46693470.0,47235920.0,47649540.0,48200300.0,48834400.0,49459230.0,49954960.0,50356400.0,50830990.0,...,62509860.0,63350940.0,64222270.0,65055270.0,65868430.0,66683210.0,67485350.0,68280940.0,69092920.0,69934360.0
DISTRICT OF COLUMBIA,43342730.0,43613980.0,43949710.0,44321890.0,44792400.0,45323860.0,45868230.0,46426740.0,47003640.0,47674040.0,...,69208270.0,70925720.0,72622990.0,74260350.0,75850460.0,77402340.0,78937670.0,80441520.0,81928880.0,83408460.0
FLORIDA,838426600.0,841136700.0,842473300.0,844253600.0,848430800.0,854287600.0,860431000.0,866806500.0,873200000.0,881550300.0,...,1178521000.0,1199951000.0,1221385000.0,1242255000.0,1262678000.0,1283337000.0,1303528000.0,1323533000.0,1343753000.0,1364062000.0


In [62]:
energy_summary_by_state_scenario_year

Unnamed: 0,STATE,SCENARIO,YEAR,MMBTU,STATE_TITLE
0,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,2017,2.457755e+08,Alabama
1,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,2018,2.692848e+08,Alabama
2,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,2019,2.920597e+08,Alabama
3,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,2020,3.133971e+08,Alabama
4,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,2021,3.330328e+08,Alabama
...,...,...,...,...,...
8665,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,2046,3.816260e+07,Wyoming
8666,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,2047,3.846013e+07,Wyoming
8667,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,2048,3.874785e+07,Wyoming
8668,WYOMING,REFERENCE ELECTRIFICATION - MODERATE TECHNOLOG...,2049,3.905937e+07,Wyoming


# Adjust for Each Scenario in US



核心需求：针对df_economic['net_npv_usd']进行多情景的需求调整，以体现区域异质性。


计算逻辑：以多情景为单位，取2020-2050年的各个情景、各个州的需求数据。首先需要根据实际的state_analysis_df实际匹配的本土48个州计算总体各个州逐情景、逐个州、逐年的总体均值（并且额外增加一个均值情景，各个Scenario取平均的情景）。其次，计算该情景特定年份下，单个州与总体均值的比例，adjust_ratio= state[demand]/overall_mean[demand]，并且生成dataframe。最后，use df_economic带上州标签的['net_npv_usd']点数据去匹配adjust_ratio。



以下为伪代码：

```python 

for each point in df_economic:
    # first us use us_states_4326 to give states name (only for main land of US)
    # 1. 添加州标签
    print("正在添加州标签...")
    geometry = [Point(xy) for xy in zip(df_economic['lon'], df_economic['lat'])]
    pixel_gdf = gpd.GeoDataFrame(df_economic, geometry=geometry, crs='EPSG:4326')
    pixel_with_states = gpd.sjoin(pixel_gdf, us_states_4326, how='left', predicate='within')
    
    # 移除几何列，保留数据
    data_with_states = pixel_with_states.drop(columns=['geometry']).copy()



```

 数据结构树形图：
 
 state_analysis_df: data/US_data/US_analysis_reslut/state_level_analysis_with_wccd.csv
 └── DataFrame
     └── State_name
 

 df_economic: data/US_data/df_economic.csv
 └── DataFrame
     └──  net_npv_usd、lon、lat 等 

 energy_summary_by_state_scenario_year
 └── DataFrame
     ├── SCENARIO
     ├── YEAR
     └──MMBTU
```
Scenario: HIGH ELECTRIFICATION - MODERATE TECHNOLOGY ADVANCEMENT, Number of rows: 78
Scenario: MEDIUM ELECTRIFICATION - MODERATE TECHNOLOGY ADVANCEMENT, Number of rows: 78
Scenario: REFERENCE ELECTRIFICATION - MODERATE TECHNOLOGY ADVANCEMENT, Number of rows: 78
Scenario: LOW ELECTRICITY GROWTH - MODERATE TECHNOLOGY ADVANCEMENT, Number of rows: 83
Scenario: ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE TECHNOLOGY ADVANCEMENT, Number of rows: 78

```
     
    


In [78]:
df_economic = pd.read_csv('data/US_data/df_economic.csv')
us_states = gpd.read_file(r'data\cb_2018_us_state_500k.shp')
us_states_4326 = us_states.to_crs('EPSG:4326')
state_analysis_df = pd.read_csv('data/US_data/US_analysis_reslut/state_level_analysis_with_wccd.csv')



In [None]:
# Step 1: Determine 48 mainland states (exclude Alaska and Hawaii)
# Get state names from state_analysis_df
if 'State_name' in state_analysis_df.columns:
    mainland_48_states = state_analysis_df['State_name'].unique().tolist()
elif 'State' in state_analysis_df.columns:
    mainland_48_states = state_analysis_df['State'].unique().tolist()
else:
    # If column name is different, show available columns and use first column
    print("Available columns:", state_analysis_df.columns.tolist())
    mainland_48_states = state_analysis_df.iloc[:, 0].unique().tolist()

# Convert to uppercase to match energy_df STATE format
mainland_48_states_upper = [state.upper() for state in mainland_48_states]

# Step 2: Filter energy data for year 2050 and mainland 48 states only

energy_2050 = energy_summary_by_state_scenario_year[
    (energy_summary_by_state_scenario_year['YEAR'] == 2050) &
    (energy_summary_by_state_scenario_year['STATE'].isin(mainland_48_states_upper))
].copy()

# Step 3: Calculate demand adjust ratio using optimized function
import numpy as np

def compute_demand_adjust_ratio(energy_2050, alpha=0.3):
    """
    根据州级 2050 电力需求, 计算需求微调系数 R_s
    
    energy_2050: 至少包含 ['STATE', 'SCENARIO', 'MMBTU'] 或等价的需求列
    alpha: 调整强度参数，默认0.3
    """
    out = []
    
    for scen, df_scen in energy_2050.groupby('SCENARIO'):
        df_scen = df_scen.copy()
        E = df_scen['MMBTU'].astype(float).values     
        E_med = np.median(E)
        ratio = E / E_med
        R = ratio ** alpha                            
        
        df_scen['demand_adjust_ratio'] = R
        out.append(df_scen[['STATE', 'SCENARIO', 'demand_adjust_ratio']])
    
    return pd.concat(out, ignore_index=True)

# Calculate adjust ratios using the optimized function
adjust_ratio_df = compute_demand_adjust_ratio(energy_2050, alpha=0.3)

print(f"Adjust ratio dataframe shape: {adjust_ratio_df.shape}")
print(f"\nAdjust ratio statistics:")
print(adjust_ratio_df['demand_adjust_ratio'].describe())
print(f"\nAdjust ratio range: [{adjust_ratio_df['demand_adjust_ratio'].min():.3f}, {adjust_ratio_df['demand_adjust_ratio'].max():.3f}]")
print(f"\nSample adjust ratios:")
adjust_ratio_df.head(10)

Adjust ratio dataframe shape: (240, 3)

Adjust ratio statistics:
count    240.000000
mean       0.974115
std        0.280890
min        0.501259
25%        0.762594
50%        1.000000
75%        1.112015
max        1.742920
Name: demand_adjust_ratio, dtype: float64

Adjust ratio range: [0.501, 1.743]

Sample adjust ratios:


Unnamed: 0,STATE,SCENARIO,demand_adjust_ratio
0,ALABAMA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,1.045899
1,ARIZONA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,1.014011
2,ARKANSAS,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,0.898285
3,CALIFORNIA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,1.700557
4,COLORADO,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,0.963367
5,CONNECTICUT,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,0.832233
6,DELAWARE,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,0.590327
7,FLORIDA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,1.472238
8,GEORGIA,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,1.245753
9,IDAHO,ELECTRIFICATION TECHNICAL POTENTIAL - MODERATE...,0.69906


In [76]:
# Step 4: Create MEAN SCENARIO (average across all scenarios)
# For each state, calculate the mean MMBTU across all scenarios
mean_scenario_data = energy_2050.groupby('STATE')['MMBTU'].mean().reset_index()
mean_scenario_data['SCENARIO'] = 'MEAN SCENARIO'

# Calculate adjust ratio for MEAN SCENARIO using the same function
# Create a temporary dataframe with MEAN SCENARIO data
mean_energy_2050 = mean_scenario_data[['STATE', 'SCENARIO', 'MMBTU']].copy()
mean_scenario_adjust = compute_demand_adjust_ratio(mean_energy_2050, alpha=0.3)

# Add to adjust_ratio_df
adjust_ratio_df = pd.concat([adjust_ratio_df, mean_scenario_adjust], ignore_index=True)

print(f"After adding MEAN SCENARIO:")
print(f"  Total rows: {len(adjust_ratio_df)}")
print(f"  Scenarios: {adjust_ratio_df['SCENARIO'].nunique()}")
print(f"\nMEAN SCENARIO adjust ratio statistics:")
mean_ratios = adjust_ratio_df[adjust_ratio_df['SCENARIO'] == 'MEAN SCENARIO']['demand_adjust_ratio']
print(mean_ratios.describe())
print(f"\nMEAN SCENARIO sample:")
adjust_ratio_df[adjust_ratio_df['SCENARIO'] == 'MEAN SCENARIO'].head(10)


After adding MEAN SCENARIO:
  Total rows: 288
  Scenarios: 6

MEAN SCENARIO adjust ratio statistics:
count    48.000000
mean      0.972820
std       0.282248
min       0.514214
25%       0.759324
50%       0.999878
75%       1.107334
max       1.715094
Name: demand_adjust_ratio, dtype: float64

MEAN SCENARIO sample:


Unnamed: 0,STATE,SCENARIO,demand_adjust_ratio
240,ALABAMA,MEAN SCENARIO,1.039547
241,ARIZONA,MEAN SCENARIO,1.011065
242,ARKANSAS,MEAN SCENARIO,0.894043
243,CALIFORNIA,MEAN SCENARIO,1.675283
244,COLORADO,MEAN SCENARIO,0.954179
245,CONNECTICUT,MEAN SCENARIO,0.823156
246,DELAWARE,MEAN SCENARIO,0.612212
247,FLORIDA,MEAN SCENARIO,1.485454
248,GEORGIA,MEAN SCENARIO,1.245484
249,IDAHO,MEAN SCENARIO,0.688985


In [77]:
# Step 5: Add state labels to df_economic using spatial join
from shapely.geometry import Point

print("正在添加州标签...")
print(f"Total df_economic rows: {len(df_economic)}")

# Filter for analysis_year == 2050
df_economic_2050 = df_economic[df_economic['analysis_year'] == 2050].copy()
print(f"Rows with analysis_year == 2050: {len(df_economic_2050)}")

# Create geometry from lat/lon
geometry = [Point(xy) for xy in zip(df_economic_2050['lon'], df_economic_2050['lat'])]
pixel_gdf = gpd.GeoDataFrame(df_economic_2050, geometry=geometry, crs='EPSG:4326')

# Spatial join with us_states_4326
pixel_with_states = gpd.sjoin(pixel_gdf, us_states_4326[['NAME', 'geometry']], 
                               how='left', predicate='within')

# Convert state name to uppercase to match energy data
pixel_with_states['STATE'] = pixel_with_states['NAME'].str.upper()

# Remove geometry and index_right columns, keep data
df_economic_with_states = pixel_with_states.drop(columns=['geometry', 'index_right']).copy()

print(f"\nRows with state labels: {len(df_economic_with_states)}")
print(f"States matched: {df_economic_with_states['STATE'].notna().sum()}")
print(f"States unmatched: {df_economic_with_states['STATE'].isna().sum()}")
print(f"\nUnique states in matched data: {df_economic_with_states['STATE'].nunique()}")
df_economic_with_states.head()


正在添加州标签...
Total df_economic rows: 74557220
Rows with analysis_year == 2050: 18639305


NameError: name 'us_states_4326' is not defined

In [None]:
# Step 6: Match df_economic with adjust_ratio and calculate adjusted net_npv_usd
# Note: Since df_economic doesn't have SCENARIO column, we need to create adjusted versions for each scenario
# We'll create a new column for each scenario's adjusted value, plus a MEAN SCENARIO version

scenarios = adjust_ratio_df['SCENARIO'].unique()
print(f"Available scenarios: {scenarios}")

# Start with df_economic_with_states
df_economic_adjusted = df_economic_with_states.copy()

# Merge adjust_ratio for each scenario
for scenario in scenarios:
    # Create a clean scenario name for column names (replace spaces and hyphens with underscores)
    scenario_clean = scenario.replace(" ", "_").replace("-", "_")
    
    # Get adjust ratios for this scenario
    scenario_ratios = adjust_ratio_df[adjust_ratio_df['SCENARIO'] == scenario][['STATE', 'demand_adjust_ratio']].copy()
    scenario_ratios = scenario_ratios.rename(columns={'demand_adjust_ratio': f'adjust_ratio_{scenario_clean}'})
    
    # Merge on STATE
    df_economic_adjusted = df_economic_adjusted.merge(
        scenario_ratios,
        on='STATE',
        how='left'
    )
    
    # Calculate adjusted net_npv_usd for this scenario
    ratio_col = f'adjust_ratio_{scenario_clean}'
    adjusted_col = f'net_npv_usd_adjusted_{scenario_clean}'
    
    if ratio_col in df_economic_adjusted.columns:
        df_economic_adjusted[adjusted_col] = df_economic_adjusted['net_npv_usd'] * df_economic_adjusted[ratio_col].fillna(1.0)

print(f"\nAdjusted dataframe shape: {df_economic_adjusted.shape}")
print(f"\nNew columns created:")
new_cols = [col for col in df_economic_adjusted.columns if 'adjust' in col.lower() or 'adjusted' in col.lower()]
print(new_cols)
print(f"\nSample data:")
sample_cols = ['lat', 'lon', 'STATE', 'net_npv_usd'] + [col for col in new_cols if 'MEAN' in col][:2]
df_economic_adjusted[sample_cols].head(10)


In [None]:
# Summary: Show statistics of adjustment
print("=== Adjustment Summary ===")
print(f"\nOriginal net_npv_usd statistics:")
print(df_economic_adjusted['net_npv_usd'].describe())

# Show statistics for MEAN SCENARIO adjusted values
mean_scenario_col = [col for col in df_economic_adjusted.columns if 'MEAN_SCENARIO' in col and 'adjusted' in col]
if mean_scenario_col:
    print(f"\nMEAN SCENARIO adjusted net_npv_usd statistics:")
    print(df_economic_adjusted[mean_scenario_col[0]].describe())

# Show sample of states and their adjust ratios
print(f"\n=== Sample State Adjust Ratios (MEAN SCENARIO) ===")
mean_ratios = adjust_ratio_df[adjust_ratio_df['SCENARIO'] == 'MEAN SCENARIO'].sort_values('demand_adjust_ratio', ascending=False)
print(mean_ratios.head(10))
print(f"\n...")
print(mean_ratios.tail(10))

# Save the adjusted dataframe
output_path = 'data/US_data/df_economic_adjusted.csv'
df_economic_adjusted.to_csv(output_path, index=False)
print(f"\n✓ Adjusted dataframe saved to: {output_path}")
print(f"  Total rows: {len(df_economic_adjusted)}")
print(f"  Total columns: {len(df_economic_adjusted.columns)}")


In [64]:
state_analysis_df = pd.read_csv('data/US_data/US_analysis_reslut/state_level_analysis_with_wccd.csv')
state_analysis_df

Unnamed: 0,State_name,abandoned_land_ha,Environmental_suitability_per_ha,Emission_mitigation_per_ha,Economic_NPV_per_ha,Power_generation_per_ha,CCD_Mean,Environmental_improvement_mean,Emission_improvement_mean,Economic_improvement_mean,Environmental_improvement_std,Emission_improvement_std,Economic_improvement_std,Overall_improvement_mean,Overall_improvement_std
0,Texas,515964.2179,0.683,5139.7129,265003.9,73731700.0,0.8568,-3.241348,-5.511587,14.950552,10.985194,19.532784,29.62742,2.065872,12.382731
1,Illinois,295397.0348,0.8765,4404.5404,-534005.6,63165940.0,0.7721,22.533255,15.106307,-15.35444,24.575347,21.401723,22.360703,7.428374,13.173975
2,California,272486.837,0.7859,5751.8194,778756.6,80536310.0,0.9417,17.272332,18.375174,18.264472,17.865098,24.911622,25.697463,17.97066,13.333793
3,Michigan,246010.0211,0.921,4120.8294,-761679.7,60152460.0,0.7518,112.543002,103.215075,-108.139994,82.871152,78.340566,82.040502,35.872694,46.827718
4,Georgia,244587.2796,0.9197,4929.5425,-45824.53,69626760.0,0.8676,-8.747075,-10.220664,11.666008,4.865703,4.283493,4.869929,-2.43391,2.702665
5,Indiana,223562.5553,0.8773,4307.9087,-613949.3,62108790.0,0.7554,25.42776,15.503138,-17.835047,35.431255,30.701113,32.852408,7.698617,19.082315
6,Wisconsin,209296.2168,0.8661,4187.8514,-741423.7,60420670.0,0.7219,91.167412,72.586873,-76.162753,65.65549,56.909662,58.90223,29.197177,34.990198
7,Montana,171379.1278,0.6876,4822.07,-148267.0,68266430.0,0.7623,14.398439,4.96087,9.009175,25.992949,28.727299,23.864083,9.456161,15.167163
8,North Carolina,167697.8948,0.9252,4729.6201,-205584.4,67519370.0,0.8549,8.243395,5.488989,-6.368418,12.114889,10.953534,11.64976,2.454655,6.687196
9,Ohio,156198.4388,0.8582,4277.8627,-617258.9,62064930.0,0.7425,25.251174,12.693016,-14.309947,33.960275,28.079375,29.68938,7.878081,17.711299
