In [57]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import time
import xesmf as xe
import regionmask
from tqdm import tqdm

import xarray as xr
import dask
# Turn on dask's 'array.slicing.split_large_chunks' option for this session.
dask.config.set(**{'array.slicing.split_large_chunks': True})
import sys
# Put the project directory at the front of the module search path; this must
# run before the `utils` import below (presumably utils.py lives in that
# directory - confirm). NOTE(review): this is an absolute, machine-specific path.
sys.path.insert(0, '/net/fs11/d0/emfreese/BC-IRF/')
import utils


%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [58]:
# Read the plant table and replace spaces in its column names with underscores.
plants_csv = f'{utils.data_output_path}plants/BC_SE_Asia_all_financing_SEA_GAINS_Springer_v2.csv'
CGP_df = pd.read_csv(plants_csv)
CGP_df.columns = CGP_df.columns.str.replace(' ', '_')

# Keep only rows that have both coordinates and a non-missing, strictly
# positive BC_(g/day) value.
has_location = CGP_df['latitude'].notna() & CGP_df['longitude'].notna()
has_emission = CGP_df['BC_(g/day)'].notna() & (CGP_df['BC_(g/day)'] > 0)
CGP_df = CGP_df[has_location & has_emission]

# Rename the YEAR and EMISFACTOR.PLATTS columns.
column_renames = {
    'YEAR': 'Year_of_Commission',
    'EMISFACTOR.PLATTS': 'CO2_weighted_capacity_1000tonsperMW',
}
CGP_df = CGP_df.rename(columns=column_renames)

# Discard rows whose Year_of_Commission is missing (selected by index label).
rows_with_year = CGP_df['Year_of_Commission'].dropna().index
CGP_df = CGP_df.loc[rows_with_year]


In [59]:
# Load the combined per-plant results CSV from the raisanen_2022_BC_temp tool
# output directory under the raw-data input path.
raisanen_results_csv = (
    f'{utils.raw_data_in_path}raisanen_2022_BC_temp/'
    'praisanen-Black-carbon-radiative-forcing-and-climate-response-tool-182f7d3/'
    'results_all_plants/combined_plant_results.csv'
)
temp_response_global = pd.read_csv(raisanen_results_csv)

In [60]:
# Sanity check before merging: compare the sizes of the two tables.
print("temp_response_global shape:", temp_response_global.shape)
print("CGP_df shape:", CGP_df.shape)
print()

# Collect the non-null UNITIDs of each table as sets, so that row order and
# index labels play no role in the comparison.
temp_unitids = set(temp_response_global['UNITID'].dropna())
cgp_unitids = set(CGP_df['UNITID'].dropna())

print(f"Unique UNITIDs in temp_response_global: {len(temp_unitids)}")
print(f"Unique UNITIDs in CGP_df: {len(cgp_unitids)}")
print()

# IDs shared by both tables, and IDs that appear in only one of them.
common_unitids = temp_unitids & cgp_unitids
missing_in_temp = cgp_unitids.difference(temp_unitids)
missing_in_cgp = temp_unitids.difference(cgp_unitids)

print(f"Common UNITIDs: {len(common_unitids)}")
print(f"Missing in temp_response_global: {len(missing_in_temp)}")
print(f"Missing in CGP_df: {len(missing_in_cgp)}")

# For each non-empty mismatch set, show up to ten example IDs.
if missing_in_temp:
    print(f"Sample missing in temp: {list(missing_in_temp)[:10]}")
if missing_in_cgp:
    print(f"Sample missing in CGP: {list(missing_in_cgp)[:10]}")

temp_response_global shape: (367, 24)
CGP_df shape: (367, 74)

Unique UNITIDs in temp_response_global: 367
Unique UNITIDs in CGP_df: 367

Common UNITIDs: 367
Missing in temp_response_global: 0
Missing in CGP_df: 0


In [61]:
#CGP_df = CGP_df.rename(columns = {'BC_(kg/m2/year)':'bc_emission_kg_m2_year'})

In [62]:
# Columns pulled from temp_response_global: the three merge keys plus the
# response fields that are attached to each plant row.
response_columns = ['UNITID', 'longitude', 'latitude',
                    'DRF_TOA', 'SNOWRF_TOA', 'SUM_RF_TOA',
                    'DT_DRF', 'DT_SNOWRF', 'DT_SUM',
                    'bc_emission_kg_m2_sec']
merge_keys = ['UNITID', 'latitude', 'longitude']

# validate='one_to_one' makes pandas raise MergeError if either table contains
# a duplicated key combination, instead of silently multiplying rows.
CGP_df_rad = CGP_df.merge(temp_response_global[response_columns],
                          on=merge_keys,
                          how='inner',
                          validate='one_to_one')

# The join matches on the float latitude/longitude values as well as UNITID,
# so a plant whose coordinates differ between the two files is dropped by the
# inner join without any error. Report it rather than losing rows silently.
n_unmatched = len(CGP_df) - len(CGP_df_rad)
if n_unmatched > 0:
    print(f"WARNING: {n_unmatched} of {len(CGP_df)} CGP_df rows had no match on "
          f"{merge_keys} and were dropped by the inner merge")

# Remove leftover saved-index columns when present; errors='ignore' keeps this
# cell working if the input CSV no longer carries them.
CGP_df_rad = CGP_df_rad.drop(columns=['Unnamed:_0.1', 'Unnamed:_0'], errors='ignore')

In [63]:
# Convert BC emissions from g/day to g/yr using a 365-day year, then remove
# the daily column so only the annual value remains.
days_per_year = 365

CGP_df_rad = (
    CGP_df_rad
    .assign(**{'BC_(g/yr)': CGP_df_rad['BC_(g/day)'] * days_per_year})  # g/year
    .drop(columns=['BC_(g/day)'])
)

In [64]:
# Old column name -> new column name. rename() leaves untouched any key that
# is not present among the frame's columns.
rad_column_renames = {
    'YEAR': 'Year_of_Commission',
    'EMISFACTOR.PLATTS': 'CO2_weighted_capacity_1000tonsperMW',
    'ANNUALCO2': 'co2_emissions',
    'MW': 'MW_total',
    'SNOWRF_TOA': 'snowrf_toa',
    'DRF_TOA': 'drf_toa',
    'DT_SNOWRF': 'dt_snowrf',
    'DT_DRF': 'dt_drf',
    'DT_SUM': 'dt_sum',
    'SUM_RF_TOA': 'sum_rf_toa',
}
CGP_df_rad = CGP_df_rad.rename(columns=rad_column_renames)

In [65]:
# Write the merged table to the plants output directory. to_csv is called with
# its defaults, so the DataFrame index is written as an unnamed first column.
rad_output_csv = f'{utils.data_output_path}plants/BC_SE_Asia_all_financing_SEA_GAINS_Springer_plus_rad.csv'
CGP_df_rad.to_csv(rad_output_csv)