In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
#import regionmask
import pandas as pd
from geopy.geocoders import Nominatim
import xesmf as xe

In [2]:
#import data from:
### Gallagher, Kevin P. (2021), “China’s Global Energy Finance,” Global Development Policy Center, Boston University.
### Gallagher, Kevin P., Li, Zhongshu, Chen, Xu, Ma, Xinyue (2019), “China’s Global Power Database,” Global Development Policy Center, Boston University.

In [3]:
# Boston University workbooks (see the citations in the cell above).
CGP_df = pd.read_excel(io='BU_data/CGP-Database_2020-1-2.xlsx')
CGEF_df = pd.read_excel(io='BU_data/BU_CGEF_2020_Update-1.xlsx')

# Coal plant tracker workbook; only the 'Units' sheet is read.
GEM_df = pd.read_excel(
    io='GEM_data/Global-Coal-Plant-Tracker-Jan-2022.xlsx',
    sheet_name='Units',
)

In [5]:
# Keep only the coal rows of each table.
# `.copy()` makes every result an independent frame, so the column assignments
# made in later cells write to a real frame instead of a slice of the raw table
# (which would raise pandas' SettingWithCopyWarning).
CGP_df = CGP_df.loc[CGP_df['Technology'] == 'Coal'].copy()

CGEF_df = CGEF_df.loc[CGEF_df['EnergySource'] == 'Coal'].copy()

In [7]:
# Lower-case the CGP country names; the per-country subsets further down
# compare against lower-case strings.
CGP_df['Country'] = CGP_df['Country'].str.lower()

# Give the CGEF description column the same name as the CGP plant-name column.
CGEF_df = CGEF_df.rename(columns={'Description': 'Plant Name'})

# Remove generic descriptors from the CGEF plant names, one after another and
# in this order. Surrounding whitespace is left untouched.
cleaned_names = CGEF_df['Plant Name']
for descriptor in ('Power Plant', 'Thermal', 'Unit', 'Power Station', 'Phase'):
    cleaned_names = cleaned_names.str.replace(descriptor, '')
CGEF_df['Plant Name'] = cleaned_names

In [9]:
# Geocode every row from its state/country text and store the coordinates in
# 'lat' / 'lon' and the matched address in 'location'.
#
# - One client is created for the whole run instead of one per row.
# - The query is a plain "State, country" string; missing parts are skipped so
#   NaN never ends up in the query text.
# - Each distinct query is sent once and then reused.
# - A place that cannot be resolved yields NaN / None instead of an
#   AttributeError on `None.latitude`.
# NOTE(review): Nominatim's public service asks for at most one request per
# second; add a delay or geopy's RateLimiter if requests start being refused.
geolocator = Nominatim(timeout=10, user_agent="myGeolocator")

geocoded = {}  # query text -> geopy Location, or None when nothing matched
lats, lons, addresses = [], [], []
for state, country in zip(CGP_df['State'], CGP_df['Country']):
    query = ', '.join(str(part) for part in (state, country) if pd.notna(part))
    if query not in geocoded:
        geocoded[query] = geolocator.geocode(query) if query else None
    location = geocoded[query]

    lats.append(np.nan if location is None else location.latitude)
    lons.append(np.nan if location is None else location.longitude)
    addresses.append(None if location is None else location.address)

CGP_df['lat'] = lats
CGP_df['lon'] = lons
CGP_df['location'] = addresses

# Rows without coordinates would silently disappear in a later groupby on
# lat/lon, so report them here.
unresolved = CGP_df.loc[CGP_df['lat'].isna(), ['State', 'Country']].drop_duplicates()
if not unresolved.empty:
    print(f"Could not geocode {len(unresolved)} place(s):")
    print(unresolved.to_string())

In [10]:
# Black-carbon emission factor for coal.
## https://pubs.acs.org/doi/abs/10.1021/es3003684 Table S1
EF = np.exp(-3.64) #g/kg BC/coal

# Heating value of coal.
## https://www.nap.edu/read/9736/chapter/8
HHF = 22.51 #GJ/t

#conversion factors
GJ_to_MwH = 1 / 3.6  # MWh per GJ (1 MWh = 3.6 GJ exactly; previously rounded to .28)

Mw_to_MwH = 24 #daily: MWh produced per day by 1 MW running continuously

ton_to_kg = 1000  # kg per metric ton (was 0.001, which is the kg -> ton factor)

# Grams of BC emitted per day for each MW of capacity:
#   MW -> MWh/day -> GJ/day -> t coal/day -> kg coal/day -> g BC/day
# NOTE(review): no plant efficiency or capacity-factor term appears here, so
# this treats the capacity as the coal energy burned around the clock.
MW_gpDay = Mw_to_MwH / GJ_to_MwH / HHF * ton_to_kg * EF #g/day


In [11]:
# Restrict the analysis to Southeast Asia. `.copy()` detaches the result from
# the unfiltered frame so that adding columns afterwards does not raise
# SettingWithCopyWarning.
# NOTE(review): applying this filter before the geocoding cell would avoid
# geocoding rows that are discarded here.
CGP_df = CGP_df.loc[CGP_df['Region'] == 'Southeast Asia'].copy()

In [12]:
# Daily black-carbon emissions per row: capacity scaled by the per-MW factor.
CGP_df.loc[:, 'BC (g/day)'] = MW_gpDay * CGP_df['Capacity (MW)']

In [13]:
# Collapse rows that share plant name and coordinates into a single row,
# carrying over the first value of every other column. The result is indexed
# by (Plant Name, lat, lon).
# NOTE(review): capacity and BC are taken from the first row, not summed --
# confirm that repeated rows describe the same plant rather than separate units.
carried_columns = [
    'Capacity (MW)', 'Project Status', 'Year of Commission',
    'Technology', 'State', 'Country', 'Region',
    'Estimated Annual CO2 Emission from Power Generation (1000 ton)',
    'Deal type', 'FDI?', 'Investing Company',
    'Investing Country Ownership %', 'FDI Deal Type',
    'Involving Chinese Policy bank?', 'Lender', 'Borrower', 'Source ',
    'location', 'BC (g/day)',
]
CGP_df = (
    CGP_df
    .groupby(['Plant Name', 'lat', 'lon'])
    .agg({column: 'first' for column in carried_columns})
)

In [14]:
# Convert the indexed frame to an xarray Dataset: each index level becomes a
# dimension and each column a data variable (dense, not sparse, storage).
CGP_ds = xr.Dataset.from_dataframe(dataframe=CGP_df, sparse=False)

## Make datasets of different interests
Currently subsetting by project status, year of commission, country, and lender.

In [52]:
# One dataset per project status. Each subset is tagged with a short 'Name'
# attribute, which the regridding loop later uses as its dictionary key.
project_status = CGP_ds["Project Status"]

operating_ds = CGP_ds.where(project_status == 'In Operation', drop=True)
operating_ds.attrs = dict(Name='operating')

construction_ds = CGP_ds.where(project_status == 'Under Construction', drop=True)
construction_ds.attrs = dict(Name='construction')

planning_ds = CGP_ds.where(project_status == 'Under Planning', drop=True)
planning_ds.attrs = dict(Name='planning')

In [53]:
# Split the operating plants at the commissioning year: `year` itself falls in
# the "post" subset, strictly earlier years in the "pre" subset.
year = 2016
commission_year = operating_ds['Year of Commission']

operating_post_2016_ds = operating_ds.where(commission_year >= year, drop=True)
operating_post_2016_ds.attrs = dict(Name='post_2016')

operating_pre_2016_ds = operating_ds.where(commission_year < year, drop=True)
operating_pre_2016_ds.attrs = dict(Name='pre_2016')

In [54]:
# One dataset of operating plants per country. Country names are lower-case
# here, and each subset's 'Name' attribute is the country name itself.
country_names = ('indonesia', 'vietnam', 'cambodia', 'singapore', 'malaysia')

indonesia_ds, vietnam_ds, cambodia_ds, singapore_ds, malaysia_ds = (
    operating_ds.where(operating_ds['Country'] == country_name, drop=True)
    for country_name in country_names
)

country_subsets = (indonesia_ds, vietnam_ds, cambodia_ds, singapore_ds, malaysia_ds)
for country_name, country_ds in zip(country_names, country_subsets):
    country_ds.attrs = {'Name': country_name}

In [55]:
# One dataset of operating plants per lender label.
lender = operating_ds['Lender']

cdb_ds = operating_ds.where(lender == 'CDB', drop=True)
cdb_ds.attrs = dict(Name='cdb')

cdb_exim_ds = operating_ds.where(lender == 'CDB-Ex-Im Cofinancing', drop=True)
cdb_exim_ds.attrs = dict(Name='cdb_exim')

exim_ds = operating_ds.where(lender == 'Ex-Im Bank', drop=True)
exim_ds.attrs = dict(Name='exim')

In [56]:
# Every subset that will be regridded, grouped by the criterion that built it.
ds_list = [
    # project status
    operating_ds,
    construction_ds,
    planning_ds,
    # commissioning year
    operating_post_2016_ds,
    operating_pre_2016_ds,
    # country
    indonesia_ds,
    vietnam_ds,
    cambodia_ds,
    singapore_ds,
    malaysia_ds,
    # lender
    cdb_ds,
    cdb_exim_ds,
    exim_ds,
]

## Final Step: Regrid datasets

In [57]:
# Dataset that defines the grid every subset is regridded onto.
# NOTE(review): absolute, machine-specific path -- consider a configurable
# data directory so the notebook runs elsewhere.
regrid_template_path = '/net/fs11/d0/emfreese/GCrundirs/IRF_runs/regrid_files/regular_lat_lon_180x288.nc'
regrid_ds = xr.open_dataset(regrid_template_path)

In [58]:
def regrid(ds, regrid_ds, var='BC (g/day)', method='nearest_d2s'):
    """Regrid one variable of ``ds`` onto the grid described by ``regrid_ds``.

    Parameters
    ----------
    ds : xarray.Dataset
        Source dataset; its coordinates define the source grid.
    regrid_ds : xarray.Dataset
        Dataset whose coordinates define the destination grid.
    var : str, optional
        Name of the variable in ``ds`` to regrid. Defaults to ``'BC (g/day)'``.
    method : str, optional
        xESMF regridding method. Defaults to ``'nearest_d2s'``.

    Returns
    -------
    The result of applying the xESMF regridder to ``ds[var]`` after its
    missing values have been replaced by 0.
    """
    # NOTE(review): the original author flagged the choice of method with
    # "check this usage" -- confirm 'nearest_d2s' is the intended behaviour.
    regridder = xe.Regridder(ds, regrid_ds, method)
    # Only the regridded variable needs its gaps filled; the other (partly
    # non-numeric) variables never reach the regridder.
    return regridder(ds[var].fillna(0))

In [59]:
# Regridded fields, keyed by each subset's 'Name' attribute; filled in below.
datasets = dict()

In [62]:
# Regrid every subset and file the result under the subset's 'Name' attribute.
for ds in ds_list:
    subset_name = ds.attrs['Name']
    datasets[subset_name] = regrid(ds=ds, regrid_ds=regrid_ds)

In [77]:
# `DataArray.drop(dim='Plant Name')` without labels raised
# "KeyError: '[None] not found in axis'": it tries to drop the label None
# along that dimension. To remove the per-plant dimension, sum over it instead
# (missing values are skipped), which leaves a field over the other dimensions.
# NOTE(review): if the goal was only to discard the plant-name labels while
# keeping the dimension, use `.drop_vars('Plant Name')` instead.
operating_ds['BC (g/day)'].sum(dim='Plant Name')