## figure S2

In [1]:
import os 
import numpy as np
import pandas as pd
import xgboost as xgb
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt  
import matplotlib.colors as col

from pdpbox import pdp
from matplotlib import cm
from pdpbox import info_plots
from xgboost.sklearn import XGBClassifier
from sklearn.metrics import roc_auc_score
from matplotlib.pyplot import MultipleLocator

plt.rc('font',family='Arial')
plt.rcParams ['svg.fonttype'] ='none'
plt.rcParams ['svg.fonttype'] ='none'
pd.options.mode.chained_assignment = None
%matplotlib inline

### load data

In [26]:
# Columns removed from both splits before modelling.
DROP_COLUMNS = ['date', 'lon', 'lat', 'Open Water', 'Urban-Built-up']


def load_split(csv_path):
    """Read one data split and return the model-ready frame.

    The first CSV column is used as the index. A ``month`` column is derived
    from ``date``, then every column listed in ``DROP_COLUMNS`` is dropped.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        Frame whose first column is used as the target below and whose
        remaining columns are used as features.
    """
    frame = pd.read_csv(csv_path, index_col=0)
    frame['month'] = frame['date'].apply(lambda x: pd.Timestamp(x).month)
    return frame.drop(columns=DROP_COLUMNS)


# train: column 0 is the target, every later column is a feature
train = load_split(r'../../data/total/train.csv')
X_train = train.iloc[:, 1:]
y_train = train.iloc[:, 0]

# test: same layout as train
test = load_split(r'../../data/total/test.csv')
X_test = test.iloc[:, 1:]
y_test = test.iloc[:, 0]

### train model

In [27]:
# Hyper-parameters of the gradient-boosted tree classifier, kept in one mapping
# so they can be inspected or logged separately from the estimator.
xgb_params = {
    'n_jobs': 4,
    'max_depth': 18,
    'learning_rate': 0.12,
    'n_estimators': 1200,
    'verbosity': 1,
    'objective': 'binary:logistic',
    'booster': 'gbtree',
    'gamma': 0.1,
    'min_child_weight': 4,
    'subsample': 0.6,
    'colsample_bytree': 0.8,
    'reg_alpha': 1,
    'reg_lambda': 2,
    'base_score': 0.5,
    'eval_metric': 'auc',
}
xgb_model = XGBClassifier(**xgb_params)

# Training stops once the AUC on the evaluation set has not improved for 75 rounds.
# NOTE(review): the evaluation set used for early stopping is the test split itself,
# so the test AUC is not an independent estimate -- confirm this is intended.
xgb_model.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    early_stopping_rounds=75,
    verbose=True,
)

[0]	validation_0-auc:0.64338
Will train until validation_0-auc hasn't improved in 75 rounds.
[1]	validation_0-auc:0.65636
[2]	validation_0-auc:0.65996
[3]	validation_0-auc:0.66433
[4]	validation_0-auc:0.66432
[5]	validation_0-auc:0.67264
[6]	validation_0-auc:0.67614
[7]	validation_0-auc:0.67319
[8]	validation_0-auc:0.68021
[9]	validation_0-auc:0.68014
[10]	validation_0-auc:0.67665
[11]	validation_0-auc:0.67953
[12]	validation_0-auc:0.67888
[13]	validation_0-auc:0.68330
[14]	validation_0-auc:0.68402
[15]	validation_0-auc:0.68903
[16]	validation_0-auc:0.69057
[17]	validation_0-auc:0.69355
[18]	validation_0-auc:0.69460
[19]	validation_0-auc:0.69523
[20]	validation_0-auc:0.69553
[21]	validation_0-auc:0.69807
[22]	validation_0-auc:0.69807
[23]	validation_0-auc:0.69986
[24]	validation_0-auc:0.70238
[25]	validation_0-auc:0.70551
[26]	validation_0-auc:0.70612
[27]	validation_0-auc:0.70685
[28]	validation_0-auc:0.71166
[29]	validation_0-auc:0.71203
[30]	validation_0-auc:0.71582
[31]	validation_

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.8, eval_metric='auc',
              gamma=0.1, gpu_id=-1, importance_type='gain',
              interaction_constraints='', learning_rate=0.12, max_delta_step=0,
              max_depth=18, min_child_weight=4, missing=nan,
              monotone_constraints='()', n_estimators=1200, n_jobs=4,
              num_parallel_tree=1, random_state=0, reg_alpha=1, reg_lambda=2,
              scale_pos_weight=1, subsample=0.6, tree_method='exact',
              validate_parameters=1, verbosity=1)

### Generate partial dependence plots with pdpbox
#### Compute the 2D partial dependence: population density vs. distance to light

In [28]:
## Population density: the percentile range was chosen to cover as much of the
## population-density range as possible.
data_features = list(X_train.columns)

# Pass 1: only used to obtain percentile-based grid points for the two features.
pdp_Pop_dL = pdp.pdp_interact(model=xgb_model,
                                dataset=train,
                                model_features=data_features,
                                features=['Pop', 'distance_light'],
                                num_grid_points=[10, 11],
                                grid_types = ['percentile','percentile'],
                                percentile_ranges=[(0,95),(0,100)],
                                n_jobs=1)

# Pass 2: recompute the interaction on the same grid truncated to integers, so the
# axis values written to the CSV are exactly the points the model was evaluated at.
pdp_Pop_dL = pdp.pdp_interact(model=xgb_model,
                                dataset=train,
                                model_features=data_features,
                                features=['Pop', 'distance_light'],
                                cust_grid_points=[pdp_Pop_dL.feature_grids[0].astype(int),pdp_Pop_dL.feature_grids[1].astype(int)],
                                n_jobs=1)

# Grid actually used by the second pass (one array per feature).
pop_grid = np.array(pdp_Pop_dL.feature_grids[0]).astype(int)
light_grid = np.array(pdp_Pop_dL.feature_grids[1]).astype(int)

# Shape the flat prediction vector as (n_pop, n_light). The dimensions come from the
# grid itself instead of a hard-coded 8 x 11, and reshape (unlike an in-place resize)
# raises a clear error if the number of predictions does not match the grid.
# Assumes pdpbox orders predictions with the first feature varying slowest -- the
# original fixed-size layout relied on the same ordering.
pdp_v = pdp_Pop_dL.pdp.preds.values.reshape(len(pop_grid), len(light_grid))

# Rows: distance to light, columns: population density.
pd_pdp = pd.DataFrame(pdp_v.T,
                      index=pd.Index(light_grid, name='Distance to light'),
                      columns=pop_grid)

out_csv = r'../../result/figure_data/2d_pdp/figure_S2.csv'
# Make sure the target folder exists so the cell also works on a fresh checkout.
os.makedirs(os.path.dirname(out_csv), exist_ok=True)
pd_pdp.to_csv(out_csv)

In [None]:
# Draw the 2D partial-dependence heatmap (figure S2) from the CSV written above.
file = r'../../result/figure_data/2d_pdp/figure_S2.csv'
df = pd.read_csv(file)
# Vectorised rounding of the row labels (previously a row-wise apply).
df['Distance to light'] = df['Distance to light'].round(1)
# One descending sort is enough: the largest distance ends up in the top row.
df = df.sort_values('Distance to light', ascending=False).set_index('Distance to light')

# Wide axis for the heatmap, narrow axis for the colour bar.
grid_kws = {"width_ratios": (0.9, 0.06)}
fig, (ax, cbar_ax) = plt.subplots(1, 2, gridspec_kw=grid_kws, figsize=(10, 11))
sns.heatmap(data=df,
            cmap='RdYlBu_r',
            vmin=0.44,
            vmax=0.56,
            center=0.5,
            linewidths=0,
            robust=False,
            square=False,
            ax=ax,
            cbar_ax=cbar_ax,
            cbar_kws={"orientation": "vertical"})

ax.set_xticklabels(df.columns.to_list(), rotation=0, fontdict={'size': 22})
ax.set_yticklabels(np.array(df.index).astype(int), rotation=0, fontdict={'size': 26})
ax.set_ylabel("Distance to light (Km)", fontdict={'size': 26}, labelpad=5)
# Raw string: '\m' is not a valid escape sequence in a normal string literal.
ax.set_xlabel(r'People/$\mathregular{km^2}$', fontdict={'size': 26}, labelpad=5)

# Resize the colour-bar tick labels without re-setting (and thereby freezing) them.
cbar_ax.tick_params(axis='y', labelsize=22)
cbar_ax.margins(0.02)
fig.subplots_adjust(wspace=0.1)

out_pdf = r'../../result/figure/2d_pdp/figure_S2.pdf'
out_png = r'../../result/figure/2d_pdp/figure_S2.png'
# Make sure the target folder exists so the cell also works on a fresh checkout.
os.makedirs(os.path.dirname(out_pdf), exist_ok=True)
fig.savefig(out_pdf, dpi=300, bbox_inches='tight')
fig.savefig(out_png, dpi=300, bbox_inches='tight')