In [1]:
###########import packages##########
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb
import lightgbm
import catboost
from sklearn.model_selection import GridSearchCV
from xgboost import plot_importance
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn import ensemble
from sklearn.tree import ExtraTreeRegressor
from sklearn import svm
from sklearn import neighbors
from sklearn import tree
import shap
%matplotlib
from pdpbox import pdp
def plot_pdp(model, df, feature, cluster_flag=False, nb_clusters=None, lines_flag=False):
    """Draw a partial-dependence plot of ``feature`` for ``model`` and show it.

    Parameters
    ----------
    model : fitted estimator handed to ``pdp.pdp_isolate``.
    df : pandas.DataFrame whose columns are all passed as the model's feature list.
    feature : str, name of the column in ``df`` to analyse.
    cluster_flag, nb_clusters, lines_flag : forwarded to ``pdp.pdp_plot`` as
        ``cluster``, ``n_cluster_centers`` and ``plot_lines`` respectively.

    Returns
    -------
    None. The figure is displayed through ``plt.show()``.
    """
    # Every column of df is declared as a model feature.
    feature_names = df.columns.tolist()

    # Step 1: compute the isolated partial-dependence data for the one feature.
    isolated = pdp.pdp_isolate(
        model=model,
        dataset=df,
        model_features=feature_names,
        feature=feature,
    )

    # Step 2: render it, passing the display options straight through.
    pdp.pdp_plot(
        isolated,
        feature,
        cluster=cluster_flag,
        n_cluster_centers=nb_clusters,
        plot_lines=lines_flag,
    )
    plt.show()
###########wrapping root mean square error for later calls##########
def rmse(predictions, targets):
    """Return the root-mean-square error between ``predictions`` and ``targets``.

    Parameters
    ----------
    predictions, targets : array-like
        NumPy arrays or pandas objects are used as given (so their own
        subtraction and ``mean`` semantics apply). Plain lists and tuples
        are converted to NumPy arrays first.

    Returns
    -------
    The square root of the mean squared difference.
    """
    # Built-in sequences have no elementwise subtraction; promote only those so
    # that existing ndarray / pandas callers keep exactly the same behaviour.
    if isinstance(predictions, (list, tuple)):
        predictions = np.asarray(predictions)
    if isinstance(targets, (list, tuple)):
        targets = np.asarray(targets)

    residuals = predictions - targets
    return np.sqrt((residuals ** 2).mean())
###########loading data##########
# Columns kept from the CSV, in this order: positions 0-10 become the model
# inputs and position 11 becomes the regression target.
selected_columns = [
    'A_c',       # 0
    'A_a',       # 1
    'L_cl',      # 2
    'i0',        # 3
    'L-PEM',     # 4
    'L_GDL',     # 5
    'epsl_cl',   # 6
    'epsp_cl',   # 7
    'T_cc',      # 8
    'RH_cc',     # 9
    'pressure',  # 10
    'Volt0.7',   # 11 (target)
]
fdata = pd.read_csv('database.csv', encoding="gbk")
raw_data = fdata.loc[:, selected_columns]
###########standardization and input/output separation##########
# Column-wise z-score of the selected data. The split below works on the
# unscaled raw_data; standardized_data is not consumed by the code shown here.
column_mean = np.mean(raw_data, axis=0)
column_std = np.std(raw_data, axis=0)
standardized_data = (raw_data - column_mean) / column_std
raw_input = raw_data.iloc[:, :11]
raw_output = raw_data.iloc[:, 11]
###########fix random seed for reproducibility##########
seed = 1
# 15% of the rows are held out as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    raw_input,
    raw_output,
    test_size=0.15,
    random_state=seed,
)
y_test_values = y_test.values.astype(np.float32)

########################################################
########################################################
###########################PDP##########################
########################################################
########################################################
def shap_plot(model, param, features=None, prefix=''):
    """Grid-search ``model`` and save one partial-dependence plot per feature.

    Despite the name, no SHAP values are computed here: the function tunes the
    estimator with ``GridSearchCV`` on ``X_train``/``y_train`` and then calls
    ``plot_pdp`` on ``raw_input`` for each requested feature.

    Parameters
    ----------
    model : estimator passed to ``GridSearchCV``.
    param : dict used as ``param_grid`` (each value must be a list of candidates).
    features : iterable of str, optional
        Column names of ``raw_input`` to plot. Defaults to the eleven input
        columns in the order the original code plotted them.
    prefix : str, optional
        Text prepended to every output file name. With the default empty
        string the files are ``<feature>.png`` in the working directory, so
        successive calls for different models overwrite each other; pass e.g.
        ``prefix='xgb_'`` to keep them apart.

    Returns
    -------
    None. Side effects: prints 'start'/'finished' and writes one PNG per feature.
    """
    default_features = (
        'T_cc', 'i0', 'epsl_cl', 'L_cl', 'RH_cc', 'pressure',
        'A_c', 'L-PEM', 'L_GDL', 'A_a', 'epsp_cl',
    )
    selected = default_features if features is None else features

    print('start')
    grid = GridSearchCV(model, param_grid=param, cv=5)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_

    for feature in selected:
        plot_pdp(best_model, raw_input, feature)
        # Saves whatever figure is current after plot_pdp returns; figures are
        # left open afterwards, exactly as before.
        plt.savefig('{}{}.png'.format(prefix, feature))
    print('finished')
    

Using matplotlib backend: Qt5Agg


In [4]:
# XGBoost regressor. Every grid entry holds a single candidate, so the search
# evaluates exactly one configuration under 5-fold CV inside shap_plot.
model_XGBRegressor = xgb.XGBRegressor(random_state=seed)
param_xgb = {
    'learning_rate': [0.2],
    'reg_alpha': [0.01],
    'reg_lambda': [0],
    'subsample': [0.8],
}
shap_plot(model=model_XGBRegressor, param=param_xgb)

start
finished


In [5]:
# LightGBM regressor with a single-candidate grid.
# NOTE(review): LightGBM's documentation says row subsampling only takes
# effect when subsample_freq (bagging_freq) is non-zero; it is left at its
# default here, so 'subsample' may be a no-op - confirm this is intended.
model_lgbmRegressor = lightgbm.LGBMRegressor(random_state=seed)
param_lgbm = {
    'learning_rate': [0.2],
    'reg_alpha': [0],
    'reg_lambda': [0.01],
    'subsample': [0.5],
}
shap_plot(model=model_lgbmRegressor, param=param_lgbm)

start
finished


In [6]:
# scikit-learn gradient boosting regressor with a single-candidate grid.
# NOTE(review): 'ls' and 'auto' are spellings accepted by older scikit-learn
# releases; newer releases are reported to have renamed or removed them -
# confirm against the installed version before re-running.
model_GradientBoostingRegressor = ensemble.GradientBoostingRegressor(random_state=seed)
param_GB = {
    'loss': ['ls'],
    'max_features': ['auto'],
}
shap_plot(model=model_GradientBoostingRegressor, param=param_GB)

start
finished


In [7]:
# Random forest regressor; the grid pins max_features to a single value.
# NOTE(review): 'auto' is a legacy scikit-learn option - confirm the installed
# version still accepts it.
model_RandomForestRegressor = ensemble.RandomForestRegressor(random_state=seed)
param_RF = {'max_features': ['auto']}
shap_plot(model=model_RandomForestRegressor, param=param_RF)

start
finished


In [8]:
# Single extremely-randomized tree (sklearn.tree.ExtraTreeRegressor, as
# imported at the top), not the ExtraTreesRegressor ensemble.
model_ExtraTreeRegressor = ExtraTreeRegressor(random_state=seed)
param_ET = {
    'criterion': ['friedman_mse'],
    'splitter': ['best'],
}
shap_plot(model=model_ExtraTreeRegressor, param=param_ET)

start
finished


In [9]:
# Plain decision-tree regressor with a single-candidate grid.
# NOTE(review): 'auto' for max_features is a legacy scikit-learn option -
# confirm the installed version still accepts it.
model_DecisionTreeRegressor = tree.DecisionTreeRegressor(random_state=seed)
param_DT = {
    'criterion': ['friedman_mse'],
    'max_features': ['auto'],
    'splitter': ['best'],
}
shap_plot(model=model_DecisionTreeRegressor, param=param_DT)

start
finished


In [3]:
# CatBoost regressor, silenced with verbose=0. It uses the shared `seed` from
# the setup cell (value 1, identical to the previous hard-coded random_state)
# so every model in the notebook is seeded from one place.
model_CatboostRegressor = catboost.CatBoostRegressor(random_state=seed, verbose=0)
# GridSearchCV requires each grid value to be a sequence of candidates; a bare
# integer is rejected, so the single setting is wrapped in a list.
param_cat = {'iterations': [400]}
shap_plot(model_CatboostRegressor, param_cat)

start
finished
