In [2]:
###########import packages##########
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf 
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from xgboost import plot_importance
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
%matplotlib
###########fix random seed for reproducability##########
seed=21
###########wrapping root mean square error for later calls##########
def rmse(predictions, targets):
    return np.sqrt(((predictions - targets) ** 2).mean())
###########loading data##########
fdata=pd.read_csv('database.csv',encoding="gbk")
raw_data=fdata.loc[:,[                     
                      'Anodic Platinum Loading Amount mgPt cm-2',#0
                      'Anodic catalyst type x wt% Pt/C',#1
                      'Hot Press Temperature ℃',#2
                      'Hot Press Time min',#3
                      'Hot Press Pressure Mpa',#4
                      'Humidity %',#5
                      'Operating Temperature ℃',#6
                      'Flowing rate of H2 ml min-1 ',#7
                      'Flowing rate of O2 ml min-1 ',#8
                      'Back Pressure Mpa',#9
                      'Cathodic Loading Amount mg cm-2',#10
                      'Membrane Thickness',#11
                      'I/C Ratio(ionomer/catalyst)',#12
                      'BET Surface Area cm2/g',#13
                      'Micropore ratio',#14
                      'Mesopore ratio',#15
                      'Macropore ratio',#16
                      'Co Cotent wt/%',#17
                      'Fe Cotent wt/%(ICP for metal)',#18
                      'N Content at/%',#19
                      'O Content at/%',#20
                      'S Content at/%',#21
                      'Pyridinic N+metal N Content ratio at/%',#22
                      'Pyrrolic N Content ratio at/%',#23
                      'Graphitic N Content ratio at/%',#24
                      'Oxidized N Content ratio at/%',#25:total 26features
                      'higher than 700'#label
                        ]]
###########handling missing values##########
median_raw_data=raw_data.median()
dict_median_raw_data=median_raw_data.to_dict()
data=raw_data.fillna(dict_median_raw_data)
###########train test splitting##########
raw_param=data.iloc[:,0:26]
raw_power=data.iloc[:,26]
X=raw_param.values.astype(np.float32)
y=raw_power.values.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.15,random_state=seed)
###########XGBoost gridsearch CV for best hyperparameter##########
model_XGBClassifier = xgb.XGBClassifier(random_state=seed)
###########defining the parameters dictionary##########
param = {
         'max_depth': [5,7,9,11],
    'learning_rate': [0.01,0.05,0.1],
    'n_estimators': [1000,2000],
    'subsample': [0.6,0.7,0.8,0.9],
    'objective':['reg:squarederror'],
    'lambda':[0.1],
    'alpha':[0.1]
       }
grid = GridSearchCV(model_XGBClassifier,param_grid=param,cv=5)
grid.fit(X_train,y_train)
best_model=grid.best_estimator_
###########generating confusion matrix to evaluate the prediction accuracy##########
print('Best Regressor:',grid.best_params_,'Best Score:', grid.best_score_) 
prediction_train=best_model.predict(X_train)
prediction_test=best_model.predict(X_test)
print(classification_report(y_train,prediction_train))
print(classification_report(y_test,prediction_test))
###########evaluating feature importances##########
print(best_model.feature_importances_)
plot_importance(best_model)
plt.show()

Using matplotlib backend: Qt5Agg
Best Regressor: {'alpha': 0.1, 'lambda': 0.1, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 2000, 'objective': 'reg:squarederror', 'subsample': 0.6} Best Score: 0.7471014492753623
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        81
         1.0       1.00      1.00      1.00        38

    accuracy                           1.00       119
   macro avg       1.00      1.00      1.00       119
weighted avg       1.00      1.00      1.00       119

              precision    recall  f1-score   support

         0.0       0.88      1.00      0.94        15
         1.0       1.00      0.67      0.80         6

    accuracy                           0.90        21
   macro avg       0.94      0.83      0.87        21
weighted avg       0.92      0.90      0.90        21

[0.05773495 0.03508306 0.03792998 0.03572626 0.0149122  0.02908853
 0.01162377 0.01192954 0.07364751 0.01776135 0.02173499 0.