# Partial Dependence Plots (PDPs)

In [6]:
import dalex as dx

import joblib

In [1]:
%run ../data/data.py

In [4]:
df = load_adult_data('train')
y = df['Income']
X = df.drop(columns='Income')

In [15]:
adult_rf = joblib.load('../models/adult_rf.pkl')

In [16]:
adult_rf.fit(X,y)

Pipeline(steps=[('features',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('numeric',
                                                  Pipeline(steps=[('standardscaler',
                                                                   StandardScaler())]),
                                                  Index(['Age', 'Final Weight', 'Years of Education', 'Capital Gain',
       'Capital Loss', 'Hours per Week'],
      dtype='object')),
                                                 ('low_cardinality',
                                                  Pipeline(steps=[('onehotencoder',
                                                                   OneHotEncoder(handle_unknown='ignore'))]),
                                                  Index(['Workclass', 'Marital Status', 'Relationship', 'Race', 'Sex'], dtype='object')),
                                                 ('high_cardinality',
                      

## Load Dalex Explainer

In [17]:
adult_rf_exp = dx.Explainer(adult_rf, X, y, label = "Adult RF Pipeline")

Preparation of a new explainer is initiated

  -> data              : 32560 rows 14 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 32560 values
  -> target variable   : Please note that 'y' is a string array.
  -> target variable   : 'y' should be a numeric or boolean array.
  -> target variable   : Otherwise an Error may occur in calculating residuals or loss.
  -> model_class       : sklearn.ensemble._forest.ExtraTreesClassifier (default)
  -> label             : Adult RF Pipeline
  -> predict function  : <function yhat_proba_default at 0x7f69ec5f0050> will be used (default)
  -> predict function  : Accepts only pandas.DataFrame, numpy.ndarray causes problems.
  -> predicted values  : min = 0.241, mean = 0.241, max = 0.241
  -> model type        : classification will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         :  'residual_function' returns an Error w

In [18]:
pd_rf = adult_rf_exp.model_profile()
pd_rf.result

Calculating ceteris paribus: 100%|██████████| 14/14 [00:25<00:00,  1.84s/it]


Unnamed: 0,_vname_,_label_,_x_,_yhat_,_ids_
0,Age,Adult RF Pipeline,17.00,0.240817,0
1,Age,Adult RF Pipeline,17.73,0.240817,0
2,Age,Adult RF Pipeline,18.46,0.240817,0
3,Age,Adult RF Pipeline,19.19,0.240817,0
4,Age,Adult RF Pipeline,19.92,0.240817,0
...,...,...,...,...,...
601,Hours per Week,Adult RF Pipeline,95.08,0.240817,0
602,Hours per Week,Adult RF Pipeline,96.06,0.240817,0
603,Hours per Week,Adult RF Pipeline,97.04,0.240817,0
604,Hours per Week,Adult RF Pipeline,98.02,0.240817,0


In [19]:
pd_rf.plot()

In [14]:
adult_rf.predict(X) == y

0         True
1         True
2         True
3         True
4         True
         ...  
32555     True
32556    False
32557     True
32558     True
32559    False
Name: Income, Length: 32560, dtype: bool

In [3]:
df

Unnamed: 0,Age,Workclass,Final Weight,Education,Years of Education,Marital Status,Occupation,Relationship,Race,Sex,Capital Gain,Capital Loss,Hours per Week,Native Country,Income
0,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
1,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
2,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
3,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
4,37,Private,284582,Masters,14,Married-civ-spouse,Exec-managerial,Wife,White,Female,0,0,40,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32555,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
32556,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,>50K
32557,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
32558,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K
