In [None]:
import pandas as pd
import pickle
import shap

# PART 1: Define and fit the transformers that will get the data to the state ====================
#         required to run the explanation algorithm
# Start from the imputed data; impute() is a helper function that imputes missing values.
X_explain = impute(X_orig)

# Every object-dtype column is one-hot encoded.
columns_to_encode = X_explain.select_dtypes(include=["object"]).columns
to_encode = X_explain[columns_to_encode]
# NOTE(review): this call uses the scikit-learn style API (sparse=..., get_feature_names),
# while other cells refer to that class as SklearnOneHotEncoder - confirm which
# OneHotEncoder is in scope when this cell runs.
ohe = OneHotEncoder(sparse=False).fit(to_encode)
encoded_columns = ohe.get_feature_names(to_encode.columns)
index = to_encode.index
encoded = ohe.transform(to_encode)
encoded_df = pd.DataFrame(encoded, index=index, columns=encoded_columns)

# Swap the raw categorical columns for their encoded counterparts.
X_explain = pd.concat(
    [X_explain.drop(columns=columns_to_encode), encoded_df], axis="columns")
columns = X_explain.columns
# ================================================================================================

# PART 2: Load in and generate a human-readable explanation for a model ==========================
# Load the trained model. The context manager closes the file handle as soon as
# unpickling finishes instead of leaving it open for the rest of the session.
# NOTE(review): pickle.load can execute arbitrary code - only load model files
# that come from a trusted source.
with open("../tutorials/ames_housing/model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# Generate a SHAP-type explanation for the first row of the transformed data
explainer = shap.Explainer(model, X_explain)
explanation = explainer(X_explain.iloc[0:1])
explanation_df = pd.DataFrame(explanation.values, columns=columns)

# Convert explanation to a more human-readable form
for col in columns_to_encode:
    # Gather the one-hot columns named "<col>_..." and collapse their
    # contributions into a single value stored under the original feature name.
    encoded_features = [item for item in encoded_columns if item.startswith(col+'_')]
    summed_contribution = explanation_df[encoded_features].sum(axis=1)
    explanation_df = explanation_df.drop(encoded_features, axis="columns")
    explanation_df[col] = summed_contribution
descriptions = {...} # dictionary of feature name to human-readable description
explanation_df = explanation_df.rename(descriptions, axis='columns')
# ================================================================================================

In [None]:
# PART 1: Define and fit the transformers that will get the data to the state ====================
#         required to run the explanation algorithm
# Object-dtype columns are the ones handed to the one-hot encoder.
object_columns = X_orig.select_dtypes(include=["object"]).columns
# The transformer list (imputation, then one-hot encoding) is passed to
# fit_transformers together with the original data.
transformers = [Imputer(), OneHotEncoder(object_columns)]
fit_transformers(transformers, X_orig)

# PART 2: Load in and generate a human-readable explanation for a model ==========================
descriptions = {...} # dictionary of feature name to human-readable description
# The explainer receives the path to the pickled model, the original (untransformed)
# data and targets, the transformer list, and the feature descriptions.
lfc = LocalFeatureContribution(
    model="../tutorials/ames_housing/model.pkl",
    x_train_orig=X_orig, y_orig=y,
    e_algorithm="shap",
    transformers=transformers,
    feature_descriptions=descriptions)
lfc.fit()
# Produce the explanation for the first row of the original data.
explanation = lfc.produce(X_orig.iloc[0:1])
# ================================================================================================

In [None]:
# PART 1: Define and fit the transformers that will get the data to the state ====================
#         required to run the explanation algorithm
# One-hot encode the categorical "ocean_proximity" feature
x_to_encode = X_orig[["ocean_proximity"]]
ohe = SklearnOneHotEncoder(sparse=False).fit(x_to_encode)
encoded_columns = ohe.get_feature_names(x_to_encode.columns)
index = x_to_encode.index
ocean_encoded = pd.DataFrame(
    ohe.transform(x_to_encode), index=index, columns=encoded_columns)
# Algorithm-ready data: the raw categorical column is replaced by its encoded columns.
X_explain = pd.concat(
    [X_orig.drop(columns="ocean_proximity"), ocean_encoded], axis="columns")
columns = X_explain.columns

# Convert the latitude and longitude features to the nearest city
X_interpret = X_orig.copy()
for index, row in cities.iterrows():
    lat, lon = row["Latitude"], row["Longitude"]
    # A row belongs to this city when both coordinates are strictly within 0.1 of it.
    latitude_close = (X_interpret["latitude"] > lat-0.1) & (X_interpret["latitude"] < lat+0.1)
    longitude_close = (X_interpret["longitude"] > lon-0.1) & (X_interpret["longitude"] < lon+0.1)
    X_interpret.loc[latitude_close & longitude_close, "city"] = row["Name"]
# The raw coordinates are no longer needed in the interpretable data.
X_interpret = X_interpret.drop(["latitude", "longitude"], axis=1)
# =================================================================================================

# PART 2: Load in and generate a human-readable explanation for a model ==========================
# `model` is not defined in this cell; it must already exist in the notebook namespace.
explainer = shap.Explainer(model, X_explain)
shap_values = explainer(X_explain).values
# Global importance: mean absolute contribution per feature, kept as a single-row array.
explanation = np.absolute(shap_values).mean(axis=0).reshape(1, -1)
explanation_df = pd.DataFrame(explanation, columns=columns)

# Collapse the one-hot "ocean_proximity_*" columns back into one feature.
encoded_features = [name for name in encoded_columns if name.startswith("ocean_proximity_")]
summed_contribution = explanation_df[encoded_features].sum(axis=1)
explanation_df = explanation_df.drop(columns=encoded_features)
explanation_df["ocean_proximity"] = summed_contribution

# The importance of "city" is the sum of the longitude and latitude importances.
explanation_df["city"] = explanation_df["longitude"] + explanation_df["latitude"]
explanation_df = explanation_df.drop(["longitude", "latitude"], axis=1)
shap_explanation = explanation_df
# ================================================================================================

In [None]:
# PART 1: Define and fit the transformers that will get the data to the state ====================
#         required to run the explanation algorithm
# Define a transformer that converts latitude/longitude values to city names
class CityConverter(Transformer):
  """Transformer that replaces latitude/longitude features with a "city" feature.

  A row is assigned to a city when both its latitude and its longitude lie strictly
  within 0.1 of that city's coordinates. When a row matches several cities, the last
  matching row of the cities table wins; rows that match none get no city value.
  """

  def __init__(self, **kwargs):
    """Store the cities table; all keyword arguments are forwarded to Transformer."""
    # `cities` is read from the surrounding notebook namespace. It is indexed below
    # by its "Latitude", "Longitude" and "Name" columns.
    self.cities = cities # a dataframe of longitude/latitude values for cities
    super().__init__(**kwargs)

  def data_transform(self, x):
    """Return a copy of x with "latitude"/"longitude" replaced by "city".

    The input frame is not modified: the "city" column is written to a copy, so the
    caller's data does not silently gain a column.
    """
    x = x.copy()
    for index, row in self.cities.iterrows():
      lat = row["Latitude"]
      lon = row["Longitude"]
      x.loc[(x["latitude"] > lat-0.1)
            & (x["latitude"] < lat+0.1)
            & (x["longitude"] > lon-0.1)
            & (x["longitude"] < lon+0.1), "city"] = row["Name"]
    return x.drop(["latitude", "longitude"], axis=1)

  def transform_explanation_additive_contributions(self, explanation): #****
    """Merge the longitude and latitude contributions into one "city" contribution.

    Returns a new AdditiveFeatureContributionExplanation. The frame obtained from
    explanation.get() is only read, never written to.
    """
    contributions = explanation.get()
    city_contribution = contributions["longitude"] + contributions["latitude"]
    # drop() returns a new frame, so adding "city" afterwards cannot touch the
    # frame held by the incoming explanation.
    contributions = contributions.drop(["longitude", "latitude"], axis=1)
    contributions["city"] = city_contribution
    return AdditiveFeatureContributionExplanation(contributions)

# Initialize and fit the transformers
one_hot_encoder = OneHotEncoder(columns=["ocean_proximity"])
# model=False / interpret=True reach the Transformer base class through **kwargs.
# NOTE(review): presumably they limit this transformer to the interpretable feature
# space rather than the model's input - confirm against the Transformer documentation.
city_converter = CityConverter(model=False, interpret=True)
transformers = [one_hot_encoder, city_converter]
fit_transformers(transformers, X_orig)
# ================================================================================================

# PART 2: Load in and generate a human-readable explanation for a model ==========================
# `model` is not defined in this cell; it must already exist in the notebook namespace.
global_explainer = GlobalFeatureImportance(
    model, x_train_orig=X_orig, y_orig=y, e_algorithm="shap", transformers=transformers)
global_explainer.fit()
# produce() is called without input data here.
pyreal_explanation = global_explainer.produce()
# ================================================================================================

In [None]:
# PART 1: Define and fit the transformers that will get the data to the state ====================
#         required to run the explanation algorithm
# Define a transformer that converts yes/no labels to boolean features
def boolean_encode(X):
    """Return a copy of X with yes/no columns as 1/0 and "famsize" as integer codes.

    In the eight yes/no columns, 'yes' becomes 1 and 'no' becomes 0; other values are
    left as they are. "famsize" is encoded as 0 for 'LE3' and 1 for 'GT3', and any
    other value gets the code -1. The input frame itself is not modified.
    """
    yes_no_columns = ("schoolsup", "famsup", "paid", "activities",
                      "nursery", "internet", "romantic", "higher")
    encoded = X.copy()
    for name in yes_no_columns:
        encoded[name] = encoded[name].replace(('yes', 'no'), (1, 0))
    # Fix the category list explicitly so the integer codes do not depend on
    # which values happen to be present in the data.
    encoded["famsize"] = (
        encoded["famsize"]
        .astype('category')
        .cat.set_categories(['LE3', 'GT3'])
        .cat.reorder_categories(['LE3', 'GT3'])
        .cat.codes
    )
    return encoded

# Transform the data to the algorithm-ready state
# Step 1: yes/no columns and "famsize" become numeric.
X_explain = boolean_encode(X_orig)

# Step 2: one-hot encode the remaining categorical columns.
columns_to_encode = ["school", "sex", "address", "Pstatus", "reason", "guardian", "Mjob", "Fjob"]
to_encode = X_orig[columns_to_encode]
ohe = SklearnOneHotEncoder(sparse=False).fit(to_encode)
encoded_columns = ohe.get_feature_names(to_encode.columns)
encoded = pd.DataFrame(
    ohe.transform(to_encode), index=to_encode.index, columns=encoded_columns)
X_explain = pd.concat(
    [X_explain.drop(columns=columns_to_encode), encoded], axis="columns")

# Step 3: standardize every column, then restore the frame's row and column labels.
standard_scaler = StandardScaler()
columns = X_explain.columns
index = X_explain.index
X_explain = pd.DataFrame(
    standard_scaler.fit_transform(X_explain), index=index, columns=columns)

# PART 2: Load in and generate a human-readable explanation for a model ==========================
# Load the trained model. The context manager closes the file handle as soon as
# unpickling finishes instead of leaving it open for the rest of the session.
# NOTE(review): pickle.load can execute arbitrary code - only load model files
# that come from a trusted source.
with open("../tutorials/student/model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# Train a decision tree model that makes similar predictions to the real model:
# the tree is fitted on the real model's own predictions for the prepared data.
tree_explainer = tree.DecisionTreeClassifier()
tree_explainer.fit(X_explain, model.predict(X_explain))

In [None]:
# PART 1: Define and fit the transformers that will get the data to the state ====================
#         required to run the explanation algorithm
# Define a transformer that converts yes/no labels to boolean features
class BooleanEncoder(Transformer):
    """Transformer that turns yes/no columns into 1/0 and "famsize" into integer codes."""

    def __init__(self, cols, **kwargs):
        """Remember the yes/no columns to encode; other keyword arguments go to Transformer."""
        self.cols = cols
        super().__init__(**kwargs)

    def data_transform(self, x):
        """Return an encoded copy of x; the input frame is left unmodified.

        In each column listed in self.cols, 'yes' becomes 1 and 'no' becomes 0.
        "famsize" becomes 0 for 'LE3', 1 for 'GT3', and -1 for anything else.
        """
        encoded = x.copy()
        for name in self.cols:
            encoded[name] = encoded[name].replace(('yes', 'no'), (1, 0))
        # Fix the category list explicitly so the codes do not depend on the data seen.
        encoded["famsize"] = (
            encoded["famsize"]
            .astype('category')
            .cat.set_categories(['LE3', 'GT3'])
            .cat.reorder_categories(['LE3', 'GT3'])
            .cat.codes
        )
        return encoded

    def inverse_transform_explanation(self, explanation):
        """Hand the explanation back unchanged."""
        return explanation

# Define and fit the transformers
# One-hot encode the multi-valued categorical columns.
onehotencoder = OneHotEncoder(["school", "sex", "address", "Pstatus",
                              "reason", "guardian", "Mjob", "Fjob"])
# Encode the yes/no columns (BooleanEncoder also encodes "famsize" internally).
boolean_encoder = BooleanEncoder(
    ["schoolsup", "famsup", "paid", "activities", "nursery", "internet", "romantic", "higher"])
# StandardScaler is wrapped in DataFrameWrapper.
# NOTE(review): presumably so that it consumes and returns DataFrames - confirm.
standard_scaler = DataFrameWrapper(StandardScaler())
transformers = [onehotencoder, boolean_encoder, standard_scaler]
fit_transformers(transformers, X_orig)

# PART 2: Load in and generate a human-readable explanation for a model ==========================
# Generate the explanation - a decision tree model that makes similar predictions to the real model
# `feature_descriptions` is not defined in this cell; it must already exist in the
# notebook namespace. fit_on_init=True is passed, and no separate fit() call is made here.
dte = DecisionTreeExplainer(model="../tutorials/student/model.pkl",
                            x_train_orig=X_orig, transformers=transformers,
                            is_classifier=True, max_depth=4,
                            feature_descriptions=feature_descriptions,
                            fit_on_init=True)
explanation_pyreal = dte.produce()
