Merge branch 'main' into zhb/Causal
zhb000 committed Feb 28, 2022
2 parents 4c8cda1 + 274543c commit a4e2948
Showing 6 changed files with 135 additions and 37 deletions.
@@ -79,7 +79,7 @@
"id": "clinical-henry",
"metadata": {},
"source": [
"First, load the census dataset and specify the different types of features. Then, clean the target feature values to include only 0 and 1."
"First, load the census dataset and specify the different types of features. Compose a pipeline which contains a preprocessor and estimator."
]
},
{
@@ -99,7 +99,7 @@
" y = dataset[[target_feature]]\n",
" return X, y\n",
"\n",
"def clean_data(X, y, target_feature):\n",
"def create_classification_pipeline(X, y, target_feature):\n",
" features = X.columns.values.tolist()\n",
" classes = y[target_feature].unique().tolist()\n",
" pipe_cfg = {\n",
@@ -118,9 +118,13 @@
" ('num_pipe', num_pipe, pipe_cfg['num_cols']),\n",
" ('cat_pipe', cat_pipe, pipe_cfg['cat_cols'])\n",
" ])\n",
" X = feat_pipe.fit_transform(X)\n",
" print(pipe_cfg['cat_cols'])\n",
" return X, feat_pipe, features, classes\n",
"\n",
" # Append classifier to preprocessing pipeline.\n",
" # Now we have a full prediction pipeline.\n",
" pipeline = Pipeline(steps=[('preprocessor', feat_pipe),\n",
" ('model', LGBMClassifier())])\n",
"\n",
" return pipeline\n",
"\n",
"outdirname = 'responsibleai.12.28.21'\n",
"try:\n",
@@ -140,30 +144,25 @@
"train_data = pd.read_csv('adult-train.csv')\n",
"test_data = pd.read_csv('adult-test.csv')\n",
"\n",
"\n",
"X_train_original, y_train = split_label(train_data, target_feature)\n",
"X_test_original, y_test = split_label(test_data, target_feature)\n",
"\n",
"pipeline = create_classification_pipeline(X_train_original, y_train, target_feature)\n",
"\n",
"X_train, feat_pipe, features, classes = clean_data(X_train_original, y_train, target_feature)\n",
"y_train = y_train[target_feature].to_numpy()\n",
"\n",
"X_test = feat_pipe.transform(X_test_original)\n",
"y_test = y_test[target_feature].to_numpy()\n",
"\n",
"train_data[target_feature] = y_train\n",
"test_data[target_feature] = y_test\n",
"\n",
"test_data_sample = test_data.sample(n=500, random_state=5)\n",
"train_data_sample = train_data.sample(n=8000, random_state=5)"
"# Take 500 samples from the test data\n",
"test_data_sample = test_data.sample(n=500, random_state=5)"
]
},
{
"cell_type": "markdown",
"id": "potential-proportion",
"metadata": {},
"source": [
"Train a LightGBM classifier on the training data."
"Train the classification pipeline composed in the previous cell on the training data."
]
},
{
@@ -173,8 +172,7 @@
"metadata": {},
"outputs": [],
"source": [
"clf = LGBMClassifier()\n",
"model = clf.fit(X_train, y_train)"
"model = pipeline.fit(X_train_original, y_train)"
]
},
{
@@ -213,10 +211,8 @@
"metadata": {},
"outputs": [],
"source": [
"dashboard_pipeline = Pipeline(steps=[('preprocess', feat_pipe), ('model', model)])\n",
"\n",
"rai_insights = RAIInsights(dashboard_pipeline, train_data_sample, test_data_sample, target_feature, 'classification',\n",
" categorical_features=categorical_features)"
"rai_insights = RAIInsights(model, train_data, test_data_sample, target_feature, 'classification',\n",
" categorical_features=categorical_features)"
]
},
{
@@ -519,7 +515,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.13"
"version": "3.7.11"
}
},
"nbformat": 4,
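For orientation, the net effect of the notebook changes above can be summarized in a short sketch: the preprocessing ColumnTransformer and the LGBMClassifier are now composed into one sklearn Pipeline, that pipeline is fit on the raw training frame, and the fitted pipeline plus the untransformed data frames are handed to RAIInsights. This is a hedged illustration only; the feature lists, target name, and imputation choices below are placeholders, not the notebook's exact values.

# Minimal sketch of the pipeline-based flow introduced in this notebook diff.
# Placeholder feature lists -- the real notebook derives them from the census CSVs.
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

from responsibleai import RAIInsights

target_feature = 'income'
categorical_features = ['workclass', 'occupation']   # illustrative subset
numeric_features = ['age', 'hours-per-week']          # illustrative subset

num_pipe = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])
cat_pipe = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))])
preprocessor = ColumnTransformer(transformers=[
    ('num_pipe', num_pipe, numeric_features),
    ('cat_pipe', cat_pipe, categorical_features)])

# Preprocessing and the LightGBM estimator live in a single pipeline, so the
# raw (untransformed) train/test frames can be passed straight to RAIInsights.
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('model', LGBMClassifier())])

train_data = pd.read_csv('adult-train.csv')
test_data = pd.read_csv('adult-test.csv')
test_data_sample = test_data.sample(n=500, random_state=5)

X_train = train_data.drop(columns=[target_feature])
y_train = train_data[target_feature]
model = pipeline.fit(X_train, y_train)

rai_insights = RAIInsights(model, train_data, test_data_sample,
                           target_feature, 'classification',
                           categorical_features=categorical_features)

Because the preprocessor is inside the model object, the dashboard no longer needs the separate dashboard_pipeline the old notebook assembled, which is why that cell is removed in the hunk above.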
7 changes: 5 additions & 2 deletions raiwidgets/raiwidgets/responsibleai_dashboard_input.py
@@ -41,8 +41,11 @@ def __init__(
self.dashboard_input = analysis.get_data()

self._validate_cohort_list(cohort_list)
# Add cohort_list to dashboard_input
self.dashboard_input.cohortData = cohort_list
if cohort_list is not None:
# Add cohort_list to dashboard_input
self.dashboard_input.cohortData = cohort_list
else:
self.dashboard_input.cohortData = []

self._feature_length = len(self.dashboard_input.dataset.feature_names)
self._row_length = len(self.dashboard_input.dataset.features)
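The guard above makes cohortData default to an empty list when no cohorts are supplied, instead of storing None. A hedged sketch of the caller-side pattern, modelled on the tests added later in this commit; the module path for the cohort classes is an assumption, while the filter values mirror the test code.

# Sketch only: mirrors the cohort usage in the tests added by this commit.
# The import path for Cohort/CohortFilter/CohortFilterMethods is assumed.
from raiwidgets import ResponsibleAIDashboard
from raiwidgets.cohort import Cohort, CohortFilter, CohortFilterMethods


def build_dashboard(rai_insights, with_cohorts=True):
    if not with_cohorts:
        # cohort_list omitted: dashboard_input.cohortData is now [] rather
        # than None, which is what the updated length checks rely on.
        return ResponsibleAIDashboard(rai_insights)

    cohort = Cohort(name='Cohort Continuous')
    cohort.add_cohort_filter(CohortFilter(
        method=CohortFilterMethods.METHOD_LESS,
        arg=[30.5],
        column='HouseAge'))
    return ResponsibleAIDashboard(rai_insights, cohort_list=[cohort])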
34 changes: 33 additions & 1 deletion raiwidgets/tests/conftest.py
@@ -1,16 +1,19 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

import pandas as pd
import pytest
import shap
import sklearn
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

from responsibleai import RAIInsights


@pytest.fixture(scope='session')
def create_rai_insights_object():
def create_rai_insights_object_classification():
X, y = shap.datasets.adult()
y = [1 if r else 0 for r in y]

@@ -41,3 +44,32 @@ def create_rai_insights_object():
skip_cat_limit_checks=True)
ri.compute()
return ri


@pytest.fixture(scope='session')
def create_rai_insights_object_regression():
housing = fetch_california_housing()
X_train, X_test, y_train, y_test = train_test_split(housing.data,
housing.target,
test_size=0.005,
random_state=7)
X_train = pd.DataFrame(X_train, columns=housing.feature_names)
X_test = pd.DataFrame(X_test, columns=housing.feature_names)

rfc = RandomForestRegressor(n_estimators=10, max_depth=4,
random_state=777)
model = rfc.fit(X_train, y_train)

X_train['target'] = y_train
X_test['target'] = y_test

ri = RAIInsights(model, X_train, X_test, 'target', 'regression')
ri.explainer.add()
ri.counterfactual.add(10, desired_range=[5, 10])
ri.error_analysis.add()
ri.causal.add(treatment_features=['AveRooms'],
heterogeneity_features=None,
upper_bound_on_cat_expansion=42,
skip_cat_limit_checks=True)
ri.compute()
return ri
5 changes: 3 additions & 2 deletions raiwidgets/tests/test_model_analysis_dashboard.py
@@ -27,8 +27,9 @@ def validate_rai_dashboard_data(self, rai_widget):
rai_widget.input.dashboard_input.counterfactualData[0],
CounterfactualData)

def test_model_analysis_adult(self, tmpdir, create_rai_insights_object):
ri = create_rai_insights_object
def test_model_analysis_adult(self, tmpdir,
create_rai_insights_object_classification):
ri = create_rai_insights_object_classification
with pytest.warns(
DeprecationWarning,
match="MODULE-DEPRECATION-WARNING: "
83 changes: 74 additions & 9 deletions raiwidgets/tests/test_responsibleai_dashboard.py
@@ -32,16 +32,17 @@ def validate_rai_dashboard_data(self, rai_widget):
rai_widget.input.dashboard_input.counterfactualData[0],
CounterfactualData)

if rai_widget.input.dashboard_input.cohortData is not None:
if len(rai_widget.input.dashboard_input.cohortData) != 0:
assert isinstance(rai_widget.input.dashboard_input.cohortData[0],
Cohort)

# Make sure the dashboard input can be serialized
json.dumps(rai_widget.input.dashboard_input,
default=serialize_json_safe)

def test_responsibleai_adult(self, tmpdir, create_rai_insights_object):
ri = create_rai_insights_object
def test_responsibleai_adult_save_and_load(
self, tmpdir, create_rai_insights_object_classification):
ri = create_rai_insights_object_classification

widget = ResponsibleAIDashboard(ri)
self.validate_rai_dashboard_data(widget)
@@ -53,9 +54,73 @@ def test_responsibleai_adult(self, tmpdir, create_rai_insights_object):
widget_copy = ResponsibleAIDashboard(ri_copy)
self.validate_rai_dashboard_data(widget_copy)

def test_responsibleai_housing_save_and_load(
self, tmpdir, create_rai_insights_object_regression):
ri = create_rai_insights_object_regression

widget = ResponsibleAIDashboard(ri)
self.validate_rai_dashboard_data(widget)

save_dir = tmpdir.mkdir('save-dir')
ri.save(save_dir)
ri_copy = ri.load(save_dir)

widget_copy = ResponsibleAIDashboard(ri_copy)
self.validate_rai_dashboard_data(widget_copy)

def test_responsibleai_housing_with_pre_defined_cohorts(
self, create_rai_insights_object_regression):
ri = create_rai_insights_object_regression

cohort_filter_continuous_1 = CohortFilter(
method=CohortFilterMethods.METHOD_LESS,
arg=[30.5],
column='HouseAge')
cohort_filter_continuous_2 = CohortFilter(
method=CohortFilterMethods.METHOD_GREATER,
arg=[3.0],
column='AveRooms')

user_cohort_continuous = Cohort(name='Cohort Continuous')
user_cohort_continuous.add_cohort_filter(cohort_filter_continuous_1)
user_cohort_continuous.add_cohort_filter(cohort_filter_continuous_2)

cohort_filter_index = CohortFilter(
method=CohortFilterMethods.METHOD_LESS,
arg=[20],
column='Index')

user_cohort_index = Cohort(name='Cohort Index')
user_cohort_index.add_cohort_filter(cohort_filter_index)

cohort_filter_predicted_y = CohortFilter(
method=CohortFilterMethods.METHOD_LESS,
arg=[5.0],
column='Predicted Y')

user_cohort_predicted_y = Cohort(name='Cohort Predicted Y')
user_cohort_predicted_y.add_cohort_filter(cohort_filter_predicted_y)

cohort_filter_true_y = CohortFilter(
method=CohortFilterMethods.METHOD_GREATER,
arg=[1.0],
column='True Y')

user_cohort_true_y = Cohort(name='Cohort True Y')
user_cohort_true_y.add_cohort_filter(cohort_filter_true_y)

widget = ResponsibleAIDashboard(
ri,
cohort_list=[user_cohort_continuous,
user_cohort_index,
user_cohort_predicted_y,
user_cohort_true_y])

self.validate_rai_dashboard_data(widget)

def test_responsibleai_adult_with_pre_defined_cohorts(
self, create_rai_insights_object):
ri = create_rai_insights_object
self, create_rai_insights_object_classification):
ri = create_rai_insights_object_classification

cohort_filter_continuous_1 = CohortFilter(
method=CohortFilterMethods.METHOD_LESS,
@@ -95,8 +160,8 @@ def test_responsibleai_adult_with_pre_defined_cohorts(
self.validate_rai_dashboard_data(widget)

def test_responsibleai_adult_with_ill_defined_cohorts(
self, create_rai_insights_object):
ri = create_rai_insights_object
self, create_rai_insights_object_classification):
ri = create_rai_insights_object_classification

cohort_filter_continuous_1 = CohortFilter(
method=CohortFilterMethods.METHOD_LESS,
@@ -123,8 +188,8 @@ def test_responsibleai_adult_with_ill_defined_cohorts(
ri, cohort_list=[user_cohort_continuous, {}])

def test_responsibleai_adult_duplicate_cohort_names(
self, create_rai_insights_object):
ri = create_rai_insights_object
self, create_rai_insights_object_classification):
ri = create_rai_insights_object_classification

cohort_filter_continuous_1 = CohortFilter(
method=CohortFilterMethods.METHOD_LESS,
5 changes: 3 additions & 2 deletions raiwidgets/tests/test_responsibleai_dashboard_input.py
@@ -8,8 +8,9 @@


class TestResponsibleAIDashboardInput:
def test_model_analysis_adult(self, create_rai_insights_object):
ri = create_rai_insights_object
def test_model_analysis_adult(
self, create_rai_insights_object_classification):
ri = create_rai_insights_object_classification
knn = ri.model
test_data = ri.test

