Data source:

Kaggle. (2012). Titanic: Machine Learning from Disaster. Retrieved from https://www.kaggle.com/c/titanic/data.

In [1]:
import pandas as pd
import numpy as np
import sys
import os
# Add '../../../pyreal' (relative to the notebook's working directory) to the
# module search path at index 1, i.e. right after the first sys.path entry.
# NOTE(review): presumably this points at a local pyreal checkout so the
# `pyreal` imports in later cells resolve to it — confirm the directory layout.
sys.path.insert(1, '../../../pyreal')

In [2]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test split -- and therefore the reported accuracy and
# the row that gets explained later -- is the same on every Restart & Run All.
RANDOM_SEED = 42

# The Kaggle Titanic training CSV is expected under ./data relative to the
# notebook. Fail early with an actionable message instead of a bare pandas
# FileNotFoundError when it has not been downloaded yet.
data_path = os.path.join("data", "train.csv")
if not os.path.exists(data_path):
    raise FileNotFoundError(
        "Could not find %r. Download train.csv from "
        "https://www.kaggle.com/c/titanic/data and place it in the 'data' "
        "directory next to this notebook." % data_path
    )

# Separate the target column ("Survived") from the feature columns.
x_orig = pd.read_csv(data_path)
y = x_orig["Survived"]
x_orig = x_orig.drop("Survived", axis="columns")

# Hold out 20% of the rows for testing; random_state makes the shuffle repeatable.
x_orig_train, x_orig_test, y_train, y_test = train_test_split(
    x_orig, y, test_size=.20, random_state=RANDOM_SEED
)

print("Features:", x_orig.columns.values)

# Human-readable descriptions keyed by original column name; passed to the
# explainer in a later cell.
# NOTE(review): columns such as "Name" and "Sex" have no entry here --
# presumably the explainer falls back to the raw column name; confirm.
feature_descriptions = {
    "PassengerId": "Passenger ID",
    "Pclass": "Ticket Class",
    "SibSp": "Number of siblings/spouses aboard",
    "Parch": "Number of parents/children aboard",
    "Ticket": "Ticket Number",
    "Fare": "Passenger Fare",
    "Cabin": "Cabin Number",
    "Embarked": "Port of Embarkment",
}

FileNotFoundError: [Errno 2] No such file or directory: 'data/train.csv'

In [None]:
from pyreal.utils.transformer import (
    ColumnDropTransformer,
    MultiTypeImputer,
    OneHotEncoderWrapper,
    fit_transformers,
    run_transformers,
)

# Preprocessing steps, applied in list order:
#   1. drop the listed columns,
#   2. run the imputer,
#   3. one-hot encode the two listed categorical columns.
column_drop = ColumnDropTransformer(["PassengerId", "Name", "Ticket", "Cabin"])
imputer = MultiTypeImputer()
one_hot_encoder = OneHotEncoderWrapper(["Sex", "Embarked"])
transformers = [column_drop, imputer, one_hot_encoder]

# Fit on the training split only, then push both splits through the fitted steps.
fit_transformers(transformers, x_orig_train)
x_transform_train, x_transform_test = (
    run_transformers(transformers, split)
    for split in (x_orig_train, x_orig_test)
)

In [None]:
from sklearn.linear_model import LogisticRegression
import pickle

# Fit a logistic regression (capped at 500 solver iterations) on the
# transformed training features; fit() returns the estimator itself.
model = LogisticRegression(max_iter=500).fit(x_transform_train, y_train)

# Score on the held-out split: fraction of matching predictions, as a percentage.
preds = model.predict(x_transform_test)
accuracy_pct = np.mean(preds == y_test) * 100
print("Test accuracy: %.2f" % accuracy_pct)

In [None]:
from pyreal.explainers import LocalFeatureContribution

# Build the explainer around the trained model and the untransformed training
# data. The same transformer list is supplied for both m_transforms and
# e_transforms, and fit_on_init=True asks for fitting at construction time.
lfc = LocalFeatureContribution(
    model=model,
    x_orig=x_orig_train,
    m_transforms=transformers,
    e_transforms=transformers,
    feature_descriptions=feature_descriptions,
    fit_on_init=True,
)

In [None]:
from pyreal.utils import visualize

# Explain the first row of the held-out split.
input_to_explain = x_orig_test.iloc[0]

# Ask the explainer for this row's contributions, and for the same row
# converted to its interpretable form (used as the displayed values).
contributions = lfc.produce(input_to_explain)
x_interpret = lfc.convert_data_to_interpretable(input_to_explain)

# Plot the top contributors, selected by "absolute".
visualize.plot_top_contributors(
    contributions,
    select_by="absolute",
    values=x_interpret,
)