# Random Seeds

In [1]:
import random

import numpy as np

# Single seed shared by every random number generator seeded in this cell.
random_seed = 1999

# Seed Python's built-in generator and NumPy's legacy global generator so
# that code drawing from either global stream starts from a known state.
random.seed(random_seed)
np.random.seed(random_seed)

# Train random forest on Mushroom dataset

In [4]:
import sys
sys.path.append("../../../src/")  # make the local `uncertainpy` package importable

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

from uncertainpy.explanation.randomForest import MonteCarloSampler

# Feature-type tags understood by MonteCarloSampler.
cont = MonteCarloSampler.type_cont
cat = MonteCarloSampler.type_cat

# Load the mushrooms dataset.
# Mushroom data preprocessing is from
# https://www.kaggle.com/code/aavigan/uci-mushroom-data/notebook
mushrooms = pd.read_csv(r"../../../data/mushrooms.csv")
mushrooms = pd.get_dummies(mushrooms)  # one-hot encode every column, including the class label

# Class names, listed in the order of the target values: `class_e` is the
# dummy column for the 'e' class, so target 0 -> 'poisonous', 1 -> 'edible'.
# NOTE(review): presumably MonteCarloSampler indexes c_names in the order of
# rf.classes_ -- confirm against its implementation.
c_names = ['poisonous', 'edible']

# Every dummy column except the two one-hot class columns is a feature.
f_names = [a for a in mushrooms.columns if a not in ['class_e', 'class_p']]

# All features are categorical dummies. Derive the count from f_names rather
# than hard-coding it (117 dummy columns from 22 categorical features with
# the current CSV), so the two lists can never get out of sync.
f_types = [cat] * len(f_names)

X = mushrooms[f_names]
y = mushrooms['class_e']

# Split the dataset (fixed random_state keeps the split reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Train a classifier. An explicit random_state makes this cell produce the
# same forest on every re-run, independent of how much of the global NumPy
# random stream earlier cells have consumed. `random_seed` is defined in the
# "Random Seeds" cell at the top of the notebook.
rf = RandomForestClassifier(random_state=random_seed)
rf.fit(X_train, y_train)

print(f"f_names: {f_names}")
print(f"f_types: {f_types}")
print(f"c_names: {c_names}")

f_names: ['cap-shape_b', 'cap-shape_c', 'cap-shape_f', 'cap-shape_k', 'cap-shape_s', 'cap-shape_x', 'cap-surface_f', 'cap-surface_g', 'cap-surface_s', 'cap-surface_y', 'cap-color_b', 'cap-color_c', 'cap-color_e', 'cap-color_g', 'cap-color_n', 'cap-color_p', 'cap-color_r', 'cap-color_u', 'cap-color_w', 'cap-color_y', 'bruises_f', 'bruises_t', 'odor_a', 'odor_c', 'odor_f', 'odor_l', 'odor_m', 'odor_n', 'odor_p', 'odor_s', 'odor_y', 'gill-attachment_a', 'gill-attachment_f', 'gill-spacing_c', 'gill-spacing_w', 'gill-size_b', 'gill-size_n', 'gill-color_b', 'gill-color_e', 'gill-color_g', 'gill-color_h', 'gill-color_k', 'gill-color_n', 'gill-color_o', 'gill-color_p', 'gill-color_r', 'gill-color_u', 'gill-color_w', 'gill-color_y', 'stalk-shape_e', 'stalk-shape_t', 'stalk-root_?', 'stalk-root_b', 'stalk-root_c', 'stalk-root_e', 'stalk-root_r', 'stalk-surface-above-ring_f', 'stalk-surface-above-ring_k', 'stalk-surface-above-ring_s', 'stalk-surface-above-ring_y', 'stalk-surface-below-ring_f', 's

# Run Sampling Algorithm to approximate delta-sufficient and delta-necessary reasons.

The current proof-of-concept algorithm works in two stages:
- Stage 1: Estimate percentage of non-ambiguous inputs and determine all atomic almost necessary and sufficient reasons. The algorithm reports the remaining runtime and prints the results at termination.
- Stage 2: Approximate all almost necessary and sufficient reasons of size 2. This stage can take a long time because all candidates are considered exhaustively. Results are reported continuously. The exhaustive search will be replaced with a more goal-oriented search (find promising short explanations) in future versions. 

In [5]:
# Number of Monte Carlo samples requested from the sampler.
n_samples = 10000

# Build the sampler from the feature names/types, class names and the trained
# forest, then start sampling. Per the description above, stage 2 enumerates
# all size-2 candidates exhaustively and can run for a long time.
mcs = MonteCarloSampler(f_names, f_types, c_names, rf)
mcs.sample(n_samples)

Start Approximating Percentage of Nonambiguous Inputs and Atomic Queries
  ... sampling in progress ... completed 554/10000 samples ... estimated time remaining: 85 seconds ...
  ... sampling in progress ... completed 1121/10000 samples ... estimated time remaining: 79 seconds ...
  ... sampling in progress ... completed 1716/10000 samples ... estimated time remaining: 72 seconds ...
  ... sampling in progress ... completed 2285/10000 samples ... estimated time remaining: 67 seconds ...
  ... sampling in progress ... completed 2887/10000 samples ... estimated time remaining: 61 seconds ...
  ... sampling in progress ... completed 3443/10000 samples ... estimated time remaining: 57 seconds ...
  ... sampling in progress ... completed 4053/10000 samples ... estimated time remaining: 51 seconds ...
  ... sampling in progress ... completed 4649/10000 samples ... estimated time remaining: 46 seconds ...
  ... sampling in progress ... completed 5222/10000 samples ... estimated time remaining


  vs Feature cap-shape_f


  vs Feature cap-shape_k


  vs Feature cap-shape_s


  vs Feature cap-shape_x


  vs Feature cap-surface_f

   P( poisonous | 'cap-shape_b'=0,  'cap-surface_f'=0)=0.9081632653061225 based on 98 samples.


  vs Feature cap-surface_g

   P( poisonous | 'cap-shape_b'=0,  'cap-surface_g'=1)=0.9183673469387755 based on 98 samples.


  vs Feature cap-surface_s


  vs Feature cap-surface_y


  vs Feature cap-color_b


  vs Feature cap-color_c

   P( poisonous | 'cap-shape_b'=0,  'cap-color_c'=0)=0.9081632653061225 based on 98 samples.


  vs Feature cap-color_e


  vs Feature cap-color_g


  vs Feature cap-color_n

   P( poisonous | 'cap-shape_b'=0,  'cap-color_n'=0)=0.9 based on 100 samples.


  vs Feature cap-color_p

   P( poisonous | 'cap-shape_b'=0,  'cap-color_p'=0)=0.9381443298969072 based on 97 samples.


  vs Feature cap-color_r

   P( poisonous | 'cap-shape_b'=0,  'cap-color_r'=0)=0.90625 based on 96 samples.


  vs Feature cap-color_u

   P( poisonous |


  vs Feature gill-color_k


  vs Feature gill-color_n


  vs Feature gill-color_o


  vs Feature gill-color_p


  vs Feature gill-color_r


  vs Feature gill-color_u


  vs Feature gill-color_w


  vs Feature gill-color_y


  vs Feature stalk-shape_e


  vs Feature stalk-shape_t


  vs Feature stalk-root_?


  vs Feature stalk-root_b


  vs Feature stalk-root_c


  vs Feature stalk-root_e


  vs Feature stalk-root_r

   P( poisonous | 'cap-shape_b'=1,  'stalk-root_r'=0)=0.9175257731958762 based on 97 samples.


  vs Feature stalk-surface-above-ring_f


  vs Feature stalk-surface-above-ring_k


  vs Feature stalk-surface-above-ring_s


  vs Feature stalk-surface-above-ring_y


  vs Feature stalk-surface-below-ring_f


  vs Feature stalk-surface-below-ring_k


  vs Feature stalk-surface-below-ring_s


  vs Feature stalk-surface-below-ring_y


  vs Feature stalk-color-above-ring_b


  vs Feature stalk-color-above-ring_c


  vs Feature stalk-color-above-ring_e

   P( poisonous | 'cap-shap

   P( poisonous | 'cap-shape_c'=0,  'veil-color_n'=1)=0.9375 based on 96 samples.


  vs Feature veil-color_o


  vs Feature veil-color_w


  vs Feature veil-color_y


  vs Feature ring-number_n

   P( poisonous | 'cap-shape_c'=0,  'ring-number_n'=0)=0.9285714285714286 based on 98 samples.


  vs Feature ring-number_o

   P( poisonous | 'cap-shape_c'=0,  'ring-number_o'=0)=0.9278350515463918 based on 97 samples.


  vs Feature ring-number_t

   P( poisonous | 'cap-shape_c'=0,  'ring-number_t'=0)=0.9191919191919192 based on 99 samples.

   P( poisonous | 'cap-shape_c'=0,  'ring-number_t'=1)=0.9032258064516129 based on 93 samples.


  vs Feature ring-type_e


  vs Feature ring-type_f


  vs Feature ring-type_l

   P( poisonous | 'cap-shape_c'=0,  'ring-type_l'=1)=0.9081632653061225 based on 98 samples.


  vs Feature ring-type_n


  vs Feature ring-type_p

   P( poisonous | 'cap-shape_c'=0,  'ring-type_p'=0)=0.9696969696969697 based on 99 samples.


  vs Feature spore-print-color_b

   P

KeyboardInterrupt: 