# Random Seeds

In [1]:
import random

import numpy as np

# One seed shared by every source of randomness used in this notebook.
random_seed = 1999

# Seed Python's built-in generator and NumPy's global generator.
random.seed(random_seed)
np.random.seed(random_seed)

# Train a random forest on the Iris dataset

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the Iris data: feature matrix X and class labels y.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out a test split; the split is fixed by the notebook-wide seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_seed)

# Pass the seed explicitly so the fitted forest is the same every time this
# cell runs, instead of depending on the current state of NumPy's global
# generator (which changes whenever cells are re-run or run out of order).
rf = RandomForestClassifier(random_state=random_seed)
rf.fit(X_train, y_train)

RandomForestClassifier()

# Run Sampling Algorithm to approximate delta-sufficient and delta-necessary reasons

The current proof-of-concept algorithm works in two stages:
- Stage 1: Estimate the percentage of non-ambiguous inputs and determine all atomic almost-necessary and almost-sufficient reasons. The algorithm reports the estimated remaining runtime while sampling and prints the results at termination.
- Stage 2: Approximate all almost-necessary and almost-sufficient reasons of size 2. This stage can take a long time because all candidates are considered exhaustively. Results are reported continuously. In future versions, the exhaustive search will be replaced with a more goal-oriented search that looks for promising short explanations.

In [8]:
import sys

# Make the in-repo `uncertainpy` package importable. The path is relative to
# the working directory; the membership check keeps re-runs of this cell from
# appending the same entry to sys.path again.
src_dir = "../../../src/"
if src_dir not in sys.path:
    sys.path.append(src_dir)

from uncertainpy.explanation.randomForest import MonteCarloSampler

# Feature-type tags exposed by the sampler class.
cont = MonteCarloSampler.type_cont
cat = MonteCarloSampler.type_cat

f_names = iris.feature_names
# Every feature is tagged as continuous; the list length follows f_names
# instead of being hard-coded to four entries.
f_types = [cont] * len(f_names)
c_names = iris.target_names

# Number of samples to draw (the progress output counts up to this value).
n_samples = 10000

mcs = MonteCarloSampler(f_names, f_types, c_names, rf)
mcs.sample(n_samples)

Start Approximating Percentage of Nonambiguous Inputs and Atomic Queries
  ... sampling in progress ... completed 873/10000 samples ... estimated time remaining: 52 seconds ...
  ... sampling in progress ... completed 1737/10000 samples ... estimated time remaining: 47 seconds ...
  ... sampling in progress ... completed 2608/10000 samples ... estimated time remaining: 42 seconds ...
  ... sampling in progress ... completed 3499/10000 samples ... estimated time remaining: 37 seconds ...
  ... sampling in progress ... completed 4363/10000 samples ... estimated time remaining: 32 seconds ...
  ... sampling in progress ... completed 5221/10000 samples ... estimated time remaining: 27 seconds ...
  ... sampling in progress ... completed 6079/10000 samples ... estimated time remaining: 22 seconds ...
  ... sampling in progress ... completed 6930/10000 samples ... estimated time remaining: 17 seconds ...
  ... sampling in progress ... completed 7807/10000 samples ... estimated time remaining

   P( versicolor | 'sepal length (cm)'=(5.6499998569488525, 5.75),  'petal length (cm)'=(3.199999988079071, 4.700000047683716))=0.9347826086956522 based on 92 samples.

   P( virginica | 'sepal length (cm)'=(5.6499998569488525, 5.75),  'petal length (cm)'=(4.950000047683716, 5.0))=1.0 based on 100 samples.

   P( virginica | 'sepal length (cm)'=(5.6499998569488525, 5.75),  'petal length (cm)'=(5.0, 5.1499998569488525))=1.0 based on 100 samples.

   P( virginica | 'sepal length (cm)'=(5.6499998569488525, 5.75),  'petal length (cm)'=(5.1499998569488525, 5.200000047683716))=1.0 based on 100 samples.

   P( virginica | 'sepal length (cm)'=(5.6499998569488525, 5.75),  'petal length (cm)'=(5.200000047683716, 5.25))=1.0 based on 100 samples.

   P( virginica | 'sepal length (cm)'=(5.6499998569488525, 5.75),  'petal length (cm)'=(6.25, inf))=1.0 based on 100 samples.


  vs Feature petal width (cm)

   P( virginica | 'sepal length (cm)'=(5.6499998569488525, 5.75),  'petal width (cm)'=(2.75, in

   P( virginica | 'sepal length (cm)'=(6.150000095367432, 6.200000047683716),  'petal length (cm)'=(5.200000047683716, 5.25))=1.0 based on 100 samples.

   P( virginica | 'sepal length (cm)'=(6.150000095367432, 6.200000047683716),  'petal length (cm)'=(6.25, inf))=1.0 based on 100 samples.


  vs Feature petal width (cm)

   P( virginica | 'sepal length (cm)'=(6.150000095367432, 6.200000047683716),  'petal width (cm)'=(1.649999976158142, 1.699999988079071))=1.0 based on 100 samples.

   P( virginica | 'sepal length (cm)'=(6.150000095367432, 6.200000047683716),  'petal width (cm)'=(1.699999988079071, 1.75))=1.0 based on 100 samples.

   P( virginica | 'sepal length (cm)'=(6.150000095367432, 6.200000047683716),  'petal width (cm)'=(2.75, inf))=1.0 based on 100 samples.


  vs Feature sepal width (cm)


  vs Feature petal length (cm)

   P( virginica | 'sepal length (cm)'=(6.200000047683716, 6.25),  'petal length (cm)'=(4.950000047683716, 5.0))=1.0 based on 100 samples.

   P( virginica |

   P( versicolor | 'sepal width (cm)'=(2.25, 2.350000023841858),  'petal length (cm)'=(2.8000000715255737, 3.199999988079071))=0.90625 based on 96 samples.

   P( virginica | 'sepal width (cm)'=(2.25, 2.350000023841858),  'petal length (cm)'=(4.950000047683716, 5.0))=1.0 based on 100 samples.

   P( virginica | 'sepal width (cm)'=(2.25, 2.350000023841858),  'petal length (cm)'=(5.0, 5.1499998569488525))=1.0 based on 100 samples.

   P( virginica | 'sepal width (cm)'=(2.25, 2.350000023841858),  'petal length (cm)'=(5.1499998569488525, 5.200000047683716))=1.0 based on 100 samples.

   P( virginica | 'sepal width (cm)'=(2.25, 2.350000023841858),  'petal length (cm)'=(5.200000047683716, 5.25))=1.0 based on 100 samples.

   P( virginica | 'sepal width (cm)'=(2.25, 2.350000023841858),  'petal length (cm)'=(6.25, inf))=1.0 based on 100 samples.


  vs Feature petal width (cm)


  vs Feature petal length (cm)

   P( versicolor | 'sepal width (cm)'=(2.350000023841858, 2.399999976158142),  'peta

KeyboardInterrupt: 