# Random Seeds

In [1]:
# Fix the seed of Python's built-in and NumPy's global random number
# generators so that repeated runs start from the same RNG state.
import random

import numpy as np

random_seed = 1999  # one seed value shared by both generators

random.seed(random_seed)
np.random.seed(random_seed)

# Train random forest 

In [4]:
import sys
sys.path.append("../../../src/")

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

from uncertainpy.explanation.randomForest import MonteCarloSampler

# Feature-type markers exposed by MonteCarloSampler
# (presumably "continuous" and "categorical" -- confirm against the sampler).
cont = MonteCarloSampler.type_cont
cat = MonteCarloSampler.type_cat


# Load the Pima dataset: every column except the last is a feature,
# the last column is the class label.
filename = r"../../../data/pima.csv"
dataset_df = pd.read_csv(filename, nrows=769)
# NOTE: the name `dateset_nd` (sic) is kept unchanged in case other cells use it.
dateset_nd = dataset_df.values
number_features = len(dataset_df.columns) - 1
X = dateset_nd[:, :number_features]
y = dateset_nd[:, number_features]
f_names = list(dataset_df.columns[:number_features])
c_names = ['Neg', 'Pos']
# Every feature is declared with the `cont` type. The list length follows the
# data instead of a hard-coded 8 so it always lines up with `f_names`.
f_types = [cont] * number_features

# Split the dataset into train and test partitions (fixed split seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Train the classifier. Passing the notebook-wide `random_seed` (defined in the
# "Random Seeds" cell) makes the fitted forest independent of the global NumPy
# RNG state and therefore of the order in which cells were executed.
rf = RandomForestClassifier(random_state=random_seed)
rf.fit(X_train, y_train)

# Echo the metadata that is handed to the sampler.
print(f"f_names: {f_names}")
print(f"f_types: {f_types}")
print(f"c_names: {c_names}")

f_names: ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
f_types: [1, 1, 1, 1, 1, 1, 1, 1]
c_names: ['Neg', 'Pos']


# Run Sampling Algorithm to approximate delta-sufficient and delta-necessary reasons

The current proof-of-concept algorithm works in two stages:
- Stage 1: Estimate the percentage of non-ambiguous inputs and determine all atomic almost-necessary and almost-sufficient reasons. The algorithm reports the estimated remaining runtime while it runs and prints the results at termination.
- Stage 2: Approximate all almost-necessary and almost-sufficient reasons of size 2. This stage can take a long time because all candidates are considered exhaustively. Results are reported continuously. In future versions, the exhaustive search will be replaced with a more goal-oriented search that looks for promising short explanations.

In [5]:
# Build the sampler from the feature names, feature types, class names
# and the trained random forest.
mcs = MonteCarloSampler(
    f_names,
    f_types,
    c_names,
    rf,
)
# Run the two-stage approximation with 50,000 samples; progress and results
# are printed while it runs, and the run can take several minutes.
mcs.sample(50_000)

Start Approximating Percentage of Nonambiguous Inputs and Atomic Queries
  ... sampling in progress ... completed 671/50000 samples ... estimated time remaining: 368 seconds ...
  ... sampling in progress ... completed 1334/50000 samples ... estimated time remaining: 365 seconds ...
  ... sampling in progress ... completed 1953/50000 samples ... estimated time remaining: 369 seconds ...
  ... sampling in progress ... completed 2672/50000 samples ... estimated time remaining: 354 seconds ...
  ... sampling in progress ... completed 3333/50000 samples ... estimated time remaining: 350 seconds ...
  ... sampling in progress ... completed 3967/50000 samples ... estimated time remaining: 348 seconds ...
  ... sampling in progress ... completed 4605/50000 samples ... estimated time remaining: 345 seconds ...
  ... sampling in progress ... completed 5214/50000 samples ... estimated time remaining: 343 seconds ...
  ... sampling in progress ... completed 5895/50000 samples ... estimated time r

   P( Neg | 'Pregnancies'=(-inf, 0.5),  'Glucose'=(42.0, 47.0))=0.9696969696969697 based on 99 samples.

   P( Neg | 'Pregnancies'=(-inf, 0.5),  'Glucose'=(47.0, 47.5))=0.9393939393939394 based on 99 samples.

   P( Neg | 'Pregnancies'=(-inf, 0.5),  'Glucose'=(47.5, 48.5))=0.9489795918367347 based on 98 samples.

   P( Neg | 'Pregnancies'=(-inf, 0.5),  'Glucose'=(48.5, 74.5))=0.9393939393939394 based on 99 samples.

   P( Neg | 'Pregnancies'=(-inf, 0.5),  'Glucose'=(74.5, 75.5))=0.9292929292929293 based on 99 samples.

   P( Neg | 'Pregnancies'=(-inf, 0.5),  'Glucose'=(75.5, 77.5))=0.9381443298969072 based on 97 samples.

   P( Neg | 'Pregnancies'=(-inf, 0.5),  'Glucose'=(77.5, 78.0))=0.9285714285714286 based on 98 samples.

   P( Neg | 'Pregnancies'=(-inf, 0.5),  'Glucose'=(78.0, 78.5))=0.9693877551020408 based on 98 samples.

   P( Neg | 'Pregnancies'=(-inf, 0.5),  'Glucose'=(78.5, 79.0))=0.9494949494949495 based on 99 samples.

   P( Neg | 'Pregnancies'=(-inf, 0.5),  'Glucose'=(79.0

KeyboardInterrupt: 