# Using the high-level API to verify the robustness of the Ensemble trees.

In [1]:
import sys
sys.path.append("..")

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import xgboost
from sklearn.ensemble import RandomForestClassifier
from satreeverify import SATreeAttack

  from pandas import MultiIndex, Int64Index


In [3]:
data = load_breast_cancer()
X = data['data']
y = data["target"]
feature_names = data["feature_names"]
y = y==1

In [4]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# XGBoost Model

In [5]:
ntrees = 50
clf = xgboost.XGBClassifier(n_estimators=ntrees, max_depth=3).fit(x_train, y_train)
print(clf.score(x_test, y_test))

attack = SATreeAttack(clf)



0.9824561403508771


In [6]:
epsilon = 0.4

index = 105
sample = X[index:index+1, :]
clf.predict_proba(sample)

array([[0.9903275 , 0.00967252]], dtype=float32)

- x_adv is the SAT results
- x_adv_sample is the x_adv transformed to the original input space
- compare is a pandas dataframe that compares the original input and the x_adv_sample

In [7]:
result = attack.soft_attack(sample, epsilon)
result["comparison"]

Unnamed: 0,0,1,diff(%),bound
0,13.11,13.11,0.0,"[-inf, 15.0450001]"
1,15.56,16.562121,0.064404,"[16.5600014, 18.6800003]"
2,87.21,87.21,0.0,"[-inf, inf]"
3,530.2,530.2,0.0,"[-inf, 689.450012]"
4,0.1398,0.089955,0.356547,"[0.0896499977, 0.0899550021]"
5,0.1765,0.1765,0.0,"[0.0716100037, inf]"
6,0.2071,0.2071,0.0,"[0.0975949988, inf]"
7,0.09601,0.071987,0.250211,"[0.0492300019, 0.072010003]"
8,0.1925,0.1925,0.0,"[0.166500002, inf]"
9,0.07692,0.07692,0.0,"[-inf, inf]"


In [8]:
clf.predict(sample), clf.predict(result["adv_sample"])

(array([False]), array([ True]))

- x_adv is the SAT results
- x_adv_sample is the x_adv transformed to the original input space
- compare is a pandas dataframe that compares the original input and the x_adv_sample

In [9]:
result = attack.hard_attack(sample, epsilon, 8) # hard attack binarizes the decision values. here we use 8 bits to binarize the decision values
result["comparison"]

Unnamed: 0,0,1,diff(%),bound
0,13.11,15.060045,0.148745,"[15.0450001, inf]"
1,15.56,14.97501,0.037596,"[-inf, 14.9899998]"
2,87.21,87.21,0.0,"[-inf, inf]"
3,530.2,530.2,0.0,"[-inf, 696.25]"
4,0.1398,0.108632,0.222948,"[0.0904600024, 0.108649999]"
5,0.1765,0.1765,0.0,"[0.0716100037, inf]"
6,0.2071,0.2071,0.0,"[0.0975949988, inf]"
7,0.09601,0.071987,0.250211,"[0.0492300019, 0.072010003]"
8,0.1925,0.166334,0.13593,"[-inf, 0.166500002]"
9,0.07692,0.07692,0.0,"[-inf, inf]"


In [10]:
clf.predict(sample), clf.predict(result["adv_sample"])

(array([False]), array([ True]))

# RandomForest Model

In [11]:
clf = RandomForestClassifier(n_estimators=ntrees, max_depth=3).fit(X, y)
print(clf.score(x_test, y_test))

attack = SATreeAttack(clf)

1.0


In [12]:
epsilon = 0.4

index = 105
sample = X[index:index+1, :]
clf.predict_proba(sample)

array([[0.84967368, 0.15032632]])

- x_adv is the SAT results
- x_adv_sample is the x_adv transformed to the original input space
- compare is a pandas dataframe that compares the original input and the x_adv_sample

In [13]:
result = attack.soft_attack(sample, epsilon)
result["comparison"]

Unnamed: 0,0,1,diff(%),bound
0,13.11,10.302687,0.214135,"[-inf, 10.312999725341797]"
1,15.56,14.954185,0.038934,"[14.139999866485596, 14.954999923706055]"
2,87.21,72.513513,0.168518,"[71.0250015258789, 72.5150032043457]"
3,530.2,530.2,0.0,"[-inf, 536.4000091552734]"
4,0.1398,0.108443,0.224301,"[0.10114999860525131, 0.10844999924302101]"
5,0.1765,0.134932,0.235514,"[0.11674999818205833, 0.13494999706745148]"
6,0.2071,0.137328,0.3369,"[0.11534999683499336, 0.13735000044107437]"
7,0.09601,0.09601,0.0,"[0.06218999996781349, inf]"
8,0.1925,0.149995,0.220804,"[0.1453000009059906, 0.14999999850988388]"
9,0.07692,0.08006,0.040821,"[0.07997999712824821, inf]"


In [14]:
clf.predict(sample), clf.predict(result["adv_sample"])

(array([False]), array([ True]))

- x_adv is the SAT results
- x_adv_sample is the x_adv transformed to the original input space
- compare is a pandas dataframe that compares the original input and the x_adv_sample

In [15]:
result = attack.hard_attack(sample, epsilon, 8)
result["comparison"]

Unnamed: 0,0,1,diff(%),bound
0,13.11,10.302687,0.214135,"[-inf, 10.312999725341797]"
1,15.56,15.014975,0.035027,"[14.990000247955322, 15.015000343322754]"
2,87.21,72.442488,0.169333,"[-inf, 72.5150032043457]"
3,530.2,696.050218,0.312807,"[696.0500183105469, 696.25]"
4,0.1398,0.10113,0.276611,"[0.0809599980711937, 0.10114999860525131]"
5,0.1765,0.1765,0.0,"[0.16804999858140945, 0.1906999945640564]"
6,0.2071,0.156059,0.246455,"[0.11534999683499336, 0.15610000491142273]"
7,0.09601,0.060396,0.370943,"[0.05615000054240227, 0.06039999984204769]"
8,0.1925,0.1925,0.0,"[0.14999999850988388, inf]"
9,0.07692,0.07692,0.0,"[0.061650000512599945, 0.07997999712824821]"


In [16]:
clf.predict(sample), clf.predict(result["adv_sample"])

(array([False]), array([ True]))