In [1]:
from z3 import *
from utils import *

In [2]:
import os
import pickle
import subprocess
import time

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import _tree, plot_tree

In [3]:
# Load the breast-cancer dataset bundled with scikit-learn.
data = load_breast_cancer()
feature_names = data["feature_names"]
X = data['data']
y = data["target"] == 1  # boolean labels: True where the target equals 1
ntrees = 50
# Fix the seed: every threshold, constraint and solver result in the cells
# below is derived from this forest, so an unseeded fit makes the whole
# notebook non-reproducible across "Restart & Run All".
clf = RandomForestClassifier(
    n_estimators=ntrees, max_depth=4, random_state=0
).fit(X, y)

In [4]:
# Collect the split thresholds of the trained forest (helper from `utils`;
# its exact return structure is not visible here).
all_thresh = get_ens_thresh(clf)
# Discretise X against those thresholds. The recorded output shows a frame of
# booleans whose column labels look like "<feature index>_<threshold>"
# (interpretation of the label to be confirmed in utils.disc_ens_data).
data_ = disc_ens_data(X, all_thresh)
print(data_.shape)
data_.head()

(569, 508)


Unnamed: 0,23_848.9500122070312,2_78.20500183105469,22_117.44999694824219,13_32.98499870300293,22_117.05000305175781,9_0.06865999847650528,9_0.09659499675035477,9_0.06103000044822693,9_0.05894999951124191,9_0.0825200006365776,...,27_0.09152000024914742,27_0.1436000019311905,27_0.13234999775886536,27_0.1423499956727028,27_0.14554999768733978,3_306.0500030517578,13_39.96999931335449,22_114.45000076293945,13_39.170000076293945,23_686.4500122070312
0,True,True,True,True,True,True,False,True,True,False,...,True,True,True,True,True,True,True,True,True,True
1,True,True,True,True,True,False,False,False,False,False,...,True,True,True,True,True,True,True,True,True,True
2,True,True,True,True,True,False,False,False,True,False,...,True,True,True,True,True,True,True,True,True,True
3,False,False,False,False,False,True,True,True,True,True,...,True,True,True,True,True,True,False,False,False,False
4,True,True,True,True,True,False,False,False,False,False,...,True,True,True,True,True,True,True,True,True,True


In [5]:
# Build the solver-side variables from the threshold set (helper from `utils`;
# presumably one Boolean per threshold predicate -- TODO confirm).
var_x = create_var_x(all_thresh)

In [6]:
# Bit width handed to list_c_val and const_larger in the encoding cells.
nbits = 8
# Width handed to sum_loop: nbits plus ceil(log2(ntrees)) extra bits --
# presumably head-room so that summing up to `ntrees` nbits-wide values
# cannot overflow (TODO confirm against utils.sum_loop).
new_nbits = int(np.ceil(np.log2(ntrees)) + nbits)

In [7]:
# Forwarded to create_all_smt; presumably the allowed perturbation budget
# (exact semantics live in `utils` -- TODO confirm).
epsilon = 0.2

# Row of X to analyse; slicing (rather than indexing) keeps a 2-D array with a
# single row, which is what clf.predict / predict_proba are called with later.
index = 10
sample = X[index:index+1, :]

In [8]:
# Build the SMT encoding for `sample`; every helper here comes from `utils`.
# create_all_smt returns four values, of which only core_constraints and
# c_weights are consumed by the cells visible in this notebook.
core_constraints, soft_constraints, c_weights, all_c = create_all_smt(
    clf, var_x, sample, epsilon
)
# Encode the weights using `nbits`, then build the summation constraints with
# the wider `new_nbits` width.
list_val_, list_c_ = list_c_val(c_weights, nbits)
sum_const, seq_num = sum_loop(list_val_, list_c_, new_nbits)
# Constraints involving the final sum; presumably they define the Boolean
# "class" used by the solver cells (TODO confirm in utils.const_larger).
const_class = const_larger(nbits, ntrees, seq_num)

False


In [9]:
# seq_num is the second value returned by sum_loop; it is reported here as the
# number of leaves.
print("number of leaves: ", seq_num)

number of leaves:  611


In [10]:
# Ask Z3 whether the forest's prediction for `sample` can be flipped while all
# encoded constraints hold.
s = Solver()
s.set("timeout", 20000)  # Z3's timeout parameter is in milliseconds

# Hard constraints, the weighted-sum encoding and the class constraints are
# added one by one, in that order.
for group in (core_constraints, sum_const, const_class):
    for constraint in group:
        s.add(constraint)

# Require the opposite of the class currently predicted by the forest. This is
# kept as a one-element list because a later cell unpacks it (*class_change).
class_change = [Not(Bool("class")) if clf.predict(sample)[0] else Bool("class")]
s.add(class_change)

beg_time = time.time()

result = s.check()
if result == sat:
    adv_weights = get_output(s, c_weights)
    print(np.mean([w for _, w in adv_weights.items()]), clf.predict_proba(sample))
elif result == unsat:
    print("unsat")
else:
    # With a timeout set, check() may return `unknown`; reporting that as
    # "unsat" would wrongly claim that no flip exists.
    print("unknown:", s.reason_unknown())

end_time = time.time()
print("sat check time + prepare output: ", end_time-beg_time)

0.512727320591057 [[0.77125496 0.22874504]]
sat check time + prepare output:  1.838109016418457


In [11]:
def toSMT2Benchmark(f, status="unknown", name="benchmark", logic=""):
    """Serialise the Z3 expression ``f`` as an SMT-LIB benchmark string.

    Thin wrapper around ``Z3_benchmark_to_smtlib_string``: ``name``, ``logic``
    and ``status`` are forwarded as given, the attributes string is left
    empty, and no assumptions are passed (count 0 with an empty ``Ast`` array).

    Returns whatever the underlying Z3 call returns for ``f``.
    """
    no_assumptions = (Ast * 0)()  # zero-length ctypes array of Ast
    return Z3_benchmark_to_smtlib_string(
        f.ctx_ref(), name, logic, status, "", 0, no_assumptions, f.as_ast()
    )

In [12]:
# Conjoin every constraint that was given to the solver and serialise the
# conjunction so the z3 command-line binary can solve the same problem.
full_formula = And(*core_constraints, *sum_const, *const_class, *class_change)
smt2 = toSMT2Benchmark(full_formula)

with open("smt2_test.smt", "w") as text_file:
    text_file.write(f"{smt2}\n")

In [13]:
# Solve the exported SMT-LIB file with the z3 command-line binary and time it.
beg_time = time.time()

bashCommand = "z3 -smt2 smt2_test.smt"

# stderr is piped too: without it communicate() always returns None for
# `error`, so diagnostics from the binary could never be inspected.
process = subprocess.Popen(
    bashCommand.split(),
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    universal_newlines=True,
)
output, error = process.communicate()

print("z3 cli time: ", time.time()-beg_time)
if error:
    print("z3 stderr: ", error)

z3 cli time:  0.8933250904083252


In [14]:
# Show what the z3 command-line run wrote to stdout ("sat" in the recorded output).
print(output)

sat



# Goal definition

In [15]:
# Rebuild the encoding for the same sample with a smaller epsilon (0.1 instead
# of the 0.2 used for the Solver run). This repeats the earlier setup cells and
# overwrites their globals (epsilon, core_constraints, c_weights, ...).
epsilon = 0.1

index = 10
sample = X[index:index+1, :]

# All helpers below come from `utils`; see their definitions for the exact
# meaning of each returned value.
core_constraints, soft_constraints, c_weights, all_c = create_all_smt(
    clf, var_x, sample, epsilon
)
list_val_, list_c_ = list_c_val(c_weights, nbits)
sum_const, seq_num = sum_loop(list_val_, list_c_, new_nbits)
const_class = const_larger(nbits, ntrees, seq_num)

False


In [16]:
# Collect the constraint families in a Goal so that a tactic can be applied.
g = Goal()

for group in (core_constraints, sum_const, const_class):
    for constraint in group:
        g.add(constraint)

# Demand the class opposite to the forest's current prediction on `sample`.
if clf.predict(sample)[0]:
    flipped_class = Not(Bool("class"))
else:
    flipped_class = Bool("class")
g.add([flipped_class])

In [17]:
# Apply Z3's 'tseitin-cnf' tactic to the goal. The conversion cell that follows
# reads the clauses of the first resulting subgoal, tg[0].
t = Tactic('tseitin-cnf')
tg = t(g)

In [18]:
# Translate the CNF subgoal into DIMACS-style integer clauses.
#
# The clauses are read through the Z3 expression API instead of by parsing
# their pretty-printed text. Text parsing is fragile: a substring test for
# "Not" misclassifies any atom whose name contains it, splitting on ", "
# breaks atoms whose printed form contains a comma, and long clauses may be
# elided by the printer. It also pays for pretty-printing every clause.
beg_time_convert = time.time()

all_ors = []      # one list of signed variable ids per clause, 0-terminated
variables = {}    # printed atom -> positive DIMACS variable id
var_num = 0
for clause in tg[0]:
    # A clause is either a disjunction of literals or a single literal.
    literals = clause.children() if is_or(clause) else [clause]

    inner_or = []
    for literal in literals:
        if is_not(literal):
            sign, atom = -1, literal.arg(0)
        else:
            sign, atom = +1, literal
        # Key by the printed atom so later cells can keep matching names
        # textually (e.g. the "c(" test) and index c_weights with them.
        key = str(atom)
        if key not in variables:
            var_num += 1
            variables[key] = var_num
        inner_or.append(sign * variables[key])
    inner_or.append(0)  # DIMACS clause terminator
    all_ors.append(inner_or)

end_time = time.time()

print("conversion time: ", end_time-beg_time_convert)

conversion time:  62.98132300376892


In [19]:
# Inverse lookup: DIMACS variable id -> atom name.
rev_variables = dict(zip(variables.values(), variables.keys()))

In [20]:
# Write the clauses to cnf_test.cnf: two comment lines, the "p cnf" problem
# line, then one space-separated clause per line.
beg_time_write = time.time()

num_vars = len(variables)
num_clauses = len(all_ors)
header = f"c  cnf_test.cnf \nc \np cnf {num_vars} {num_clauses} \n"
text = header + "\n".join(" ".join(map(str, clause)) for clause in all_ors)

with open("cnf_test.cnf", "w") as text_file:
    text_file.write(text + "\n")

print("write time: ", time.time()-beg_time_write)

write time:  0.31450700759887695


In [21]:
# Run the external SAT solver on the CNF file and time it.
beg_time_solve = time.time()

# The solver location is machine-specific: allow overriding it through the
# CRYPTOMINISAT_BIN environment variable and fall back to the original path.
solver_bin = os.environ.get(
    "CRYPTOMINISAT_BIN",
    "/Users/329030704/Documents/CodeMakes/MapleCMS/bin/cryptominisat4_simple",
)
bashCommand = f"{solver_bin} cnf_test.cnf"

# Pass an argument list instead of splitting the command string, so a solver
# path containing spaces still works. stderr is piped so that `error` holds
# the solver's diagnostics instead of always being None.
process = subprocess.Popen(
    [solver_bin, "cnf_test.cnf"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    universal_newlines=True,
)
output, error = process.communicate()

print("solution time: ", time.time()-beg_time_solve)
if error:
    print("solver stderr: ", error)

solution time:  0.4438819885253906


In [22]:
# Parse the solver's stdout: echo lines starting with "s" and turn the signed
# integers on lines starting with "v" into {atom name: truth value}.
outputs = {}
for line in output.split("\n"):
    if line.startswith("s"):
        print(line)
    if line.startswith("v"):
        assignment = {}
        for token in line[1:].split():
            literal = int(token)
            if literal == 0:  # 0 is a terminator, not a variable
                continue
            assignment[rev_variables[abs(literal)]] = literal > 0
        outputs.update(assignment)

s SATISFIABLE


In [23]:
# Elapsed time since the external solver was launched (beg_time_solve): the
# solver run plus parsing of its output. The CNF conversion and file-write
# cells are timed separately and are not included in this figure.
end_time = time.time()
print("sat check time + prepare output: ", end_time-beg_time_solve)

sat check time + prepare output:  0.5722558498382568


In [24]:
# Keep the weights of the atoms whose name contains "c(" and that the solver
# set to True, then display their mean.
adv_weight = {
    name: c_weights[name]
    for name, is_true in outputs.items()
    if "c(" in name and is_true
}
np.mean(list(adv_weight.values()))

0.5010882240267552