In [1]:
import time
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels
import gurobipy as gp
from gurobipy import GRB
from decision_tree import MultivariateDecisionTree
from robust_multivariate_tree import RDT
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import random

In [2]:
# Toy example: four 2-D points with binary labels (currently disabled, kept for reference).
# X_train = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
# y_train = np.array([0, 0, 1, 0])

In [3]:
# Iris data: call with num_classes=3 (default) for the full dataset or num_classes=2 for a subset.
def iris_dataset(num_classes=3):
    """Load iris, optionally reduce it to a two-class subset, split it and min-max scale it.

    Parameters
    ----------
    num_classes : int, default 3
        3 keeps every sample. 2 keeps rows 0-49 and 100-149 and drops rows
        50-99; the remaining labels are not re-encoded.

    Returns
    -------
    X_train, X_test, y_train, y_test
        A 67/33 split made with ``random_state=0``. Features are scaled with a
        ``MinMaxScaler`` that is fitted on the training part only and then
        applied to the test part.

    Raises
    ------
    ValueError
        If ``num_classes`` is neither 2 nor 3 (previously such values silently
        fell through to the 3-class data).
    """
    if num_classes not in (2, 3):
        raise ValueError(f"num_classes must be 2 or 3, got {num_classes!r}")

    iris = datasets.load_iris()
    X, y = iris["data"], iris["target"]
    if num_classes == 2:
        # Keep the first 50 and the last 50 rows. (An earlier variant of this
        # notebook kept the first 100 rows instead: X[:100], y[:100].)
        X = np.concatenate((X[:50, :], X[100:, :]))
        y = np.concatenate((y[:50], y[100:]))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

    # Fit the scaler on the training data only so no test information leaks into it.
    scaler = preprocessing.MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

# Build the scaled train/test split on all three iris classes.
X_train, X_test, y_train, y_test = iris_dataset(num_classes=3)

In [4]:
# Configure the RDT estimator; time_limit and obj_relax are both set to np.inf.
rdt = RDT(
    verbose=0,
    max_depth=2,
    budget=0.5,
    epsilon=0.001,
    num_cuts=1,
    time_limit=np.inf,
    obj_relax=np.inf,
)

In [5]:
# Fit the RDT estimator on the scaled training split.
# NOTE(review): the output recorded for this cell ends with
# "GurobiError: Unable to retrieve attribute 'X'" right after iteration 55,
# so the run did not finish cleanly — investigate before trusting later cells.
rdt.fit(X_train, y_train)

Set parameter Username
Academic license - for non-commercial use only - expires 2024-09-04
Accuracy:  1.0
Objective Value:  100.0
---------------------------------------------------------
Iteration Number: 1

Accuracy (perturbed) before adding cut:  0.89
Accuracy:  0.47
Objective Value:  100.0
---------------------------------------------------------

Iteration Number: 2

Accuracy (perturbed) before adding cut:  0.35
Accuracy:  0.57
Objective Value:  100.0
---------------------------------------------------------

Iteration Number: 3

Accuracy (perturbed) before adding cut:  0.47
Accuracy:  0.25
Objective Value:  100.0
---------------------------------------------------------

Iteration Number: 4

Accuracy (perturbed) before adding cut:  0.16
Accuracy:  0.57
Objective Value:  100.0
---------------------------------------------------------

Iteration Number: 5

Accuracy (perturbed) before adding cut:  0.45
Accuracy:  0.25
Objective Value:  100.0
-----------------------------------------

Accuracy:  0.64
Objective Value:  100.0
---------------------------------------------------------

Iteration Number: 50

Accuracy (perturbed) before adding cut:  0.54
Accuracy:  0.38
Objective Value:  100.0
---------------------------------------------------------

Iteration Number: 51

Accuracy (perturbed) before adding cut:  0.3
Accuracy:  0.84
Objective Value:  100.0
---------------------------------------------------------

Iteration Number: 52

Accuracy (perturbed) before adding cut:  0.75
Accuracy:  0.67
Objective Value:  100.0
---------------------------------------------------------

Iteration Number: 53

Accuracy (perturbed) before adding cut:  0.62
Accuracy:  0.79
Objective Value:  100.0
---------------------------------------------------------

Iteration Number: 54

Accuracy (perturbed) before adding cut:  0.69
Accuracy:  0.73
Objective Value:  100.0
---------------------------------------------------------

Iteration Number: 55

Accuracy (perturbed) before adding cut:  0.64

GurobiError: Unable to retrieve attribute 'X'

In [6]:
# Display the per-iteration statistics table stored on the estimator
# (perturbed accuracy, accuracy, objective value, subproblem / master problem time).
# NOTE(review): the saved table runs to row 62 while the fit log above stops at
# iteration 55, and execution count 6 is used by two cells — the outputs appear
# to come from different runs; Restart & Run All to confirm.
rdt.time_stats

Unnamed: 0,Perturbed Accuracy,Accuracy,Objective Value,Subproblem Time,Master Problem Time
1,0.89,0.47,100.0,0.099580,0.069885
2,0.35,0.57,100.0,0.043988,0.083413
3,0.47,0.25,100.0,0.051588,0.103971
4,0.16,0.57,100.0,0.026880,0.135965
5,0.45,0.25,100.0,0.062773,0.281253
...,...,...,...,...,...
59,0.54,0.34,53.0,0.059959,2.086685
60,0.34,0.61,100.0,0.029053,9.466441
61,0.51,0.53,100.0,0.057687,7.576605
62,0.43,0.34,84.0,0.049984,20.435585


In [6]:
# Grab the optimization model kept on the estimator (presumably a gurobipy Model — confirm)
# and unpack its stored variable groups; only a, b and c are read afterwards.
model = rdt.second
c, a, a_cap, b, b_cap, gamma, g = model._vars

# Query the 'X' attribute for each group of interest, in the order a, b, c.
a_vals, b_vals, c_vals = (
    model.getAttr('X', var_group) for var_group in (a, b, c)
)

In [7]:
# Split coefficients, keyed by (branch node t, feature index f).
a_vals

{(1, 0): 0.0,
 (1, 1): 0.0,
 (1, 2): 0.0,
 (1, 3): -1.0,
 (2, 0): 0.0,
 (2, 1): 0.565730771981442,
 (2, 2): -0.13481777805652367,
 (2, 3): 0.0,
 (3, 0): 0.0,
 (3, 1): 0.5150758229920824,
 (3, 2): -0.4849241770079175,
 (3, 3): 0.0}

In [8]:
# Split thresholds, keyed by branch node t.
b_vals

{1: -0.6556666666666668, 2: 0.01306066069118505, 3: 0.0076910140636304025}

In [9]:
# Leaf assignment values, keyed by (leaf node t, class label).
c_vals

{(4, 0): -0.0,
 (4, 1): 0.0,
 (4, 2): 1.0,
 (5, 0): -0.0,
 (5, 1): 0.0,
 (5, 2): 1.0,
 (6, 0): -0.0,
 (6, 1): 1.0,
 (6, 2): -0.0,
 (7, 0): 1.0,
 (7, 1): 0.0,
 (7, 2): -0.0}

In [10]:
# Replay the learned tree by hand on the test set and count correct predictions.
# Node numbering: the children of node t are 2*t (left) and 2*t + 1 (right);
# any t beyond the number of branch nodes is a leaf.
num_branch_nodes = len(rdt.branch_nodes)
feature_ids = range(rdt.n_features)

count = 0
for i, x in enumerate(X_test):
    t = 1
    while t <= num_branch_nodes:
        weights = [a_vals[t, f] for f in feature_ids]
        score = np.dot(weights, x)
        goes_right = score > b_vals[t] + rdt.epsilon
        if not goes_right:
            # Samples within 0.001 of the shifted threshold are sent right as well.
            goes_right = np.abs(score - b_vals[t] - rdt.epsilon) <= 0.001
        t = 2*t + 1 if goes_right else 2*t

    # A value above 0.5 in c_vals[t, k] is read as "leaf t predicts class k".
    if c_vals[t, y_test[i]] > 0.5:
        count += 1

count

48