# MILP Formulation from Nathan Kallus' Paper (Problem 4)

In [1]:
import gurobipy as gp
from gurobipy import GRB
import sys
import math
import numpy as np
import pandas as pd
import random

## Model specifications
#### Since we chose to modify the formulation to a certain extent, these variables simply allow us to revert back to the original model
- delta_include: If true, then constraint 4c from the original formulation holds. If false, only the added constraint that the entries of gamma[p] must sum to 1 holds
- different_Cp: If true, then we have a different set of branching choices for every non-leaf node (like the original formulation). If false, then we have a single static set C for all nodes

In [2]:
# -- This class is an alternative to solving the right/left ancestor problem --
"""
INPUT: d = depth of tree (which includes root node), so d = 2 would make a tree with {1, 2, 3}
RELEVANT FUNCTIONS:
- get_right_left: For all leaf nodes, returns its right and left ancestors in a dictionary 
                  of {(p, q): 1 or -1 if q is right or left ancestor respectively}
"""
class Tree:
    """Complete binary tree with heap-style numbering (the root is node 1).

    For depth ``d`` the non-leaf nodes are ``1 .. 2**(d-1) - 1`` and the leaves
    are ``2**(d-1) .. 2**d - 1``; node ``n`` has children ``2n`` and ``2n + 1``.

    Attributes:
        depth: Depth passed to the constructor (counts the root level).
        nodes: List of non-leaf node ids.
        leaves: List of leaf node ids.
        ancestor_rl: Dictionary filled by ``get_right_left``.
    """

    def __init__(self, d):
        self.depth = d
        self.nodes = list(range(1, 2**(d-1)))
        self.leaves = list(range(2**(d-1), 2**d))
        self.ancestor_rl = {}
        # Range objects give O(1) membership tests for ints while keeping the
        # same equality semantics as the public lists above.
        self._node_range = range(1, 2**(d-1))
        self._leaf_range = range(2**(d-1), 2**d)

    def get_left_children(self, n):
        """Return the left child (``2n``) of non-leaf node ``n``.

        Raises:
            ValueError: If ``n`` is not a non-leaf node of this tree.
        """
        if n not in self._node_range:
            raise ValueError('Invalid node n')
        return int(2*n)

    def get_right_children(self, n):
        """Return the right child (``2n + 1``) of non-leaf node ``n``.

        Raises:
            ValueError: If ``n`` is not a non-leaf node of this tree.
        """
        if n not in self._node_range:
            raise ValueError('Invalid node n')
        return int(2*n+1)

    def get_parent(self, n):
        """Return the parent (``n // 2``) of node ``n``.

        Raises:
            ValueError: If ``n`` is neither a non-leaf node nor a leaf.
        """
        if n not in self._node_range and n not in self._leaf_range:
            raise ValueError('Invalid node n')
        # Integer floor division avoids the float round-trip of floor(n/2).
        return int(n // 2)

    def get_ancestors(self, direction, n):
        """Collect the ancestors of ``n`` that it hangs below on one side.

        Args:
            direction: ``'r'`` selects ancestors whose right subtree contains
                ``n``; any other value selects those whose left subtree does.
            n: Node id to start from.

        Returns:
            List of matching ancestors, nearest ancestor first.
        """
        want_right = direction == 'r'
        ancestors = []
        current = n
        while current != 1:
            parent = self.get_parent(current)
            if want_right:
                child = self.get_right_children(parent)
            else:
                child = self.get_left_children(parent)
            if child == current:
                ancestors.append(parent)
            current = parent
        return ancestors

    def get_right_left(self):
        """Map every (leaf, ancestor) pair to +1 (right ancestor) or -1 (left).

        Returns:
            ``self.ancestor_rl``, updated in place with ``{(p, q): 1 or -1}``
            for every leaf ``p`` and each of its ancestors ``q``.
        """
        for leaf in self.leaves:
            for q in self.get_ancestors('r', leaf):
                self.ancestor_rl[(leaf, q)] = 1
            for q in self.get_ancestors('l', leaf):
                self.ancestor_rl[(leaf, q)] = -1
        return self.ancestor_rl

# Evaluating Synthetic Data

In [3]:
def open_file(file_path):
    """Read a CSV and split it into covariates, treatment, outcome and 'real' outcomes.

    Args:
        file_path: Anything accepted by ``pd.read_csv``.

    Returns:
        Tuple ``(train_X, train_t, train_y, real)`` where ``train_X`` holds the
        eleven named covariate columns, ``train_t`` is column ``'t'``,
        ``train_y`` is column ``'y'`` and ``real`` holds columns ``y0``..``y5``.
    """
    covariate_columns = [
        'AGE2', 'AGE3', 'RVISINF',
        'RSBP2', 'RSBP3', 'RSBP4',
        'RDEF3', 'RDEF4', 'RDEF5',
        'RCONSC1', 'RCONSC2',
    ]
    outcome_columns = ['y0', 'y1', 'y2', 'y3', 'y4', 'y5']

    frame = pd.read_csv(file_path)
    return frame[covariate_columns], frame['t'], frame['y'], frame[outcome_columns]

In [5]:
def open_file_v2(file_path):
    """Read a CSV and split it purely by column position.

    Args:
        file_path: Anything accepted by ``pd.read_csv``.

    Returns:
        Tuple ``(train_X, train_t, train_y, real)``: columns 0-2, column 5,
        column 7 and columns 3-4 respectively. Column 6 is not returned.
    """
    table = pd.read_csv(file_path)
    covariates = table.iloc[:, 0:3]
    real_outcomes = table.iloc[:, 3:5]
    assigned = table.iloc[:, 5]
    observed = table.iloc[:, 7]
    return covariates, assigned, observed, real_outcomes

In [4]:
def datapoint_tree(node, i, test_X, test_real, test_t):
    """Route datapoint ``i`` down the learned tree and score the leaf's treatment.

    Relies on the module-level ``L`` (leaf ids), ``branching`` (node -> column
    index), ``treatments`` (leaf -> treatment index) and ``tree``.

    Args:
        node: Node at which routing starts (callers pass the root, 1).
        i: Positional row index of the datapoint.
        test_X: Covariate frame; a value <= 0 in the branching column goes left.
        test_real: Frame whose column ``k`` holds the outcome under treatment ``k``.
        test_t: Treatments actually assigned, indexed with ``test_t[i]``.

    Returns:
        Tuple ``(difference, count_optimal, same_treatment)``: the gap between
        the best outcome in the row and the outcome of the leaf's treatment,
        1/0 for whether that gap is zero, and 1/0 for whether the leaf's
        treatment equals ``test_t[i]``.
    """
    # Walk downwards until a leaf is reached.
    while node not in L:
        if test_X.iloc[i, branching[node]] <= 0:
            node = tree.get_left_children(node)
        else:
            node = tree.get_right_children(node)

    prescribed = treatments[node]
    best_outcome = max(test_real.iloc[i, :])
    difference = best_outcome - test_real.iloc[i, prescribed]
    count_optimal = 1 if difference == 0 else 0
    same_treatment = 1 if prescribed == test_t[i] else 0
    return difference, count_optimal, same_treatment

In [5]:
def get_metrics(test_X, test_real, test_t):
    """Aggregate the per-datapoint results of ``datapoint_tree`` over a dataset.

    Args:
        test_X: Covariate frame; its length defines the number of datapoints.
        test_real: Outcomes under each treatment, passed through unchanged.
        test_t: Assigned treatments, passed through unchanged.

    Returns:
        Tuple ``(difference, optimal_fraction, same_fraction)``: the summed
        outcome gap, and the fractions of datapoints whose gap is zero and
        whose leaf treatment matches the assigned one.
    """
    n_rows = len(test_X)
    # Slots: [summed gap, optimal count, same-treatment count].
    totals = [0, 0, 0]
    for row in range(n_rows):
        result = datapoint_tree(1, row, test_X, test_real, test_t)
        for slot, value in enumerate(result):
            totals[slot] += value
    gap_sum, optimal_hits, same_hits = totals
    return gap_sum, float(optimal_hits)/n_rows, float(same_hits)/n_rows

## Declaring variables determined a-priori

### a) Specifying Input to Model
- m treatments indexed by t {1,..., m}
- n datapoints indexed by i {(X1, T1, Y1), ..., (Xn, Tn, Yn)} 
- d: depth of decision tree
- n_min: minimum number of datapoints of each treatment in node p
- num_features
- num_cuts

In [6]:
def run_tree(depth, bert):
    """Build and solve the prescriptive-tree MILP on the current training data.

    Reads the module-level ``train_X``, ``train_t`` and ``train_y``, which must
    be set before the call, and uses ``Tree`` for the right/left ancestor map.
    Every column of ``train_X`` contributes one candidate cut with threshold 0,
    shared by all non-leaf nodes.

    The model is optimized twice: first until one feasible solution is found,
    then again with what remains of a 3600 second budget.

    Args:
        depth: Tree depth including the root level (converted with ``int``).
        bert: If true, add the ``f``/``beta`` variables and the squared-error
            term to the objective (the "bertsimas" variant).

    Returns:
        Tuple ``(branching, treatments)``. ``branching`` maps each non-leaf
        node to the index of its chosen cut (equal to the column index in
        ``train_X``); ``treatments`` maps each leaf to its chosen treatment.

    Raises:
        NotImplementedError: If one of the internal switches for the original
            formulation (``delta_include``, ``different_Cp``) is enabled.
    """
    # Switches for reverting to the original formulation. Both alternatives
    # depend on data (k, z, delta, per-node C[p]) that this function never builds.
    delta_include = False
    different_Cp = False
    if delta_include:
        # Would add constraint 4c: delta[p, j] == sum_i gamma[p, i] * z[j, i].
        raise NotImplementedError('delta_include requires k, z and delta, which are not defined')
    if different_Cp:
        raise NotImplementedError('per-node cut sets C[p] are not implemented')

    m = {0, 1, 2, 3, 4, 5}  # treatment labels
    n = len(train_X)        # number of datapoints
    d = int(depth)
    n_min = 0               # minimum datapoints of each treatment per leaf

    # ---- Complete binary tree ----
    # P = number of nodes + 1, L_c = non-leaf nodes, L = leaf nodes.
    P = 2**d
    L_c = set(range(1, 2**(d-1)))
    L = set(range(2**(d-1), P))

    # ancestors: {leaf: [parent, grandparent, ..., root]}
    ancestors = {p: [p // (2**j) for j in range(1, d)] for p in L}

    # right_left: {(leaf, ancestor): 1 if right ancestor, -1 if left ancestor}
    tree = Tree(d)
    right_left = tree.get_right_left()

    # C: ordered list of candidate cuts (feature index, threshold). One cut at
    # threshold 0 per column, so the cut index equals the column index.
    C = [(col, 0) for col in range(len(train_X.columns))]

    # ---- Big-M data ----
    # ybar shifts the outcomes so that the smallest one is 0.
    minimum = min(train_y)
    ybar = train_y - minimum
    ymax = max(ybar)

    # M = largest per-treatment datapoint count, reduced by |L| * n_min.
    # NOTE(review): M is a count, while the sums it bounds in 4j/4k are in
    # outcome units (each term is at most ymax in magnitude). Confirm against
    # the paper that no ymax factor is needed when ymax > 1.
    _, counts = np.unique(train_t, return_counts=True)
    M = max(counts - len(L) * n_min)

    model = gp.Model("Kallus")

    # ---- Decision variables ----
    # gamma[p, j] = 1 if non-leaf node p uses cut j.
    gamma = model.addVars(L_c, len(C), vtype=GRB.BINARY, name='gamma')
    # lamb[p, t] = 1 if leaf p prescribes treatment t.
    lamb = model.addVars(L, m, vtype=GRB.BINARY, name='lamb')

    # ---- Auxiliary variables ----
    # w[i, p]: membership of datapoint i in leaf p (continuous in [0, 1]).
    w = model.addVars(n, L, lb=0, ub=1, name='w')
    # mu[p]: mean outcome of the prescribed treatment in leaf p.
    mu = model.addVars(L, lb=0, name='mu')
    # nu[i, p]: linearization of mu[p] * w[i, p].
    nu = model.addVars(n, L, lb=0, name='nu')
    # chi[p, i] = 1 if the cut chosen at node p sends datapoint i left.
    chi = model.addVars(L_c, n, vtype=GRB.BINARY, name='chi')

    if bert:
        f = model.addVars(n, lb=0, name='f')
        beta = model.addVars(L, m, lb=0, name='beta')

    theta = 0.5  # weight between the prescriptive and the squared-error term

    # ---- Objective ----
    if bert:
        model.setObjective(theta * gp.quicksum(nu[i, p] for i in range(n) for p in L)
                       - (1-theta) * gp.quicksum((train_y[i] - f[i]) * (train_y[i] - f[i]) for i in range(n)), GRB.MAXIMIZE)
    else:
        model.setObjective(gp.quicksum(nu[i, p] for i in range(n) for p in L), GRB.MAXIMIZE)

    # ---- Constraints ----
    # Exactly one cut per non-leaf node.
    for p in L_c:
        model.addConstr(gp.quicksum(gamma[p, j] for j in range(len(C))) == 1)

    # chi[p, i] is 1 exactly when the chosen cut's threshold is >= the
    # datapoint's value of the cut feature, i.e. the datapoint goes left.
    for i in range(n):
        for p in L_c:
            model.addConstr(chi[p, i] == gp.quicksum(gamma[p, j] for j in range(len(C)) if C[j][1] >= train_X.iloc[i, C[j][0]]))

    # 4d: a datapoint can only be in leaf p if it took the matching turn at
    # every ancestor (w <= 1 - chi for right ancestors, w <= chi for left ones).
    for p in L:
        for q in ancestors[p]:
            R_pq = right_left[(p, q)]
            for i in range(n):
                model.addConstr(w[i, p] <= (1+R_pq)/2 - R_pq * chi[q, i])

    # 4e: if every turn matches, membership is forced to 1.
    for p in L:
        A_p = ancestors[p]
        for i in range(n):
            model.addConstr(w[i, p] >= 1 + gp.quicksum(-chi[q, i] for q in A_p if right_left[(p, q)] == 1)
                        + gp.quicksum(-1+chi[q, i] for q in A_p if right_left[(p, q)] == -1))

    # 4f: at least n_min datapoints of every treatment in every leaf.
    for t in m:
        for p in L:
            model.addConstr(gp.quicksum(w[i, p] for i in range(n) if train_t[i] == t) >= n_min)

    # 4g/4h: linearization nu[i, p] = mu[p] * w[i, p].
    for p in L:
        for i in range(n):
            model.addConstr(nu[i, p] <= ymax * w[i, p])
            model.addConstr(nu[i, p] <= mu[p])
            model.addConstr(nu[i, p] >= mu[p] - ymax * (1-w[i, p]))

    # 4i: exactly one treatment per leaf.
    for p in L:
        model.addConstr(gp.quicksum(lamb[p, t] for t in m) == 1)

    # 4j/4k: consistency between lamb and mu. Terms whose ybar is 0 (the
    # minimum outcome) drop their w coefficient, which is expected.
    for p in L:
        for t in m:
            model.addConstr(gp.quicksum(nu[i, p] - w[i, p] * ybar[i] for i in range(n) if train_t[i] == t) <= M*(1-lamb[p, t]))
            model.addConstr(gp.quicksum(nu[i, p] - w[i, p] * ybar[i] for i in range(n) if train_t[i] == t) >= M*(lamb[p, t]-1))

    if bert:
        # Tie f[i] to beta[p, t] for the leaf that contains datapoint i.
        for i in range(n):
            for p in L:
                for t in m:
                    if train_t[i] == t:
                        model.addConstr(f[i] - beta[p, t] <= M * (1-w[i, p]))
                        model.addConstr(f[i] - beta[p, t] >= M * (w[i, p]-1))

    # ---- Two-phase solve ----
    # Phase 1 stops at the first feasible solution; phase 2 continues with the
    # remaining time budget and the original solution limit (minus the one found).
    timeLimit = 3600
    oldSolutionLimit = model.Params.SolutionLimit
    model.Params.SolutionLimit = 1
    model.optimize()
    # Phase 1 has no time limit, so clamp: a negative TimeLimit is rejected.
    model.Params.TimeLimit = max(0.0, timeLimit - model.getAttr(GRB.Attr.Runtime))
    model.Params.SolutionLimit = oldSolutionLimit - model.Params.SolutionLimit
    model.optimize()

    # Binary variables come back as floats that may differ slightly from 0/1
    # within the solver's integrality tolerance, so threshold instead of
    # testing for exact equality with 1.0.
    branching = {key[0]: key[1] for key, value in model.getAttr("X", gamma).items() if value > 0.5}
    treatments = {key[0]: key[1] for key, value in model.getAttr("X", lamb).items() if value > 0.5}

    return branching, treatments


In [7]:
"""kallus = pd.DataFrame(columns=['Dataset','Depth','P(Correct Treatment)','Test Error','Test % Optimal',
                               'Test % Same Treatment', 'Train Error', 'Train % Optimal', 
                               'Train % Same Treatment', 'Tree'])
bertsimas = pd.DataFrame(columns=['Dataset','Depth','P(Correct Treatment)','Test Error','Test % Optimal',
                               'Test % Same Treatment', 'Train Error', 'Train % Optimal', 
                                  'Train % Same Treatment', 'Tree'])"""


#bertsimas = pd.read_csv('bertsimas_athey_500.csv')

FileNotFoundError: [Errno 2] File b'bertsimas_athey_500.csv' does not exist: b'bertsimas_athey_500.csv'

In [None]:
# Results table for the Kallus runs: one row per (dataset, depth) combination.
kallus = pd.DataFrame(columns=['Dataset','Depth','P(Correct Treatment)','Test Error','Test % Optimal',
                               'Test % Same Treatment', 'Train Error', 'Train % Optimal', 
                               'Train % Same Treatment', 'Tree'])


# Stored verbatim in the 'P(Correct Treatment)' column; it does not select data in this cell.
prob = 0.5
# Forwarded to run_tree and used below to pick the results table.
bert = False

depths = [2, 3]
datasets = [3, 4, 5]
#probs = [0.1, 0.5, 0.9]

for dataset in datasets:
    for d in depths:
        train_filepath = 'data/IST_2000_binary/data_train_enc_' + str(dataset) + '.csv'
        test_filepath = 'data/IST_2000_binary/data_test_enc_' + str(dataset) + '.csv'
        # run_tree reads train_X / train_t / train_y as globals, so they must be
        # assigned before it is called.
        train_X, train_t, train_y, train_real = open_file(train_filepath)
        branching, treatments = run_tree(d, bert)

        # datapoint_tree reads the globals tree, L, branching and treatments.
        tree = Tree(d)
        L = set(range(2**(d-1), 2**d))
        test_X, test_t, test_y, test_real = open_file(test_filepath)
        # error/pct/acc are test-set metrics; error1/pct1/acc1 are training-set metrics.
        error, pct, acc = get_metrics(test_X, test_real, test_t)
        error1, pct1, acc1 = get_metrics(train_X, train_real, train_t)

        tree_stats = 'branching = ' + str(branching) + ', treatments = ' + str(treatments)
        row = [dataset, d, prob, error, pct*100, acc*100, error1, pct1*100, acc1*100, tree_stats]
        print(row)
        # NOTE(review): `bertsimas` is not created in this cell; confirm it
        # exists before running with bert = True.
        if bert:
            bertsimas.loc[len(bertsimas)] = row
        else:
            kallus.loc[len(kallus)] = row


Changed value of parameter SolutionLimit to 1
   Prev: 2000000000  Min: 1  Max: 2000000000  Default: 2000000000
Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (mac64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 22039 rows, 10025 columns and 74110 nonzeros
Model fingerprint: 0x2d7d9669
Variable types: 8002 continuous, 2023 integer (2023 binary)
Coefficient statistics:
  Matrix range     [1e+00, 6e+02]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 6e+02]
Presolve removed 8008 rows and 3998 columns
Presolve time: 0.22s
Presolved: 14031 rows, 6027 columns, 50367 nonzeros
Variable types: 4002 continuous, 2025 integer (2019 binary)

Root relaxation: objective 2.000000e+03, 7403 iterations, 0.32 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0 2000.00000    0 1585      

H 2457   977                    1477.6002551 2000.00000  35.4%   381 1938s
  2502  1043 2000.00000   21   97 1477.60026 2000.00000  35.4%   376 1940s
  3048  1036     cutoff   42      1477.60026 2000.00000  35.4%   332 1947s
  3090  1040 1554.42526   39  115 1477.60026 2000.00000  35.4%   329 1950s
  3602  1133 1493.33730   31  127 1477.60026 2000.00000  35.4%   300 1955s
  3883  1100 infeasible   34      1477.60026 2000.00000  35.4%   289 1960s
  4706  1194 1477.94157   36  162 1477.60026 2000.00000  35.4%   268 1965s
  5648  1562 2000.00000   23   89 1477.60026 2000.00000  35.4%   248 1970s
  6253  1732 2000.00000   27    5 1477.60026 2000.00000  35.4%   238 1976s
  6696  1940 1845.12918   38    5 1477.60026 2000.00000  35.4%   233 1980s
  6999  2073     cutoff   36      1477.60026 2000.00000  35.4%   229 1985s
  7954  2499 2000.00000   29    7 1477.60026 2000.00000  35.4%   217 1990s
  8532  2731 1493.89565   36    2 1477.60026 2000.00000  35.4%   214 1995s
  9571  3150 1502.98858  

 40903  6286 1953.50995   40  165 1520.80984 1954.37967  28.5%   232 2864s
 41224  6386 1799.49784   46  270 1520.80984 1954.37967  28.5%   232 2869s
 41588  6484 1578.23367   46  193 1520.80984 1954.37967  28.5%   233 2875s
 42028  6655 1850.80311   43  303 1520.80984 1954.37967  28.5%   233 2885s
 42767  6726 1550.96576   48  145 1520.80984 1954.37967  28.5%   234 2891s
 43092  6856 1810.85714   49  285 1520.80984 1954.37967  28.5%   235 2897s
 43481  6983 1873.69247   43  374 1520.80984 1954.37967  28.5%   235 2904s
 43916  7102     cutoff   60      1520.80984 1954.37967  28.5%   236 2910s
 44319  7179 1827.90575   47  367 1520.80984 1954.37967  28.5%   236 2916s
 44721  7293 1700.23391   51  151 1520.80984 1954.37967  28.5%   237 2922s
 45130  7406 1842.96688   45   82 1520.80984 1954.37967  28.5%   237 2928s
 45603  7500 1954.37967   46  157 1520.80984 1954.37967  28.5%   238 2934s
 45923  7624 1644.18726   52  293 1520.80984 1954.37967  28.5%   238 2940s
 46383  7714 1841.92012  

 86956 15800 1733.47203   54  274 1520.80984 1942.33333  27.7%   255 3528s
 87350 15900 1784.21265   51   88 1520.80984 1941.64354  27.7%   256 3533s
 87781 15978 1804.72376   43  132 1520.80984 1940.89135  27.6%   256 3538s
 88143 16065 1779.28934   42  287 1520.80984 1940.09677  27.6%   256 3542s
 88567 16101     cutoff   53      1520.80984 1939.39672  27.5%   256 3547s
 88942 16164 1829.06111   51  203 1520.80984 1938.88793  27.5%   256 3552s
 89388 16216     cutoff   56      1520.80984 1938.06686  27.4%   256 3556s
 89792 16260 1726.51347   52  177 1520.80984 1937.58365  27.4%   256 3561s
 90181 16317 1879.76744   46  210 1520.80984 1936.94231  27.4%   256 3566s
 90620 16393 1722.94790   49  175 1520.80984 1936.20094  27.3%   256 3572s
 91102 16444 1771.30808   50  126 1520.80984 1935.44707  27.3%   256 3576s
 91513 16523     cutoff   50      1520.80984 1934.82243  27.2%   256 3581s
 91925 16600     cutoff   55      1520.80984 1934.38819  27.2%   256 3586s
 92391 16655     cutoff  

   354   210     cutoff   24      1411.25028 2992.80000   112%   571  951s
H  357   210                    1421.9964167 2992.80000   110%   568  951s
   399   239 2000.00000    5  871 1421.99642 2740.00000  92.7%   544  955s
H  408   239                    1422.0396497 2740.00000  92.7%   545  955s
H  441   267                    1422.1867926 2740.00000  92.7%   521  957s
   451   287 1917.30331   14    5 1422.18679 2740.00000  92.7%   518  960s
H  468   286                    1427.1867926 2740.00000  92.0%   509  960s
H  503   280                    1441.0412370 2740.00000  90.1%   489  960s
   562   315     cutoff   21      1441.04124 2000.00000  38.8%   483  965s
   654   393 1916.78002   18  825 1441.04124 2000.00000  38.8%   468  970s
   734   426 1617.15013   11 1607 1441.04124 2000.00000  38.8%   460  978s
H  739   396                    1461.3545883 2000.00000  36.9%   459  978s
   800   421 1601.75173   12 1690 1461.35459 2000.00000  36.9%   445  990s
   863   463 1974.78652  

 34054  5716     cutoff   47      1508.83804 1936.59091  28.3%   211 2063s
 34074  5852 1933.51111   34    7 1508.83804 1936.59091  28.3%   211 2068s
 34541  5769     cutoff   43      1508.83804 1936.59091  28.3%   212 2074s
 34827  5753 1536.33333   47    2 1508.83804 1936.59091  28.3%   213 2079s
 35198  5647     cutoff   36      1508.83804 1936.59091  28.3%   214 2087s
 35270  5742 1936.59091   33  147 1508.83804 1936.59091  28.3%   214 2092s
 35700  5698 1555.28421   42   82 1508.83804 1936.59091  28.3%   215 2098s
 36155  5614 1579.57189   45  144 1508.83804 1936.59091  28.3%   215 2104s
 36552  5558     cutoff   45      1508.83804 1936.59091  28.3%   216 2110s
 36955  5535 1641.21639   40  127 1508.83804 1936.59091  28.3%   217 2115s
 37370  5506     cutoff   41      1508.83804 1936.59091  28.3%   217 2121s
 37783  5373     cutoff   40      1508.83804 1936.59091  28.3%   218 2130s
 37887  5461 1573.32742   43  142 1508.83804 1936.59091  28.3%   218 2136s
 38377  5384 1843.84962  

 81549 10474     cutoff   43      1508.83804 1817.15103  20.4%   242 2665s
 82313 10463     cutoff   48      1508.83804 1815.14478  20.3%   242 2673s
 82671 10458 1658.78793   36  153 1508.83804 1814.06014  20.2%   242 2677s
 83018 10470 1695.75533   43  127 1508.83804 1813.08629  20.2%   243 2682s
 83478 10481     cutoff   43      1508.83804 1812.13017  20.1%   243 2687s
 83947 10493 1602.12174   38    4 1508.83804 1811.19595  20.0%   243 2691s
 84353 10504 1625.00193   38  120 1508.83804 1810.26797  20.0%   243 2695s
 84824 10505     cutoff   39      1508.83804 1809.44917  19.9%   243 2700s
 85543 10512 1538.17595   43  159 1508.83804 1807.44928  19.8%   243 2709s
 85930 10517 1555.16707   37   85 1508.83804 1806.81034  19.7%   243 2713s
 86351 10504 1783.57691   45  167 1508.83804 1806.39167  19.7%   243 2717s
 86685 10542 1622.94264   38  144 1508.83804 1805.61184  19.7%   243 2721s
 87111 10548 1524.10849   41  143 1508.83804 1804.45861  19.6%   243 2725s
 87441 10541     cutoff  

 153889 11630 1559.43904   43    2 1508.83804 1632.37160  8.19%   230 3531s
 154836 11649     cutoff   43      1508.83804 1630.87880  8.09%   229 3537s
 155546 11683 1594.91805   35  165 1508.83804 1630.75676  8.08%   229 3541s
 156176 11694 1526.43026   43   80 1508.83804 1630.07692  8.04%   229 3545s
 156647 11691 1513.55028   44    2 1508.83804 1629.77778  8.02%   228 3550s
 157479 11711     cutoff   51      1508.83804 1628.17054  7.91%   228 3555s
 157900 11748     cutoff   44      1508.83804 1627.58498  7.87%   228 3560s
 159175 11703 1577.82195   45  126 1508.83804 1626.04098  7.77%   228 3567s
 159561 11695     cutoff   45      1508.83804 1626.01967  7.77%   228 3570s
 160359 11776     cutoff   51      1508.83804 1624.88268  7.69%   227 3575s
 160828 11778     cutoff   46      1508.83804 1624.88268  7.69%   227 3581s
 161825 11817 1594.33333   44   83 1508.83804 1623.15985  7.58%   227 3586s
 162789 11818 1518.57774   48    2 1508.83804 1622.35593  7.52%   227 3592s
 163342 1183

In [8]:
# Write the accumulated Kallus results table to disk.
kallus.to_csv('Results_IST_binary/kallus2.csv')

In [11]:
# Single run on one dataset/depth, outside the experiment loop.
dataset = 4
d = 2
bert = False
# NOTE(review): this cell reads from 'data/IST_2000/', not 'data/IST_2000_binary/'
# as the loop cell does; confirm that is intended.
train_filepath = 'data/IST_2000/data_train_enc_' + str(dataset) + '.csv'
test_filepath = 'data/IST_2000/data_test_enc_' + str(dataset) + '.csv'
# run_tree reads train_X / train_t / train_y as globals.
train_X, train_t, train_y, train_real = open_file(train_filepath)
branching, treatments = run_tree(d, bert)

# datapoint_tree reads the globals tree, L, branching and treatments.
tree = Tree(d)
L = set(range(2**(d-1), 2**d))
test_X, test_t, test_y, test_real = open_file(test_filepath)
error, pct, acc = get_metrics(test_X, test_real, test_t)
error1, pct1, acc1 = get_metrics(train_X, train_real, train_t)

tree_stats = 'branching = ' + str(branching) + ', treatments = ' + str(treatments)
# This row has no `prob` entry (9 values); it is only printed here.
row = [dataset, d, error, pct*100, acc*100, error1, pct1*100, acc1*100, tree_stats]
print(row)

Changed value of parameter TimeLimit to 3600.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (mac64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 22039 rows, 10039 columns and 99696 nonzeros
Model fingerprint: 0xb0b31bf7
Variable types: 8002 continuous, 2037 integer (2037 binary)
Coefficient statistics:
  Matrix range     [3e-02, 6e+02]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [9e-01, 6e+02]
Presolve removed 8034 rows and 4022 columns
Presolve time: 0.58s
Presolved: 14005 rows, 6017 columns, 79167 nonzeros
Variable types: 4002 continuous, 2015 integer (2015 binary)

Root relaxation: objective 1.897781e+03, 7002 iterations, 0.98 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0 1897.78083    0  568          - 1897.78083  

In [76]:
# Re-evaluate the current branching/treatments on the test and training data.
# Uses whatever d, test_filepath, train_X, train_real and train_t are currently set.
tree = Tree(d)
L = set(range(2**(d-1), 2**d))
test_X, test_t, test_y, test_real = open_file(test_filepath)
error, pct, acc = get_metrics(test_X, test_real, test_t)
error1, pct1, acc1 = get_metrics(train_X, train_real, train_t)
# First line: test metrics; second line: training metrics.
print(error, pct, acc)
print(error1, pct1, acc1)

267.9863312540136 0.4446 0.5066
21.252969854711775 0.354 0.5


In [77]:
# Append the most recent metrics as a new row of the matching results table.
tree_stats = 'branching = ' + str(branching) + ', treatments = ' + str(treatments)
row = [dataset, d, prob, error, pct*100, acc*100, error1, pct1*100, acc1*100, tree_stats]
# len(frame) is used as the new row label, so the row is added at the end.
if bert:
    bertsimas.loc[len(bertsimas)] = row
else:
    kallus.loc[len(kallus)] = row

[5, 3, 0.9, 267.9863312540136, 44.46, 50.660000000000004, 21.252969854711775, 35.4, 50.0, 'branching = {1: 10, 2: 16, 3: 0}, treatments = {4: 1, 5: 1, 6: 1, 7: 1}']


In [78]:
# Show the Bertsimas results table and save it without the row index.
print(bertsimas)
bertsimas.to_csv('bertsimas_athey_500.csv', index=False)

    Dataset  Depth  P(Correct Treatment)  Test Error  Test % Optimal  \
0         1      2                   0.5   53.227602           76.83   
1         1      3                   0.5  123.833930           66.00   
2         1      3                   0.5  123.833930           66.00   
3         2      2                   0.5   24.915258           84.31   
4         2      3                   0.5   24.915258           84.31   
5         3      2                   0.5  166.280038           58.61   
6         3      3                   0.5   57.544135           77.50   
7         4      2                   0.5  432.810829           24.05   
8         4      3                   0.5  231.607156           44.69   
9         5      2                   0.5  189.645268           55.55   
10        5      3                   0.5   36.072978           82.59   
11        1      2                   0.9  224.473595           49.78   
12        1      3                   0.9  224.473595           4

### Summary of Performance for Athey's Synthetic Data

In [247]:
# SUMMARY: KALLUS on Athey
# Hand-entered summary table (20 rows); all lists below are index-aligned.
# The 'Method' column holds dataset names.
summary = {'Method': ['Dataset 1', 'Dataset 1', 'Dataset 2', 'Dataset 2', 'Dataset 3',
                     'Dataset 3', 'Dataset 4', 'Dataset 4', 'Dataset 5', 'Dataset 5', 'Dataset 1', 
                      'Dataset 1', 'Dataset 2', 'Dataset 2', 'Dataset 3',
                     'Dataset 3', 'Dataset 4', 'Dataset 4', 'Dataset 5', 'Dataset 5'],
           'Depth': [2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3],
           'P(Correct Treatment)': ['0.5', '0.5', '0.5', '0.5', '0.5', '0.5', '0.5', '0.5', '0.5', '0.5', '0.9', '0.9',
                                   '0.9', '0.9', '0.9', '0.9', '0.9', '0.9', '0.9', '0.9'],
          'Error': [74.67, 74.67, 46.40, 79.86, 145, 109.21, 18.15, 18.15, 152.55, 90.47, 199.70, 238.67, 219.02, 
                    239.10, 201.21, 201.21, 99.23, 88.12, 196.07, 156.84],
          '% Classified': [71.97, 71.97, 78.49, 71.11, 61.22, 68.03, 88.33, 88.33, 60.41, 70.03, 53.35,
                          47.49, 49.91, 47.49, 53.42, 53.42, 71.19, 74.11, 54.12, 58.02]}
# Convert to a DataFrame and save without the row index.
summary = pd.DataFrame(summary)
summary.to_csv('kallus_tree_athey.csv', index=False)

In [248]:
# SUMMARY: Bertsimas on Athey
# Hand-entered summary table (20 rows); all lists below are index-aligned.
# The 'Method' column holds dataset names. This rebinds `summary`.
summary = {'Method': ['Dataset 1', 'Dataset 1', 'Dataset 2', 'Dataset 2', 'Dataset 3',
                     'Dataset 3', 'Dataset 4', 'Dataset 4', 'Dataset 5', 'Dataset 5', 'Dataset 1', 
                      'Dataset 1', 'Dataset 2', 'Dataset 2', 'Dataset 3',
                     'Dataset 3', 'Dataset 4', 'Dataset 4', 'Dataset 5', 'Dataset 5'],
           'Depth': [2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3],
           'P(Correct Treatment)': ['0.5', '0.5', '0.5', '0.5', '0.5', '0.5', '0.5', '0.5', '0.5', '0.5', '0.9', '0.9',
                                   '0.9', '0.9', '0.9', '0.9', '0.9', '0.9', '0.9', '0.9'],
          'Error': [74.67, 74.67, 46.40, 55.17, 145, 109.21, 18.15, 18.15, 152.55, 90.47, 199.70, 154.07, 219.02, 
                    180.83, 201.21, 116.75, 99.23, 88.12, 196.07, 156.84],
          '% Classified': [71.97, 71.97, 78.49, 76.49, 61.22, 68.03, 88.33, 88.33, 60.41, 70.03, 53.35,
                          62.17, 49.91, 53.91, 53.42, 71.59, 71.19, 74.11, 54.12, 58.02]}
# Convert to a DataFrame and save without the row index.
summary = pd.DataFrame(summary)
summary.to_csv('bertsimas_tree_athey.csv', index=False)


In [37]:
def datapoint_tree_avg(node, i):
    """Return the leaf that training datapoint ``i`` reaches when routed from ``node``.

    Relies on the module-level ``L`` (leaf ids), ``train_X``, ``branching``
    (node -> column index) and ``tree``. A value <= 0 in the branching column
    sends the datapoint to the left child, anything else to the right child.

    Args:
        node: Node at which routing starts (callers pass the root, 1).
        i: Positional row index into ``train_X``.

    Returns:
        The id of the leaf node reached.
    """
    while node not in L:
        goes_left = train_X.iloc[i, branching[node]] <= 0
        if goes_left:
            node = tree.get_left_children(node)
        else:
            node = tree.get_right_children(node)
    return node

In [None]:
# Average observed outcome per (leaf, assigned treatment) pair.
# Keys are the leaf id and the treatment label concatenated as digits and
# read back as an int, e.g. leaf 2 with treatment 0 -> 20.
summation = {20: 0, 21: 0, 30: 0, 31: 0}
count = {20: 0, 21: 0, 30: 0, 31: 0}

# NOTE(review): `n` is not assigned at module level in the visible cells
# (only inside run_tree); confirm it is defined before running this cell.
for i in range(n):
    leaf_node = datapoint_tree_avg(1, i)
    index = str(leaf_node) + str(train_t[i])
    summation[int(index)] += train_y[i]
    count[int(index)] += 1
    
avg = {}
# Raises ZeroDivisionError for any key that received no datapoints.
for i in summation:
    avg[i] = float(summation[i]) / count[i]

print(avg)

In [49]:
# Average observed outcome per leaf, restricted to datapoints whose assigned
# treatment equals the treatment chosen for that leaf.
summation = {2: 0, 3: 0}
count = {2: 0, 3: 0}

# NOTE(review): `n` is not assigned at module level in the visible cells
# (only inside run_tree); confirm it is defined before running this cell.
for i in range(n):
    leaf_node = datapoint_tree_avg(1, i)
    if train_t[i] == treatments[leaf_node]:
        summation[leaf_node] += train_y[i]
        count[leaf_node] += 1
    
avg = {}
# Raises ZeroDivisionError for a leaf with no matching datapoints.
for i in summation:
    avg[i] = float(summation[i]) / count[i]

print(avg)

{2: 0.39851651102745433, 3: 0.34277517675515534}


In [59]:
# Start of a second model ("Nathan"); only variables are declared here, with
# no objective and no constraints.
# NOTE(review): L_c, C, m and n are locals of run_tree in the visible cells;
# confirm they also exist at module level before running this cell.

# N = all nodes of the tree (leaves and non-leaf nodes).
N = L.union(L_c)
print(N)

model = gp.Model("Nathan")

# -- VARIABLE DECLARATION --

# gamma: one binary per (non-leaf node, cut index in C), declared as in run_tree.
gamma = model.addVars(L_c, len(C), vtype=GRB.BINARY, name='gamma') 

# lamb: one binary per (leaf, treatment), declared as in run_tree.
lamb = model.addVars(L, m, vtype=GRB.BINARY, name='lamb')


# NOTE(review): the comments that previously sat above c, v and w were copied
# from the Kallus model (w_ip, mu_p, nu_ip) and did not match these variables.
# Their intended meaning is not shown in this cell; only the shapes are stated.

# c: one binary per (datapoint, node in N).
c = model.addVars(n, N, vtype=GRB.BINARY, name='c')

# v: one binary per (datapoint, non-leaf node).
v = model.addVars(n, L_c, vtype=GRB.BINARY, name='v') # define in constraint

# w: one binary per datapoint.
w = model.addVars(n, vtype=GRB.BINARY, name='w')

{1, 2, 3}


In [None]:


"""if different_Cp:
# ---- K and Z if we followed the definition of C_p from the paper -----
    for p in L_c:
        k[p] = math.ceil(math.log2(len(C[p])))

    z = {}
    for p in L_c:
        matrix = np.zeros((k[p], len(C[p])))
        print(matrix.shape)
        for i in range(1, k[p]+1):
            for j in range(1, len(C[p])+1):
                if math.floor(j/(2**i)) % 2 == 1: # odd number
                    z[i-1, j-1] = 1
                else:
                    z[i-1, j-1] = 0
        z[p] = matrix

else:
    # ---- K and Z if we had constant C for all nodes ----
    k = math.ceil(math.log2(len(C)))
    z = np.zeros((k, len(C)))
    for i in range(1, k+1):
        for j in range(1, len(C)+1):
            if math.floor(j/(2**i)) % 2 == 1: # odd number
                z[i-1, j-1] = 1
            else:
                z[i-1, j-1] = 0
print(z)"""