In [6]:
using DataFrames, CSV
using LinearAlgebra, Random, Printf, StatsBase, CategoricalArrays
using Plots, StatsPlots
using Gurobi, JuMP

Max Peruzzi
ML Final Project

In [56]:
#Data Frame for PBC data
data = CSV.read("dataclean.csv", DataFrame)

Unnamed: 0_level_0,Column1,id,time,status,trt,age,sex,ascites,hepato,spiders
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Float64,Float64,String1,Float64,Float64,Float64
1,0,1,400,2,1.0,58.7652,f,1.0,1.0,1.0
2,1,2,4500,0,1.0,56.4463,f,0.0,1.0,1.0
3,2,3,1012,2,1.0,70.0726,m,0.0,0.0,0.0
4,3,4,1925,2,1.0,54.7406,f,0.0,1.0,1.0
5,4,5,1504,1,2.0,38.1054,f,0.0,1.0,1.0
6,5,6,2503,2,2.0,66.2587,f,0.0,1.0,0.0
7,6,7,1832,0,2.0,55.5346,f,0.0,1.0,0.0
8,7,8,2466,2,2.0,53.0568,f,0.0,0.0,0.0
9,8,9,2400,2,1.0,42.5079,f,0.0,0.0,1.0
10,9,10,51,2,2.0,70.5599,f,1.0,0.0,1.0


In [57]:
#Define constants

#Number of patients
N = size(data)[1]

#Min _N and Max N_ number of patients allowed in an interpretable subset
_N = 30
N_ = 100

100

In [77]:
#Define a list of patient numbers that are in each of the two treatment groups
#Note that 2 in the data set means placebo, and 1 means experimental group

T0 = findall(data[!,"trt"].==2) #indices of placebo patients
T1 = findall(data[!,"trt"].==1) #indices of experimental patients

T = [T0, T1]

2-element Vector{Vector{Int64}}:
 [5, 6, 7, 8, 10, 11, 12, 13, 14, 16  …  296, 298, 300, 301, 303, 305, 306, 307, 309, 312]
 [1, 2, 3, 4, 9, 15, 18, 19, 22, 24  …  291, 294, 295, 297, 299, 302, 304, 308, 310, 311]

In [83]:
#Patient-wise (from patient i=1:N) whether the patient is in placebo (1) or experimental group (2)
Ti = (data[!,"trt"].==2).+1

312-element Vector{Int64}:
 1
 1
 1
 1
 2
 2
 2
 2
 1
 2
 2
 2
 2
 ⋮
 2
 1
 2
 1
 2
 2
 2
 1
 2
 1
 1
 2

In [87]:
model = Model(Gurobi.Optimizer)

#Variables
@variable(model, z[1:N], Bin) #Indicator variable - if each patient i is contained within the interpretable subgroup
@variable(model, theta[_N:N_, 1:2], Bin) #Indicator variable - if j between _N and N_ is equal to the number of patients from treatment group t within the interpretable subgroup
@variable(model, zeta[1:N,_N:N_]) #Indicator variable that is 1 iff both zi = 1 and thetaj = 1

#Constraints
@constraint(model, [t=1:2], _N <= sum(z[i] for i in T[t]) <= N_) #The number of patients within the interpretable subgroup from EACH treatment group must be within the bounds _N and N_

@constraint(model, [i=1:N, j=_N:N_], zeta[i,j] <= theta[j,Ti[i]]) #Ensuring z works as indicator variable (see variable section)
@constraint(model, [i=1:N, j=_N:N_], zeta[i,j] <= z[i])
@constraint(model, [i=1:N, j=_N:N_], zeta[i,j] >= theta[j,Ti[i]] + z[i] - 1)

@constraint(model, [t=1:2], sum(sum((1/j)*zeta[i,j] for j in _N:N_) for i in T[t]) == 1) #Confirming that the sum of the patients in the interpretable cluster equals j for each treatment group

@constraint(model, [t=1:2], sum(theta[j,t] for j=_N:N_) == 1) #Ensuring theta works as indicator variable (see variable section)

@constraint(model, [i=1:N, j=_N:N_], 0<=zeta[i,j]<=1) #Zeta bounds

Academic license - for non-commercial use only - expires 2023-08-17


2-dimensional DenseAxisArray{ConstraintRef{Model, MathOptInterface.ConstraintIndex{MathOptInterface.ScalarAffineFunction{Float64}, MathOptInterface.Interval{Float64}}, ScalarShape},2,...} with index sets:
    Dimension 1, Base.OneTo(312)
    Dimension 2, 30:100
And data, a 312×71 Matrix{ConstraintRef{Model, MathOptInterface.ConstraintIndex{MathOptInterface.ScalarAffineFunction{Float64}, MathOptInterface.Interval{Float64}}, ScalarShape}}:
 zeta[1,30] ∈ [0.0, 1.0]    …  zeta[1,100] ∈ [0.0, 1.0]
 zeta[2,30] ∈ [0.0, 1.0]       zeta[2,100] ∈ [0.0, 1.0]
 zeta[3,30] ∈ [0.0, 1.0]       zeta[3,100] ∈ [0.0, 1.0]
 zeta[4,30] ∈ [0.0, 1.0]       zeta[4,100] ∈ [0.0, 1.0]
 zeta[5,30] ∈ [0.0, 1.0]       zeta[5,100] ∈ [0.0, 1.0]
 zeta[6,30] ∈ [0.0, 1.0]    …  zeta[6,100] ∈ [0.0, 1.0]
 zeta[7,30] ∈ [0.0, 1.0]       zeta[7,100] ∈ [0.0, 1.0]
 zeta[8,30] ∈ [0.0, 1.0]       zeta[8,100] ∈ [0.0, 1.0]
 zeta[9,30] ∈ [0.0, 1.0]       zeta[9,100] ∈ [0.0, 1.0]
 zeta[10,30] ∈ [0.0, 1.0]      zeta[10,100] ∈ [0.0, 1.