## Boolean Compressed Sensing 

### Naive Split

In [None]:
def create_design_matrices(df, quantiles):
    """Bin every feature into quantiles and build one-hot design matrices.

    Each column of ``df`` other than ``'Result'`` and ``'Salvage_Good'`` is
    cut into ``quantiles`` equal-frequency bins labelled ``Q1 .. Qn`` and then
    one-hot encoded.  When ``pd.qcut`` rejects a column with ``ValueError``
    (for example because heavily tied values produce duplicate bin edges),
    that column alone is retried with ``quantiles - 2`` bins.

    Args:
        df: DataFrame with the feature columns and a ``'Salvage_Good'``
            column; rows where it equals 1 are treated as positives.
        quantiles: Number of quantile bins requested per feature.

    Returns:
        A tuple ``(A_p, A_n, measurement, features, bins)`` where ``A_p`` and
        ``A_n`` are the one-hot rows with ``Salvage_Good == 1`` and
        ``Salvage_Good != 1`` (label column dropped), ``measurement`` is the
        full encoded table including ``Salvage_Good``, ``features`` lists the
        one-hot column names, and ``bins`` holds one array of bin edges per
        feature column, in column order.

    Raises:
        ValueError: If a column cannot be binned even with the fallback.
    """
    measurement = pd.DataFrame()
    measurement['Salvage_Good'] = df['Salvage_Good']

    # Keep the primary and fallback label sets separate so that one column
    # falling back never changes the labels used for the columns after it.
    labels = [f'Q{q}' for q in range(1, quantiles + 1)]
    fallback_quantiles = quantiles - 2
    fallback_labels = [f'Q{q}' for q in range(1, fallback_quantiles + 1)]
    bins = []

    for col in df:
        if col in ('Result', 'Salvage_Good'):
            continue

        try:
            binned, edges = pd.qcut(
                df[col], quantiles, labels=labels, retbins=True
            )
        except ValueError:
            # Coarser retry on the same column of the same frame.
            binned, edges = pd.qcut(
                df[col], fallback_quantiles, labels=fallback_labels,
                retbins=True
            )

        measurement[col] = binned
        # Record edges for every column so bins[i] always matches feature i.
        bins.append(edges)

    measurement = pd.get_dummies(measurement, drop_first=False)
    is_positive = measurement['Salvage_Good'] == 1
    A_p = measurement[is_positive].drop('Salvage_Good', axis=1)
    A_n = measurement[~is_positive].drop('Salvage_Good', axis=1)

    return A_p.to_numpy(), A_n.to_numpy(), measurement.to_numpy(), list(A_p.columns), bins

In [None]:
def create_test_matrix(df, features, bins):
    """One-hot encode ``df`` using bin edges learned on the training data.

    Column ``i`` of ``df`` is cut with ``bins[i]`` into right-closed
    intervals labelled ``Q1 .. Qk`` (the convention ``pd.qcut`` uses), then
    expanded into ``<column>_Q<k>`` indicator columns.  The result is laid
    out exactly like ``features`` so it lines up with the training design
    matrix column for column.

    Args:
        df: DataFrame holding only the feature columns, in the same order in
            which ``bins`` was built.
        features: One-hot column names of the training design matrix.
        bins: One sequence of bin edges per column of ``df``.

    Returns:
        DataFrame of 0/1 integers with columns ``features`` and the index of
        ``df``.  Missing values yield an all-zero group for that column.

    Raises:
        ValueError: If ``df`` and ``bins`` do not have the same number of
            columns / entries.
    """
    columns = list(df)
    if len(columns) != len(bins):
        raise ValueError(
            f"expected {len(bins)} feature columns to match bins, "
            f"got {len(columns)}"
        )

    encoded = []
    for col, col_edges in zip(columns, bins):
        edges = list(col_edges)
        # Open the outer edges so values outside the training range still
        # fall into the first or last bin instead of becoming NaN.
        edges[0], edges[-1] = float('-inf'), float('inf')
        labels = [f'Q{k}' for k in range(1, len(edges))]
        binned = pd.cut(df[col], bins=edges, labels=labels)
        encoded.append(pd.get_dummies(binned, prefix=col).astype(int))

    if encoded:
        temp = pd.concat(encoded, axis=1)
    else:
        temp = pd.DataFrame(index=df.index)

    # Enforce the training column order; absent indicators become all zeros.
    return temp.reindex(columns=features, fill_value=0)

In [None]:
# Build the training design matrices with 2 quantile bins per feature.
# A_p / A_n are the one-hot rows for Salvage_Good == 1 / != 1; `features`
# and `bins` are kept so other data can be encoded the same way.
A_p, A_n, measurement, features, bins = create_design_matrices(train, 2)

In [None]:
# Encode X_test with the feature names and bin edges returned for `train`.
# NOTE(review): assumes X_test has the training feature columns in the same
# order, without 'Result' / 'Salvage_Good' - confirm where X_test is built.
test_design = create_test_matrix(X_test, features, bins)

### Rule Set using Rule Fit 

### Learn Sparse Rules

In [None]:
import gurobipy as gp
from gurobipy import GRB

In [None]:
# Gurobi model that holds the rule-selection program defined in the cells below.
m = gp.Model("rule-extraciton")

In [None]:
# One binary selector per one-hot feature (column of A_p).
w = m.addMVar(shape=A_p.shape[1], vtype=GRB.BINARY, name="weights")
# One slack per positive row and one per negative row. No vtype is given,
# so these use addMVar's defaults (presumably continuous, lower bound 0).
psi_p = m.addMVar(shape=A_p.shape[0], name="psi_p")
psi_n = m.addMVar(shape=A_n.shape[0], name="psi_n")

In [None]:
# Box constraints on the selectors (w is already declared binary above).
m.addConstr(w <= 1.0)
m.addConstr(w >= 0.0)
# Slack on positive rows is limited to [0, 1]; slack on negative rows is
# only bounded below.
m.addConstr(psi_p <= 1)
m.addConstr(psi_p >= 0)
m.addConstr(psi_n >= 0)
# Every positive row must contain at least one selected feature, or pay
# for the shortfall through its slack psi_p.
m.addConstr(A_p @ w + psi_p >= 1.0)
# For every negative row, psi_n equals the number of selected features
# that are active in that row.
m.addConstr(A_n @ w == psi_n)
m.update()

In [None]:
# Minimise the number of selected features plus the total slack; the factor
# 1000 makes any slack far more expensive than selecting another feature.
m.setObjective(sum(w) + 1000 * (sum(psi_p) + sum(psi_n)), GRB.MINIMIZE)

In [None]:
# Solve the model.
m.optimize()

In [None]:
# Print every solved selector variable. There is exactly one weight per
# column of A_p; `measurement` also carries the Salvage_Good column, so its
# width would ask for one variable that does not exist.
for i in range(A_p.shape[1]):
    print(m.getVarByName("weights[" + str(i) + "]"))

## Evaluate

### Raw Accuracy

In [None]:
# Name of the one-hot feature at design-matrix column 18.
features[18]

In [None]:
# Name of the one-hot feature at design-matrix column 19.
features[19]

## Analysis 