In [26]:
import pandas as pd
import numpy as np
from fasterrisk.fasterrisk import RiskScoreOptimizer, RiskScoreClassifier
from sklearn.model_selection import train_test_split
import time
from sklearn.linear_model import LogisticRegression

In [9]:
# Name of the binary outcome column to predict.
target_variable = "qualified_gagne_1"

# Stems used to assemble the predictor column names; kept private so only
# `features` is meant to be consumed by later cells.
_age_bands = ["18-24", "25-34", "35-44", "45-54", "55-64", "65-74", "75+"]
_cost_categories = [
    "dialysis",
    "emergency",
    "home_health",
    "ip_medical",
    "ip_surgical",
    "laboratory",
    "op_primary_care",
    "op_specialists",
    "op_surgery",
    "other",
    "pharmacy",
    "physical_therapy",
    "radiology",
]

# Predictor columns, in the order the model sees them (the coefficient vector
# printed later is indexed in this same order).
features = (
    ["dem_female"]
    + ["dem_age_band_{}_tm1".format(band) for band in _age_bands]
    + ["hypertension_elixhauser_tm1"]
    + ["cost_{}_tm1".format(category) for category in _cost_categories]
    + ["gagne_sum_tm1"]
)

In [10]:
# Load the cleaned dataset and build the design matrix and label vector.
df = pd.read_csv("data/obermeyer/obermeyer_data_cleaned.csv")
X = df[features]

# The remap below is only valid for a strictly binary 0/1 target; NaN or any
# other code would silently produce labels outside {-1, +1}, so fail loudly.
if not df[target_variable].isin([0, 1]).all():
    raise ValueError(
        "Column '{}' must contain only 0/1 values".format(target_variable)
    )
y = (df[target_variable] * 2) - 1  # 0 -> -1, 1 -> +1

In [11]:
# Hold out 33% of the rows for testing (fixed seed for reproducibility) and
# convert every piece of the split to a plain NumPy array in one pass.
X_train, X_test, y_train, y_test = (
    part.to_numpy()
    for part in train_test_split(X, y, test_size=0.33, random_state=0)
)

In [48]:
# Hyperparameters for the risk-score search.
sparsity = 5  # number of variables in model
# NOTE(review): presumably the number of candidate solutions retained during
# the search — confirm against the FasterRisk documentation.
parent_size = 2

RiskScoreOptimizer_m = RiskScoreOptimizer(
    X=X_train,
    y=y_train,
    k=sparsity,
    parent_size=parent_size,
)

In [49]:
# Time the optimisation with a monotonic, high-resolution clock so the
# reported duration cannot be skewed by system clock adjustments.
start_time = time.perf_counter()
RiskScoreOptimizer_m.optimize()
print("Optimization takes {:.2f} seconds.".format(time.perf_counter() - start_time))

Optimization takes 4.51 seconds.


In [50]:
# Pull the pool of candidate risk-score models out of the optimizer. The three
# returned sequences are indexed in parallel by model number further down.
(
    multipliers,
    sparseDiversePool_beta0_integer,
    sparseDiversePool_betas_integer,
) = RiskScoreOptimizer_m.get_models()

print(
    "We generate {} risk score models from the sparse diverse pool".format(
        len(multipliers)
    )
)

We generate 40 risk score models from the sparse diverse pool


In [51]:
# Pick one model from the pool and wrap it in a classifier.
model_index = 0  # first model
multiplier, intercept, coefficients = (
    multipliers[model_index],
    sparseDiversePool_beta0_integer[model_index],
    sparseDiversePool_betas_integer[model_index],
)

RiskScoreClassifier_m = RiskScoreClassifier(
    multiplier,
    intercept,
    coefficients,
    X_train=X_train,
)

In [52]:
# Predicted probabilities for every row of the held-out set.
y_test_pred_prob = RiskScoreClassifier_m.predict_prob(X_test)

print(
    "The risk probabilities of having y_test to be +1 are {}".format(
        y_test_pred_prob
    )
)

The risk probabilities of having y_test to be +1 are [0.8130893  0.86166667 0.37921286 ... 0.41701463 0.9380672  0.26107765]


In [53]:
# Display the coefficient vector of the selected model (model_index); it has
# one entry per column listed in `features`.
coefficients

array([0., 0., 0., 0., 0., 0., 1., 1., 2., 0., 0., 0., 0., 0., 0., 0., 5.,
       0., 0., 0., 0., 0., 5.])

In [41]:
# NOTE(review): duplicate of the preceding display cell. Its saved output comes
# from an earlier run (execution count 41) and shows 10 nonzero coefficients,
# which does not match `sparsity = 5` — re-run or remove this cell.
coefficients

array([0., 0., 0., 0., 0., 0., 1., 1., 2., 0., 4., 0., 0., 0., 5., 0., 4.,
       3., 0., 0., 0., 4., 5.])

In [34]:
# Score the selected risk-score model on the held-out split.
test_acc, test_auc = RiskScoreClassifier_m.get_acc_and_auc(
    X_test,
    y_test,
)

In [35]:
# Show held-out accuracy and AUC as a tuple.
# NOTE(review): this cell's execution count (35) predates the model-building
# cells above (48-53), so the saved output may describe an earlier model —
# restart the kernel and run all cells to confirm the numbers.
test_acc, test_auc

(0.7865705944468601, 0.8987745452466757)