In [4]:
using CSV, DataFrames, Statistics, Random;

In [5]:
seed = 123;

In [6]:
results = DataFrame(lambda = [], base_score = [], base_score_actual = [], new_score = [])
train_scores_df = DataFrame(AUC = [], treatment = [], non_treatment = [])
test_scores_df = DataFrame(AUC = [], treatment = [], non_treatment = [])


# identify outcomes
lambda_values = 0:0.2:1
for lambda in lambda_values

    println("Loop for lambda = $lambda")

    df = CSV.read("data/input/imputed_diabetes_patients_20231205.csv", DataFrame);

    # split into train and test
    train_df = filter(row -> row.train_flag == 1, df)
    test_df = filter(row -> row.train_flag == 0, df)

    
    train_outcomes = lambda .* train_df.phys_health_days + (1-lambda) .* train_df.ment_health_days
    test_outcomes = lambda .* test_df.phys_health_days + (1-lambda) .* test_df.ment_health_days
    
    # identify treatments
    train_treatments = train_df.diabetes_course
    test_treatments = test_df.diabetes_course
    
    # identify features
    drop_vars = [
        "phys_health_status", "phys_health_days", "ment_health_status", "ment_health_days", 
        "diabetes_course", 
        "depressed_household", "alcohol_household", "drugs_household", "prison_household", "sleep",
        "train_flag"
    ]
        
    train_X = select!(train_df, Not(drop_vars))
    test_X = select!(test_df, Not(drop_vars));
    
    model_propensity = IAI.GridSearch(
    IAI.XGBoostClassifier(
        random_seed = seed,
        criterion = :entropy,
        max_categoric_levels_before_warning = 15
    ), 
    minbucket = [5, 10, 20, 50],
    max_depth = [3, 5, 7, 9], 
    num_estimators = [20, 50, 100, 200]
    );

    model_outcome = IAI.GridSearch(
    IAI.XGBoostRegressor(
        random_seed = seed,
        criterion = :tweedie,
        max_categoric_levels_before_warning = 15
    ), 
    minbucket = [5, 10, 20, 50],
    max_depth = [3, 5, 7, 9], 
    num_estimators = [20, 50, 100, 200]
    );

    reward_lnr = IAI.CategoricalRegressionRewardEstimator(
    propensity_estimator = model_propensity,
    outcome_estimator = model_outcome,
    reward_estimator = :doubly_robust,
    random_seed = seed,
    propensity_insample_num_folds = 5, 
    outcome_insample_num_folds = 5,
    propensity_min_value = 0.001
    );

    train_predictions, train_reward_score = IAI.fit_predict!(
    reward_lnr, train_X, train_treatments, train_outcomes,
    propensity_score_criterion = :auc, 
    outcome_score_criterion = :tweedie
    );

    train_rewards = train_predictions[:reward]

    println("Rewards matrix built")

    grid = IAI.GridSearch(
    IAI.OptimalTreePolicyMinimizer(
        random_seed = seed,
        max_categoric_levels_before_warning = 20,
    ),
    max_depth = 2:6,
    minbucket = [5, 10, 20, 50]
    )

    IAI.fit!(grid, train_X, train_rewards)

    IAI.write_html("tree1207_lambda=$lambda", grid)
    IAI.write_questionnaire("questions1207_lambda=$lambda", grid)

    test_predictions, test_reward_score = IAI.fit_predict!(
    reward_lnr, test_X, test_treatments, test_outcomes,
    propensity_score_criterion = :auc, 
    outcome_score_criterion = :tweedie
    )
    
    test_rewards = test_predictions[:reward]

    base_score = mean([test_rewards[i, test_treatments[i]] for i in 1:length(test_treatments)])

    base_score_actual = mean(test_outcomes)

    pred_treatments = IAI.predict(grid, test_X)
    pred_outcomes = [test_rewards[i, pred_treatments[i]] for i in 1:length(pred_treatments)]
    new_score = mean(pred_outcomes)

    new_row = (lambda = lambda, base_score = base_score, base_score_actual = base_score_actual, new_score = new_score)
    push!(results, new_row)
    CSV.write("results_lambda_1207.csv", results)

    new_row_train = (AUC = train_reward_score[:propensity], treatment = train_reward_score[:outcome]["yes"], non_treatment = train_reward_score[:outcome]["no"])
    new_row_test = (AUC = test_reward_score[:propensity], treatment = test_reward_score[:outcome]["yes"], non_treatment = test_reward_score[:outcome]["no"])
    push!(train_scores_df, new_row_train)
    push!(test_scores_df, new_row_test)
    CSV.write("train_scores_df_1207.csv", train_scores_df)
    CSV.write("test_scores_df_1207.csv", test_scores_df)
    

end
    

    

Loop for lambda = 0.0
Rewards matrix built
Loop for lambda = 0.2
Rewards matrix built
Loop for lambda = 0.4
Rewards matrix built
Loop for lambda = 0.6
Rewards matrix built
Loop for lambda = 0.8
Rewards matrix built
Loop for lambda = 1.0
Rewards matrix built


In [12]:
results

Row,lambda,base_score,base_score_actual,new_score
Unnamed: 0_level_1,Any,Any,Any,Any
1,0.0,5.51443,4.4619,4.47131
2,0.2,5.73175,4.93875,5.0052
3,0.4,6.15759,5.4156,5.70556
4,0.6,6.6536,5.89244,6.19639
5,0.8,7.21633,6.36929,6.58153
6,1.0,7.71774,6.84614,7.1476


In [16]:
lambda = 0

df = CSV.read("data/input/imputed_diabetes_patients_20231205.csv", DataFrame);

# split into train and test
train_df = filter(row -> row.train_flag == 1, df)
test_df = filter(row -> row.train_flag == 0, df)

train_outcomes = lambda .* train_df.phys_health_days + (1-lambda) .* train_df.ment_health_days
test_outcomes = lambda .* test_df.phys_health_days + (1-lambda) .* test_df.ment_health_days

# identify treatments
train_treatments = train_df.diabetes_course
test_treatments = test_df.diabetes_course

# identify features
train_X = select!(train_df, Not(["phys_health_status", "phys_health_days", "ment_health_status", "ment_health_days", "diabetes_course", "train_flag"]))
test_X = select!(test_df, Not(["phys_health_status", "phys_health_days", "ment_health_status", "ment_health_days", "diabetes_course", "train_flag"]));

model_propensity = IAI.GridSearch(
IAI.XGBoostClassifier(
    random_seed = seed,
    criterion = :entropy,
    max_categoric_levels_before_warning = 15
), 
minbucket = [5, 10, 20, 50],
max_depth = [3, 5, 7, 9], 
num_estimators = [20, 50, 100, 200]
);

model_outcome = IAI.GridSearch(
IAI.XGBoostRegressor(
    random_seed = seed,
    criterion = :tweedie,
    max_categoric_levels_before_warning = 15
), 
minbucket = [5, 10, 20, 50],
max_depth = [3, 5, 7, 9], 
num_estimators = [20, 50, 100, 200]
);

reward_lnr = IAI.CategoricalRegressionRewardEstimator(
propensity_estimator = model_propensity,
outcome_estimator = model_outcome,
reward_estimator = :doubly_robust,
random_seed = seed,
propensity_insample_num_folds = 5, 
outcome_insample_num_folds = 5,
propensity_min_value = 0.001
);

train_predictions, train_reward_score = IAI.fit_predict!(
    reward_lnr, train_X, train_treatments, train_outcomes,
    propensity_score_criterion = :auc, 
    outcome_score_criterion = :tweedie
    );


In [28]:
train_reward_score[:propensity]

0.6132962136238772

In [31]:
train_reward_score[:outcome]["yes"]

0.2548924600029042

In [33]:
train_reward_score[:outcome]["no"]

0.21492555572579092

In [14]:
train_rewards = train_predictions[:reward]

println("Rewards matrix built")

grid = IAI.GridSearch(
IAI.OptimalTreePolicyMinimizer(
    random_seed = seed,
    max_categoric_levels_before_warning = 20,
),
max_depth = 2:6,
minbucket = [5, 10, 20, 50]
)

IAI.fit!(grid, train_X, train_rewards)

#IAI.write_html("tree1205_lambda=$lambda", grid)
#IAI.write_questionnaire("questions_lambda=$lambda", grid)

test_predictions, test_reward_score = IAI.fit_predict!(
reward_lnr, test_X, test_treatments, test_outcomes,
propensity_score_criterion = :auc, 
outcome_score_criterion = :tweedie
)

test_rewards = test_predictions[:reward]

base_score = mean([test_rewards[i, test_treatments[i]] for i in 1:length(test_treatments)])

base_score_actual = mean(test_outcomes)

pred_treatments = IAI.predict(grid, test_X)
pred_outcomes = [test_rewards[i, pred_treatments[i]] for i in 1:length(pred_treatments)]
new_score = mean(pred_outcomes)

LoadError: UndefVarError: `train_predictions` not defined