In [1]:
# Before running this notebook, set the following fields of the mutable struct `Args` in structs.jl:
#   data_string: "data_scenario1"
#   pre_transformation: true

In [2]:
# Move the working directory one level up, to the repository root, so that
# working-directory-relative paths used later (e.g. "./figures/...") resolve
# against it.
#
# The guard makes this cell safe to re-run: the original unconditional
# `cd("../.")` climbed one directory higher on every execution. Project.toml
# is used as the root marker because the following cell looks for it in the
# working directory before activating the environment.
if !isfile("Project.toml")
    cd("..")
end
pwd()

"/Users/farhadyar/Documents/Project_PTVAE/progs/github_repo/LatentSubgroups"

In [3]:
using Pkg

# Activate the project located in the current working directory, but only
# when both environment files are present there.
# If the dependencies are not installed yet, run `Pkg.instantiate()` manually.
if all(isfile, ("Project.toml", "Manifest.toml"))
    Pkg.activate(".")
end

using IJulia
using Revise



[32m[1m  Activating[22m[39m environment at `~/Documents/Project_PTVAE/progs/github_repo/LatentSubgroups/Project.toml`


In [4]:
# Project source files, loaded through Revise's `includet` so that later edits
# to them are tracked. They are included one after another in the order listed.
tracked_sources = (
    "../AIQN/AIQN.jl",
    "../src/quantile_transformation.jl",
    "../src/structs.jl",
    "../src/report.jl",
    "../src/transformations.jl",
    "../src/VAE.jl",
    "../src/load_data.jl",
    "../src/evaluation/evaluation.jl",
    "../src/classification.jl",
    "../src/GLM.jl",
)
for source_file in tracked_sources
    includet(source_file)
end

# Set the default plot DPI to 300.
# NOTE(review): `default` is not imported in this notebook; presumably it is
# brought into scope by a plotting package loaded in one of the included files.
default(dpi=300)




In [5]:

# Load the data matrix, the per-column data types and the run configuration.
x, dataTypeArray, args = load_dataset()

# Fail fast when the notebook is run against the wrong dataset. The original
# cell only printed a hint, so every following cell still executed with the
# wrong configuration.
if args.data_string != "data_scenario1"
    error(
        "For this notebook you should set the args.data_string = data_scenario1; " *
        "change the struct and restart the notebook again",
    )
end

# Settings this notebook relies on.
args.pre_transformation = true
args.scaling_method = "standardization"

"standardization"

In [6]:
# Seed the global RNG before preprocessing.
Random.seed!(11)

# Build the preprocessing parameters for the quantile pre-transformation, then
# run the preprocessing; `preprocess!` hands back the (possibly updated)
# parameters together with the transformed data.
preprocess_ps = preprocess_params(;
    input_dim = args.input_dim,
    pre_transformation_type = "quantile",
)
preprocessed_data, preprocess_ps = preprocess!(args, preprocess_ps, x, dataTypeArray)

In [None]:

# Hyperparameters and run options for this experiment. They are written into
# `args` field by field, in the order listed; `setproperty!(args, name, value)`
# is exactly what `args.name = value` lowers to.
run_settings = (
    η = 0.0001f0,
    λ = 0.01f0,
    β = 0.1,
    batch_size = 128,
    epochs = 4000,
    seed = 42,
    input_dim = 18,
    latent_dim = 2,
    hidden_dim = 21,
    synthetic_data = false,
    multimodal_encoder = true,
    IPW_sampling = false,
    grid_point_size = 0.1,
)
for (field, value) in pairs(run_settings)
    setproperty!(args, field, value)
end

# Either train a single VAE or run the hyperparameter search.
# NOTE: `model`, `training_data` and `loss_array_vae` are only bound in the
# single-training branch; the cells below that use `model` need that branch.
if !args.hyperopt_flag
    model, training_data, loss_array_vae = trainVAE!(
        preprocessed_data, x, dataTypeArray, preprocess_ps, args
    )
else
    trainVAE_hyperparams_opt!(preprocessed_data, x, dataTypeArray, preprocess_ps, args)
end


In [None]:
# Latent representation of the preprocessed data under the trained model.
z = get_latent(preprocessed_data, model, args, preprocess_ps)

# Exposure (E) and outcome (y) labels for the selected dataset, plus their
# element-wise AND after conversion to Int.
E = load_exposure(args.data_string)
y = load_outcome(args.data_string)
E_O = @. Int(E) & Int(y)

# Latent scatter plot labelled by exposure: show it, then export it.
# `Plots.savefig` is called without a plot argument, so it writes the current
# plot (documented Plots.jl behaviour) into the working directory.
latent_by_exposure = scatter_latent(z, "E", E, "A) Latent representation")
display(latent_by_exposure)
Plots.savefig("known_subgroups_only_latent_E_n_quantile$(preprocess_ps.n_quantiles).png")
Plots.savefig("known_subgroups_only_latent_E_n_quantile$(preprocess_ps.n_quantiles).pdf")

# Latent scatter plot labelled by outcome; as the last expression of the cell
# it becomes the cell's displayed result.
scatter_latent(z, "y", y, "A) Latent representation")

In [None]:


# Alternative input (disabled): read the covariates directly from the CSV file.
# data_glm = Matrix(CSV.read("./data/data_scenario1.csv" , DataFrame)[:, 2:end-2])
# x6_inclusion = true

data_glm = x
x6_inclusion = false

# `true` for columns with more than two distinct values. Only those columns
# get their own mean/std below; every other column uses mean 0 and scale 0.5.
type_vector = [length(unique(column)) > 2 for column in eachcol(data_glm)]

column_means = vec(mean(data_glm, dims=1))
column_stds = vec(std(data_glm, dims=1))
x_μ = column_means .* type_vector
x_σ = (column_stds .* type_vector) .+ (0.5 * .!type_vector)

# Standardize with the per-column parameters and take the adjoint of the result.
data_glm = adjoint(standardize(x_μ, x_σ, data_glm))

# Design matrix for the GLMs: covariates followed by exposure and outcome.
training_glm = hcat(data_glm, E, y)

# Re-register GLM.jl with Revise before fitting.
includet("../src/GLM.jl")

# Exposure model and the features it selected.
exposure_model, selected_features_exposure =
    fit_logistic_regression_exposure(training_glm, x6_inclusion)

# Features selected for the outcome.
selected_features_outcome = fit_logistic_regression_outcome(training_glm, x6_inclusion)

# Model fitted via the "both" variant on the exposure/outcome feature selections.
e_o_model_and, selected_features_e_o_and = fit_logistic_regression_selected_features_both(
    training_glm, selected_features_exposure, selected_features_outcome, x6_inclusion
)

# Model fitted via the "either" variant on the exposure/outcome feature selections.
e_o_model_or, selected_features_e_o_or = fit_logistic_regression_selected_features_either(
    training_glm, selected_features_exposure, selected_features_outcome, x6_inclusion
)

In [None]:
# Print each feature selection as `name = value`, the same text `@show` emits,
# followed by an empty line.
selections = (
    ("selected_features_exposure", selected_features_exposure),
    ("selected_features_outcome", selected_features_outcome),
    ("selected_features_e_o_and", selected_features_e_o_and),
    ("selected_features_e_o_or", selected_features_e_o_or),
)
for (label, selection) in selections
    println(label, " = ", repr(selection))
end

println("")

In [None]:
# Current local time rendered as e.g. "2024-01-31 13:45:10".
timestamp_format = "yyyy-mm-dd HH:MM:SS"
date_string = Dates.format(now(), timestamp_format)

In [None]:
includet("../visualization/visualization.jl")
Random.seed!(42)

# Predicted probabilities from the three fitted models on the GLM design matrix.
probabilities_e = predict_probability(exposure_model, training_glm, x6_inclusion)

probabilities_e_o_and = predict_probability(e_o_model_and, training_glm, x6_inclusion)

probabilities_e_o_or = predict_probability(e_o_model_or, training_glm, x6_inclusion)

# NOTE: the original cell called `size(700,500)` at this point. That is
# `Base.size` applied to the integer 700 with dimension index 500; it returns 1
# and has no effect on any plot, so the statement was removed. The figure size
# is set explicitly where the two panels are combined (`size = (1400, 500)`).

# Corner coordinates of the optional highlight rectangle; only used by the
# commented-out overlay lines below.
rect_coords = rectangle_from_coords(0.1, 0.7, 1, 2)

# Split the coordinates into x and y arrays.
x_rect = rect_coords[:, 1]
y_rect = rect_coords[:, 2]

# Panel A: latent space overlaid with the exposure-model probabilities.
plt = latent_propensity_ovrlay_no(z, probabilities_e, 2500, E, y, "A) Features selected by E", true, args.grid_point_size)

# Optional: draw the rectangle on panel A.
# plt = Plots.plot!(x_rect, y_rect, seriestype=:shape, fillalpha=0, color=:red, linecolor = :red, lw = 5, label=false)

display(plt)

# Panel B: latent space overlaid with the probabilities of the "and" model.
plt2 = latent_propensity_ovrlay(z, probabilities_e_o_and, 2500, E, y, "B) Features selected by E-O", true, args.grid_point_size)
# Optional: draw the rectangle on panel B.
# plt2 = Plots.plot!(x_rect, y_rect, seriestype=:shape, fillalpha=0, color=:red, linecolor = :red, lw = 5, label=false)

display(plt2)


In [None]:
# Figure 4: panels A and B side by side in a 1×2 layout, 1400×500 overall.
panels = (plt, plt2)
figure4 = Plots.plot(panels...; layout = (1, 2), size = (1400, 500))

In [None]:
# Make sure the output directory exists before writing; without it the export
# fails on a fresh checkout. `mkpath` is a no-op when the directory is present.
mkpath("./figures")

# Export Figure 4 as PNG and PDF; the file name records the number of quantiles
# used by the pre-transformation.
Plots.savefig(figure4, "./figures/Figure4_exposure_outcome_quantile_n_quantile$(preprocess_ps.n_quantiles).png")
Plots.savefig(figure4, "./figures/Figure4_exposure_outcome_quantile_n_quantile$(preprocess_ps.n_quantiles).pdf")

In [None]:
# Convert the exported Figure 4 PDF into an EPS file with Ghostscript.
# Requires the `gs` executable to be available on the PATH.
input_pdf =  "./figures/Figure4_exposure_outcome_quantile_n_quantile$(preprocess_ps.n_quantiles).pdf"
output_eps =  "./figures/Figure4_exposure_outcome_quantile_n_quantile$(preprocess_ps.n_quantiles).eps"

# Assemble the argument vector explicitly. Each element is passed to `gs` as
# one argument and no shell is involved, so the paths need no quoting.
command = Cmd([
    "gs",
    "-dNOPAUSE",
    "-dBATCH",
    "-dEPSCrop",
    "-r300",
    "-sDEVICE=eps2write",
    "-sOutputFile=$output_eps",
    input_pdf,
])

# Execute Ghostscript; `run` throws if the process exits with an error.
run(command)