# Selective Inference via Outcome Randomisation and Mirror Statistics

In [1]:
using Pkg
Pkg.status()

using GLM
using GLMNet
using Distributions
using Random
using StatsPlots
using Plots

[36m[1mProject[22m[39m selective_inference_project v0.1.0
[32m[1mStatus[22m[39m `~/Documents/UiO_Postdoc/Code/git_repositories/SelectiveInference/Project.toml`
[32m⌃[39m [90m[31c24e10] [39mDistributions v0.25.100
[32m⌃[39m [90m[38e38edf] [39mGLM v1.8.3
  [90m[8d5ece8b] [39mGLMNet v0.7.2
[32m⌃[39m [90m[6f49c342] [39mRCall v0.13.17
  [90m[f3b207a7] [39mStatsPlots v0.15.6
[36m[1mInfo[22m[39m Packages marked with [32m⌃[39m have new versions available and may be upgradable.


In [2]:
include("./utilities/data_generation.jl")
include("./utilities/randomisation_ds.jl")
include("./utilities/mirror_statistic.jl")
include("./utilities/classification_metrics.jl")

Main.classification_metrics

In [3]:
include("./wrapper_pipeline_inference.jl")

Main.wrapper_pipeline_inference

In [10]:
"""
    print_metrics(; scenario, results)

Print a header line containing `scenario`, then one line per metric read from
`results`, each rounded to three decimal places.

# Keywords
- `scenario`: value interpolated into the header line.
- `results`: object exposing the properties `fdr_mirror_statistic`,
  `fdr_randomisation_raw`, `fdr_randomisation_bh`, `tpr_mirror_statistic` and
  `tpr_randomisation_raw`.
"""
function print_metrics(; scenario, results)
    println("-------------------- $scenario --------------------")

    # Printed label => property of `results`, in the order the lines are emitted.
    report_lines = (
        "FDR using Mirror Statistic" => :fdr_mirror_statistic,
        "FDR using Randomisation Raw" => :fdr_randomisation_raw,
        "FDR using Randomisation with BH" => :fdr_randomisation_bh,
        "TPR using Mirror Statistic" => :tpr_mirror_statistic,
        "TPR using Randomisation" => :tpr_randomisation_raw,
    )

    for (label, field) in report_lines
        rounded = round(getproperty(results, field), digits=3)
        println("$label: $rounded")
    end

    return nothing
end

print_metrics (generic function with 1 method)

## Simulation on uncorrelated covariates

### Low-dimensional case
30% of coefficients are 0

In [12]:
# Settings for the low-dimensional scenario without correlation.
# NOTE(review): presumably n = number of observations and p = number of covariates —
# confirm against wrapper_randomisation_inference.
n = 100
p = 20
prop_zero_coef = 0.3  # 30% of coefficients are 0
# NOTE(review): beta_intercept is assigned here but is not among the keyword arguments
# passed to wrapper_randomisation_inference in this notebook — confirm whether it is needed.
beta_intercept = 1.
sigma2 = 1.
# Empty list: no correlation coefficients are supplied for this scenario.
correlation_coefficients = []
scenario = "Low-Dimensional, No Correlation"  # label shown in the header printed by print_metrics

"Low-Dimensional, No Correlation"

In [13]:
# Seed the global RNG immediately before the run; the same seed (1345) is used for
# every scenario in this notebook.
Random.seed!(1345)
# Run the inference pipeline with the settings assigned in the previous cell. The
# trailing `;` suppresses the notebook's display of the returned value.
results = wrapper_pipeline_inference.wrapper_randomisation_inference(
    n=n,
    p=p,
    correlation_coefficients=correlation_coefficients,
    prop_zero_coef=prop_zero_coef,
    sigma2=sigma2,
    gamma_randomisation=1.,
    fdr_level=0.1  # NOTE(review): presumably the target FDR level — confirm in the wrapper
);

In [14]:
# Print the rounded FDR/TPR metrics stored in `results` under the scenario header.
print_metrics(scenario=scenario, results=results)

-------------------- Low-Dimensional, No Correlation --------------------
FDR using Mirror Statistic: 0.125
FDR using Randomisation Raw: 0.0
FDR using Randomisation with BH: 0.0
TPR using Mirror Statistic: 1.0
TPR using Randomisation: 1.0


### High-dimensional case
70% of coefficients are 0

In [15]:
# Settings for the high-dimensional scenario without correlation (p > n here).
# NOTE(review): presumably n = number of observations and p = number of covariates —
# confirm against wrapper_randomisation_inference.
n = 100
p = 200
prop_zero_coef=0.7  # 70% of coefficients are 0
# NOTE(review): beta_intercept is assigned but not passed to the wrapper call below —
# confirm whether it is needed.
beta_intercept=1.
sigma2=1.
# Empty list: no correlation coefficients are supplied for this scenario.
correlation_coefficients=[]
scenario = "High-Dimensional, No Correlation"  # label shown in the header printed by print_metrics

# Seed the global RNG immediately before the run (same seed as the other scenarios).
Random.seed!(1345)
# Run the inference pipeline; the trailing `;` suppresses display of the returned value.
results = wrapper_pipeline_inference.wrapper_randomisation_inference(
    n=n,
    p=p,
    correlation_coefficients=correlation_coefficients,
    prop_zero_coef=prop_zero_coef,
    sigma2=sigma2,
    gamma_randomisation=1.,
    fdr_level=0.1  # NOTE(review): presumably the target FDR level — confirm in the wrapper
);

In [16]:
# Print the rounded FDR/TPR metrics stored in `results` under the scenario header.
print_metrics(scenario=scenario, results=results)

-------------------- High-Dimensional, No Correlation --------------------
FDR using Mirror Statistic: 0.382
FDR using Randomisation Raw: 0.312
FDR using Randomisation with BH: 0.0
TPR using Mirror Statistic: 0.567
TPR using Randomisation: 0.183


## Correlated covariates

### Low-dimensional case
30% of coefficients are 0

In [17]:
# Settings for the low-dimensional scenario with correlated covariates.
# NOTE(review): presumably n = number of observations and p = number of covariates —
# confirm against wrapper_randomisation_inference.
n = 100
p = 20
prop_zero_coef=0.3  # 30% of coefficients are 0
# NOTE(review): beta_intercept is assigned but not passed to the wrapper call below —
# confirm whether it is needed.
beta_intercept=1.
sigma2=1.
# NOTE(review): how these two values map onto the covariate correlation structure is
# decided inside the wrapper / data-generation utilities — confirm there.
correlation_coefficients=[0.5, 0.3]
scenario = "Low-Dimensional, With Correlation"  # label shown in the header printed by print_metrics

# Seed the global RNG immediately before the run (same seed as the other scenarios).
Random.seed!(1345)
# Run the inference pipeline; the trailing `;` suppresses display of the returned value.
results = wrapper_pipeline_inference.wrapper_randomisation_inference(
    n=n,
    p=p,
    correlation_coefficients=correlation_coefficients,
    prop_zero_coef=prop_zero_coef,
    sigma2=sigma2,
    gamma_randomisation=1.,
    fdr_level=0.1  # NOTE(review): presumably the target FDR level — confirm in the wrapper
);

In [18]:
# Print the rounded FDR/TPR metrics stored in `results` under the scenario header.
print_metrics(scenario=scenario, results=results)

-------------------- Low-Dimensional, With Correlation --------------------
FDR using Mirror Statistic: 0.176
FDR using Randomisation Raw: 0.0
FDR using Randomisation with BH: 0.0
TPR using Mirror Statistic: 1.0
TPR using Randomisation: 0.929


### High-dimensional case
70% of coefficients are 0

In [19]:
# Settings for the high-dimensional scenario with correlated covariates (p > n here).
# NOTE(review): presumably n = number of observations and p = number of covariates —
# confirm against wrapper_randomisation_inference.
n = 100
p = 200
prop_zero_coef=0.7  # 70% of coefficients are 0
# NOTE(review): beta_intercept is assigned but not passed to the wrapper call below —
# confirm whether it is needed.
beta_intercept=1.
sigma2=1.
# NOTE(review): how these two values map onto the covariate correlation structure is
# decided inside the wrapper / data-generation utilities — confirm there.
correlation_coefficients=[0.5, 0.3]
scenario = "High-Dimensional, With Correlation"  # label shown in the header printed by print_metrics

# Seed the global RNG immediately before the run (same seed as the other scenarios).
Random.seed!(1345)
# Run the inference pipeline; the trailing `;` suppresses display of the returned value.
results = wrapper_pipeline_inference.wrapper_randomisation_inference(
    n=n,
    p=p,
    correlation_coefficients=correlation_coefficients,
    prop_zero_coef=prop_zero_coef,
    sigma2=sigma2,
    gamma_randomisation=1.,
    fdr_level=0.1  # NOTE(review): presumably the target FDR level — confirm in the wrapper
);



In [20]:
# Print the rounded FDR/TPR metrics stored in `results` under the scenario header.
print_metrics(scenario=scenario, results=results)

-------------------- High-Dimensional, With Correlation --------------------
FDR using Mirror Statistic: 0.333
FDR using Randomisation Raw: 0.0
FDR using Randomisation with BH: 0.0
TPR using Mirror Statistic: 0.1
TPR using Randomisation: 0.0
