# Selective Inference via Outcome Randomisation and Mirror Statistics

In [1]:
using Pkg
Pkg.status()

using GLM
using GLMNet
using Distributions
using Random
using StatsPlots
using Plots

[36m[1mProject[22m[39m selective_inference_project v0.1.0
[32m[1mStatus[22m[39m `~/Documents/UiO_Postdoc/Code/git_repositories/SelectiveInference/Project.toml`
  [90m[31c24e10] [39mDistributions v0.25.100
  [90m[38e38edf] [39mGLM v1.8.3
  [90m[8d5ece8b] [39mGLMNet v0.7.2
  [90m[6f49c342] [39mRCall v0.13.17
  [90m[f3b207a7] [39mStatsPlots v0.15.6


In [2]:
# Load the project-local utility source files: data generation, outcome
# randomisation, mirror statistic and classification metrics.
# The value shown for this cell is that of the last `include`.
include("./utilities/data_generation.jl")
include("./utilities/randomisation_ds.jl")
include("./utilities/mirror_statistic.jl")
include("./utilities/classification_metrics.jl")

Main.classification_metrics

In [29]:
# Load the `wrapper_pipeline_inference` module; its
# `wrapper_randomisation_inference` function is called by every simulation in
# this notebook.
include("./wrapper_pipeline_inference.jl")



Main.wrapper_pipeline_inference

## Simulation on uncorrelated covariates

### Low-dimensional case
30% of coefficients are 0

In [9]:
# Simulation settings: uncorrelated covariates, low-dimensional design
# (n = 100, p = 20, so p < n).
n, p = 100, 20
prop_zero_coef = 0.3  # 30% of the coefficients are zero
# NOTE(review): `beta_intercept` is not passed to the wrapper call in this
# notebook -- confirm whether it is still needed.
beta_intercept = 1.0
sigma2 = 1.0
# Empty vector: the uncorrelated scenario. Kept as the last expression so the
# cell still displays its value.
correlation_coefficients = []

Any[]

In [18]:
# Seed the global RNG before simulating so the run can be repeated.
Random.seed!(1345)

# Run the inference pipeline on the settings defined above. The returned value
# exposes `data` (y, X, beta_true) and the three FDR fields read in the next
# cell.
result_nocor_lowdim = wrapper_pipeline_inference.wrapper_randomisation_inference(;
    n = n,
    p = p,
    prop_zero_coef = prop_zero_coef,
    sigma2 = sigma2,
    correlation_coefficients = correlation_coefficients,
    gamma_randomisation = 1.0,
    fdr_level = 0.1,
)

(data = (y = [-1.6260520750379444, 1.5513485288028777, -3.3549617891637933, -2.558203514910732, 0.8193702694754683, 0.5492995765088977, 2.3027167730378, -1.936460150077794, -0.5336458266816524, -0.6940018876616174  …  0.562446371253111, -4.545435297892872, -6.0754466928939985, 2.886321624177655, 1.5833320459313067, -1.6946873476255195, -0.8549548337934234, 1.68551956137977, -0.3257784767317786, -1.537740333319479], X = [-0.7889360905644462 -0.26472418834451894 … -1.2343780301897211 -1.121415444437855; -1.21593802500608 -0.1597627796996075 … 0.03453464133427012 0.058641040784560776; … ; 1.4309369596399277 0.22850968111922954 … 0.15581169767454967 0.3491198851157748; -0.8186600875734007 -0.4152378055351885 … 1.9262423549083438 -0.10961298046939277], beta_true = [0.0, -1.0, -0.5, 0.5, -0.8, 0.0, -0.8, 0.0, 0.5, 0.8, -1.0, 0.0, 0.5, -0.8, -1.0, -1.0, 0.8, 0.0, 0.0, -0.5]), fdr_mirror_statistic = 0.125, fdr_randomisation_bh = 0.0, fdr_randomisation_raw = 0.0)

In [21]:
# Extract the three FDR values from the uncorrelated, low-dimensional run ...
fdr_mirror_statistic = result_nocor_lowdim.fdr_mirror_statistic
fdr_randomisation_raw = result_nocor_lowdim.fdr_randomisation_raw
fdr_randomisation_bh = result_nocor_lowdim.fdr_randomisation_bh

# ... and report them in the order: mirror statistic, raw randomisation,
# randomisation with BH.
println("FDR using Mirror Statistic: $fdr_mirror_statistic")
println("FDR using Randomisation Raw: $fdr_randomisation_raw")
println("FDR using Randomisation with BH: $fdr_randomisation_bh")


FDR using Mirror Statistic: 0.125
FDR using Randomisation Raw: 0.0
FDR using Randomisation with BH: 0.0


### High-dimensional case
70% of coefficients are 0

In [22]:
# Simulation settings: uncorrelated covariates, high-dimensional design
# (n = 100, p = 200, so p > n).
n, p = 100, 200
prop_zero_coef = 0.7  # 70% of the coefficients are zero
# NOTE(review): `beta_intercept` is not passed to the wrapper call below --
# confirm whether it is still needed.
beta_intercept = 1.0
sigma2 = 1.0
correlation_coefficients = []  # empty: uncorrelated scenario

# Seed the global RNG before simulating so the run can be repeated.
Random.seed!(1345)

# Run the inference pipeline; the result is the cell's displayed value.
result_nocor_highdim = wrapper_pipeline_inference.wrapper_randomisation_inference(;
    n = n,
    p = p,
    prop_zero_coef = prop_zero_coef,
    sigma2 = sigma2,
    correlation_coefficients = correlation_coefficients,
    gamma_randomisation = 1.0,
    fdr_level = 0.1,
)

(data = (y = [7.379060581983223, 9.034040359173744, -0.9985241118877548, -2.807702330136512, 7.039050202171682, -7.066432235905209, -2.2002451680224135, -3.129981020928404, -1.9201987562392855, 5.396975774043816  …  15.354354993551368, 10.133163476845853, 9.230690264511408, -1.493114251081702, 0.6972110663377702, 10.718813722924901, 4.071925624909591, 1.4759028887785635, 4.713664808356412, 5.208262579945975], X = [0.24898080739663456 0.1846030772413031 … -0.23112332976524586 -0.319119047569456; -1.4527032413296281 0.13198117152228667 … -0.7535129243130844 0.18880080839509594; … ; 0.4547520463619798 -2.3730584237820316 … 0.5195930145103993 0.21219032117118536; -0.6877661026982355 1.8625385222887076 … 2.3286871420216597 1.1849808419323373], beta_true = [-0.5, 0.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, -0.8, 0.0, 0.0, 0.0, 0.0, -0.8, 0.0, 0.0, 0.0]), fdr_mirror_statistic = 0.38181818181818183, fdr_randomisation_bh = 0.0, fdr_randomisation_raw = 0.3125)

In [23]:
# Extract the three FDR values from the uncorrelated, high-dimensional run ...
fdr_mirror_statistic = result_nocor_highdim.fdr_mirror_statistic
fdr_randomisation_raw = result_nocor_highdim.fdr_randomisation_raw
fdr_randomisation_bh = result_nocor_highdim.fdr_randomisation_bh

# ... and report them in the order: mirror statistic, raw randomisation,
# randomisation with BH.
println("FDR using Mirror Statistic: $fdr_mirror_statistic")
println("FDR using Randomisation Raw: $fdr_randomisation_raw")
println("FDR using Randomisation with BH: $fdr_randomisation_bh")


FDR using Mirror Statistic: 0.38181818181818183
FDR using Randomisation Raw: 0.3125
FDR using Randomisation with BH: 0.0


## Correlated covariates

### Low-dimensional case
30% of coefficients are 0

In [24]:
# Simulation settings: correlated covariates, low-dimensional design
# (n = 100, p = 20, so p < n).
n, p = 100, 20
prop_zero_coef = 0.3  # 30% of the coefficients are zero
# NOTE(review): `beta_intercept` is not passed to the wrapper call below --
# confirm whether it is still needed.
beta_intercept = 1.0
sigma2 = 1.0
# NOTE(review): how these two values map onto the covariate correlation
# structure is defined in the data-generation code -- confirm there.
correlation_coefficients = [0.5, 0.3]

# Seed the global RNG before simulating so the run can be repeated.
Random.seed!(1345)

# Run the inference pipeline; the result is the cell's displayed value.
result_cor_lowdim = wrapper_pipeline_inference.wrapper_randomisation_inference(;
    n = n,
    p = p,
    prop_zero_coef = prop_zero_coef,
    sigma2 = sigma2,
    correlation_coefficients = correlation_coefficients,
    gamma_randomisation = 1.0,
    fdr_level = 0.1,
)

(data = (y = [2.074586274240772, 2.78787539318058, -0.3578202109207673, -3.5867805239606003, 0.07499232615071616, -0.2766554671478394, 2.5657679265671165, -2.1010074136363315, 0.41109593736424654, -0.36086385764007356  …  -0.8999964406674992, -4.215192404438797, -7.2543432892550985, 6.322607528521161, 1.0835627131274532, -0.332971970936051, -0.4491621039361495, 2.6216229741818657, -0.27146349077678744, -0.5289623329124924], X = [-0.7889360905644462 -0.6237259173847929 … -1.344434800890353 -1.7824186749129203; -1.21593802500608 -0.7463276383021169 … 0.2744360099918829 0.3697166255104425; … ; 1.4309369596399277 0.9133636686798979 … -0.27684890742196167 0.31965085295331574; -0.8186600875734007 -0.7689365319918762 … 0.4423602279658192 0.24741932927412513], beta_true = [0.0, -1.0, -0.5, 0.5, -0.8, 0.0, -0.8, 0.0, 0.5, 0.8, -1.0, 0.0, 0.5, -0.8, -1.0, -1.0, 0.8, 0.0, 0.0, -0.5]), fdr_mirror_statistic = 0.17647058823529413, fdr_randomisation_bh = 0.0, fdr_randomisation_raw = 0.0)

In [25]:
# Extract the three FDR values from the correlated, low-dimensional run ...
fdr_mirror_statistic = result_cor_lowdim.fdr_mirror_statistic
fdr_randomisation_raw = result_cor_lowdim.fdr_randomisation_raw
fdr_randomisation_bh = result_cor_lowdim.fdr_randomisation_bh

# ... and report them in the order: mirror statistic, raw randomisation,
# randomisation with BH.
println("FDR using Mirror Statistic: $fdr_mirror_statistic")
println("FDR using Randomisation Raw: $fdr_randomisation_raw")
println("FDR using Randomisation with BH: $fdr_randomisation_bh")


FDR using Mirror Statistic: 0.17647058823529413
FDR using Randomisation Raw: 0.0
FDR using Randomisation with BH: 0.0


### High-dimensional case
70% of coefficients are 0

In [30]:
# Simulation settings: correlated covariates, high-dimensional design
# (n = 100, p = 200, so p > n).
n, p = 100, 200
prop_zero_coef = 0.7  # 70% of the coefficients are zero
# NOTE(review): `beta_intercept` is not passed to the wrapper call below --
# confirm whether it is still needed.
beta_intercept = 1.0
sigma2 = 1.0
# NOTE(review): how these two values map onto the covariate correlation
# structure is defined in the data-generation code -- confirm there.
correlation_coefficients = [0.5, 0.3]

# Seed the global RNG before simulating so the run can be repeated.
Random.seed!(1345)

# Run the inference pipeline; the result is the cell's displayed value.
result_cor_highdim = wrapper_pipeline_inference.wrapper_randomisation_inference(;
    n = n,
    p = p,
    prop_zero_coef = prop_zero_coef,
    sigma2 = sigma2,
    correlation_coefficients = correlation_coefficients,
    gamma_randomisation = 1.0,
    fdr_level = 0.1,
)

(data = (y = [9.173756724412282, 14.279613632233227, -3.3777843397658724, -1.357098145252965, 2.3715729625709585, -3.3340835628786865, -3.0710934051392, 0.9251036984011431, 1.0837197075931972, 1.013041121982387  …  12.049696844044497, 7.343995681462049, 7.942746672335299, -2.4451334884309266, 5.912168818808471, 14.882393058244766, 8.74953496963849, -1.163738355516755, 6.185202051129609, 3.65614529905947], X = [0.24898080739663456 0.28436135820606667 … -0.39710030273757274 -0.2008930022031271; -1.4527032413296281 -0.6120525733052825 … -0.4487489156288165 0.3937891173031858; … ; 0.4547520463619798 -1.8277528564789074 … 0.6857174447358722 0.3279143899209045; -0.6877661026982355 1.2691226244800318 … 1.1293414378067999 1.2256994491917967], beta_true = [-0.5, 0.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, -0.8, 0.0, 0.0, 0.0, 0.0, -0.8, 0.0, 0.0, 0.0]), lasso_coef = [-0.3429824235788078, 0.0, -0.14707287366634694, 0.0, -0.06971301861765271, 0.0, 0.0, 0.41364250721974744, 0.26339934565

In [31]:
# Extract the three FDR values from the correlated, high-dimensional run ...
fdr_mirror_statistic = result_cor_highdim.fdr_mirror_statistic
fdr_randomisation_raw = result_cor_highdim.fdr_randomisation_raw
fdr_randomisation_bh = result_cor_highdim.fdr_randomisation_bh

# ... and report them in the order: mirror statistic, raw randomisation,
# randomisation with BH.
# NOTE(review): in the recorded run both randomisation FDRs printed as NaN for
# this configuration -- see the NaN p-value check in the following cell.
println("FDR using Mirror Statistic: $fdr_mirror_statistic")
println("FDR using Randomisation Raw: $fdr_randomisation_raw")
println("FDR using Randomisation with BH: $fdr_randomisation_bh")


FDR using Mirror Statistic: 0.3333333333333333
FDR using Randomisation Raw: NaN
FDR using Randomisation with BH: NaN


In [51]:
# Diagnostics for the correlated, high-dimensional run.

# P-values strictly below 1 (NaN entries fail the comparison and are dropped
# as well). This is not the last expression of the cell, so its value is
# computed but not displayed.
result_cor_highdim.lm_pvalues[result_cor_highdim.lm_pvalues .< 1.]

# Positions of the NaN p-values. Passing the predicate straight to `findall`
# avoids the temporary arrays built by `isnan.(v) .== 1` and returns the same
# indices.
# NOTE(review): these NaN p-values are presumably why the randomisation FDRs
# are NaN for this run -- confirm inside the wrapper.
# The same check on the coefficients: findall(isnan, result_cor_highdim.lm_coef)
findall(isnan, result_cor_highdim.lm_pvalues)


3-element Vector{Int64}:
  33
  43
 109