In [1]:
from examples.instances import gaussian_instance
from src.carving_class import linear_carving

## Simulate a dataset

In [2]:
# Simulation settings: sample size, number of features, sparsity level passed as `s`, and seed.
n, p, sparsity, seed = 300, 100, 5, 2023

# Draw one Gaussian instance; only the first three returned values are kept.
X, Y, beta = gaussian_instance(
    n=n,
    p=p,
    s=sparsity,
    signal=2.,
    rho=0.3,
    sigma=1.,
    random_signs=True,
    seed=seed,
    scale=True,
    equicorrelated=False,
)[:3]

## Variable selection by the lasso

We use a subset containing 80\% of the data to fit the lasso.

In [3]:
# Number of samples handed to the lasso: 80% of n, truncated to an integer.
n1 = int(0.8 * n)

# Create the carving object from the full data; n1 is the number of samples for the lasso.
carving = linear_carving(X, Y, n1)

# Fit the lasso; the regularization parameter is either 'theory' or 'cv_min'.
carving.fit("theory")

# Report how many variables were selected.
d = carving.d
print("Selected", d, "variables")

# Prepare for inference.
carving.prepare_inference()

Selected 8 variables


## Inference for the selected target

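The inference target is
$$
(X_M^\top X_M)^{-1}X_M^\top X\beta,
$$
where $M$ is the set of variables selected by the lasso and $X_M$ is the submatrix of $X$ containing the selected columns.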

### CDF-based inference

In [4]:
# Run the sampling-based inference on the fitted carving object with sig_level=0.05
# and keep the returned table.
infer_result = carving.sampling_inference(sig_level=0.05)
# Bare last expression so the notebook renders the result; the recorded output has
# columns lower_confidence, upper_confidence and pvalue, one row per selected variable.
infer_result

Unnamed: 0,lower_confidence,upper_confidence,pvalue
0,-2.658346,4.639177,0.651295
1,-3.503131,1.216837,0.229666
2,-3.483719,3.220927,0.929288
3,-1.760332,5.883765,0.314952
4,-4.181609,2.577129,0.569059
5,-3.639189,3.199544,0.792631
6,-5.38493,0.043893,0.053153
7,-6.390008,1.304524,0.207138


### Selective MLE

In [5]:
# Compute the selective MLE on the same carving object with sig_level=0.05
# and keep the returned table.
infer_mle = carving.mle_sov(sig_level=0.05)
# Bare last expression so the notebook renders the result; the recorded output has
# columns MLE, lower_confidence, upper_confidence and pvalues, one row per selected variable.
infer_mle

Unnamed: 0,MLE,lower_confidence,upper_confidence,pvalues
0,-1.558771,-4.337676,1.220134,0.271593
1,-2.690048,-4.967464,-0.412632,0.020609
2,1.461979,-1.279769,4.203726,0.295972
3,-1.203979,-4.211109,1.80315,0.432617
4,-1.719815,-4.354854,0.915224,0.200822
5,-1.574532,-4.323433,1.174368,0.26159
6,-2.301545,-4.678369,0.07528,0.057711
7,0.879544,-2.159609,3.918697,0.570563
