In [1]:
from modules import gendata, SI_SeqFS_DA, DA_SeqFS
import numpy as np

## Forward Sequential Selection

### SI-SeqFS-DA with Forward Selection

In [2]:
# --- Experiment setup ---------------------------------------------------
# ns and nt are forwarded to gendata.generate (presumably the source and
# target sample sizes -- confirm against gendata).
ns, nt = 30, 15
truebeta = [1, 0, 2, 0]
p = len(truebeta)  # one coefficient per feature
print('true beta:', truebeta)

# Coefficients as (p, 1) column vectors.
true_beta_s = np.full((p, 1), 2)  # source's beta: every entry is 2
true_beta_t = np.array(truebeta).reshape(-1, 1)  # target's beta

K = 2  # select k features

# --- Generate data ------------------------------------------------------
Xs, Xt, Ys, Yt, Sigma_s, Sigma_t = gendata.generate(
    ns, nt, p, true_beta_s, true_beta_t
)

# --- Feature selection: DA-SeqFS in forward mode ------------------------
list_selected_features = DA_SeqFS.DA_SeqFS(
    Xs, Ys, Xt, Yt, Sigma_s, Sigma_t, K, method='forward'
)

# --- Inference: one SI-SeqFS-DA call per selected feature ---------------
selected_as_text = ', '.join(str(feature) for feature in list_selected_features)
print('Index of selected features:', '{' + selected_as_text + '}')
print('Applying SI-SeqFS-DA to calculate p-value of the selected features:')
for j, feature in enumerate(list_selected_features):
    # jth is the position inside list_selected_features, not the feature index.
    pvalue = SI_SeqFS_DA.SI_SeqFS_DA(
        Xs, Ys, Xt, Yt, K, Sigma_s, Sigma_t, method='forward', jth=j
    )
    print(f'    p-value of feature {feature}: {pvalue}')

true beta: [1, 0, 2, 0]
Index of selected features: {0, 2}
Applying SI-SeqFS-DA to calculate p-value of the selected features:
    p-value of feature 0: 0.00011854898347873899
    p-value of feature 2: 8.215650382226158e-15


### SI-SeqFS-DA with Forward Selection based on the Akaike Information Criterion

In [3]:
print('true beta:', truebeta)

K = 'AIC'  # use AIC to select features

# --- Feature selection: DA-SeqFS in forward mode ------------------------
# Reuses the data (Xs, Ys, Xt, Yt, Sigma_s, Sigma_t) left in the kernel by
# an earlier cell.
list_selected_features = DA_SeqFS.DA_SeqFS(
    Xs, Ys, Xt, Yt, Sigma_s, Sigma_t, K, method='forward'
)

# --- Inference: one SI-SeqFS-DA call per selected feature ---------------
selected_as_text = ', '.join(str(feature) for feature in list_selected_features)
print('Index of selected features:', '{' + selected_as_text + '}')
print('Applying SI-SeqFS-DA to calculate p-value of the selected features:')
for j, feature in enumerate(list_selected_features):
    # jth is the position inside list_selected_features, not the feature index.
    pvalue = SI_SeqFS_DA.SI_SeqFS_DA(
        Xs, Ys, Xt, Yt, K, Sigma_s, Sigma_t, method='forward', jth=j
    )
    print(f'    p-value of feature {feature}: {pvalue}')

true beta: [1, 0, 2, 0]
Index of selected features: {0, 2, 3}
Applying SI-SeqFS-DA to calculate p-value of the selected features:
    p-value of feature 0: 0.0034794091699619223
    p-value of feature 2: 3.946598603477014e-11
    p-value of feature 3: 0.5956429431107668


### SI-SeqFS-DA with Forward Selection based on the Bayesian Information Criterion

In [4]:
print('true beta:', truebeta)

K = 'BIC'  # use BIC to select features

# --- Feature selection: DA-SeqFS in forward mode ------------------------
# Reuses the data (Xs, Ys, Xt, Yt, Sigma_s, Sigma_t) left in the kernel by
# an earlier cell.
list_selected_features = DA_SeqFS.DA_SeqFS(
    Xs, Ys, Xt, Yt, Sigma_s, Sigma_t, K, method='forward'
)

# --- Inference: one SI-SeqFS-DA call per selected feature ---------------
selected_as_text = ', '.join(str(feature) for feature in list_selected_features)
print('Index of selected features:', '{' + selected_as_text + '}')
print('Applying SI-SeqFS-DA to calculate p-value of the selected features:')
for j, feature in enumerate(list_selected_features):
    # jth is the position inside list_selected_features, not the feature index.
    pvalue = SI_SeqFS_DA.SI_SeqFS_DA(
        Xs, Ys, Xt, Yt, K, Sigma_s, Sigma_t, method='forward', jth=j
    )
    print(f'    p-value of feature {feature}: {pvalue}')

true beta: [1, 0, 2, 0]
Index of selected features: {0, 2, 3}
Applying SI-SeqFS-DA to calculate p-value of the selected features:
    p-value of feature 0: 0.9841180439703707
    p-value of feature 2: 7.898339442480307e-07
    p-value of feature 3: 0.9490315619966485


### SI-SeqFS-DA with Forward Selection based on Adjusted $R^2$

In [5]:
print('true beta:', truebeta)

K = 'Adjusted R2'  # use AdjR2 to select features

# --- Feature selection: DA-SeqFS in forward mode ------------------------
# Reuses the data (Xs, Ys, Xt, Yt, Sigma_s, Sigma_t) left in the kernel by
# an earlier cell.
list_selected_features = DA_SeqFS.DA_SeqFS(
    Xs, Ys, Xt, Yt, Sigma_s, Sigma_t, K, method='forward'
)

# --- Inference: one SI-SeqFS-DA call per selected feature ---------------
selected_as_text = ', '.join(str(feature) for feature in list_selected_features)
print('Index of selected features:', '{' + selected_as_text + '}')
print('Applying SI-SeqFS-DA to calculate p-value of the selected features:')
for j, feature in enumerate(list_selected_features):
    # jth is the position inside list_selected_features, not the feature index.
    pvalue = SI_SeqFS_DA.SI_SeqFS_DA(
        Xs, Ys, Xt, Yt, K, Sigma_s, Sigma_t, method='forward', jth=j
    )
    print(f'    p-value of feature {feature}: {pvalue}')

true beta: [1, 0, 2, 0]
Index of selected features: {0, 2, 3}
Applying SI-SeqFS-DA to calculate p-value of the selected features:
    p-value of feature 0: 0.00035905873206365513
    p-value of feature 2: 6.390443729742401e-13
    p-value of feature 3: 0.47026241389262746


## Backward Sequential Selection

### SI-SeqFS-DA with Backward Selection

In [6]:
# --- Experiment setup ---------------------------------------------------
# ns and nt are forwarded to gendata.generate (presumably the source and
# target sample sizes -- confirm against gendata).
ns, nt = 30, 15
truebeta = [1, 0, 2, 0]
p = len(truebeta)  # one coefficient per feature
print('true beta:', truebeta)

# Coefficients as (p, 1) column vectors.
true_beta_s = np.full((p, 1), 2)  # source's beta: every entry is 2
true_beta_t = np.array(truebeta).reshape(-1, 1)  # target's beta

K = 2  # select k features

# --- Generate data ------------------------------------------------------
# This rebinds Xs, Xt, Ys, Yt, Sigma_s, Sigma_t, so the cells below work on
# the data produced here rather than on the data from the forward section.
Xs, Xt, Ys, Yt, Sigma_s, Sigma_t = gendata.generate(
    ns, nt, p, true_beta_s, true_beta_t
)

# --- Feature selection: DA-SeqFS in backward mode -----------------------
list_selected_features = DA_SeqFS.DA_SeqFS(
    Xs, Ys, Xt, Yt, Sigma_s, Sigma_t, K, method='backward'
)

# --- Inference: one SI-SeqFS-DA call per selected feature ---------------
selected_as_text = ', '.join(str(feature) for feature in list_selected_features)
print('Index of selected features:', '{' + selected_as_text + '}')
print('Applying SI-SeqFS-DA to calculate p-value of the selected features:')
for j, feature in enumerate(list_selected_features):
    # jth is the position inside list_selected_features, not the feature index.
    pvalue = SI_SeqFS_DA.SI_SeqFS_DA(
        Xs, Ys, Xt, Yt, K, Sigma_s, Sigma_t, method='backward', jth=j
    )
    print(f'    p-value of feature {feature}: {pvalue}')

true beta: [1, 0, 2, 0]
Index of selected features: {0, 2}
Applying SI-SeqFS-DA to calculate p-value of the selected features:
    p-value of feature 0: 2.0592767935312395e-07
    p-value of feature 2: 2.625055728344705e-11


### SI-SeqFS-DA with Backward Selection based on the Akaike Information Criterion

In [7]:
print('true beta:', truebeta)

K = 'AIC'  # use AIC to select features

# --- Feature selection: DA-SeqFS in backward mode -----------------------
# Reuses the data (Xs, Ys, Xt, Yt, Sigma_s, Sigma_t) left in the kernel by
# an earlier cell.
list_selected_features = DA_SeqFS.DA_SeqFS(
    Xs, Ys, Xt, Yt, Sigma_s, Sigma_t, K, method='backward'
)

# --- Inference: one SI-SeqFS-DA call per selected feature ---------------
selected_as_text = ', '.join(str(feature) for feature in list_selected_features)
print('Index of selected features:', '{' + selected_as_text + '}')
print('Applying SI-SeqFS-DA to calculate p-value of the selected features:')
for j, feature in enumerate(list_selected_features):
    # jth is the position inside list_selected_features, not the feature index.
    pvalue = SI_SeqFS_DA.SI_SeqFS_DA(
        Xs, Ys, Xt, Yt, K, Sigma_s, Sigma_t, method='backward', jth=j
    )
    print(f'    p-value of feature {feature}: {pvalue}')

true beta: [1, 0, 2, 0]
Index of selected features: {0, 2}
Applying SI-SeqFS-DA to calculate p-value of the selected features:
    p-value of feature 0: 2.3112021452220688e-07
    p-value of feature 2: 2.7891244869238108e-11


### SI-SeqFS-DA with Backward Selection based on the Bayesian Information Criterion

In [8]:
print('true beta:', truebeta)

K = 'BIC'  # use BIC to select features

# --- Feature selection: DA-SeqFS in backward mode -----------------------
# Reuses the data (Xs, Ys, Xt, Yt, Sigma_s, Sigma_t) left in the kernel by
# an earlier cell.
list_selected_features = DA_SeqFS.DA_SeqFS(
    Xs, Ys, Xt, Yt, Sigma_s, Sigma_t, K, method='backward'
)

# --- Inference: one SI-SeqFS-DA call per selected feature ---------------
selected_as_text = ', '.join(str(feature) for feature in list_selected_features)
print('Index of selected features:', '{' + selected_as_text + '}')
print('Applying SI-SeqFS-DA to calculate p-value of the selected features:')
for j, feature in enumerate(list_selected_features):
    # jth is the position inside list_selected_features, not the feature index.
    pvalue = SI_SeqFS_DA.SI_SeqFS_DA(
        Xs, Ys, Xt, Yt, K, Sigma_s, Sigma_t, method='backward', jth=j
    )
    print(f'    p-value of feature {feature}: {pvalue}')

true beta: [1, 0, 2, 0]
Index of selected features: {0, 2}
Applying SI-SeqFS-DA to calculate p-value of the selected features:
    p-value of feature 0: 3.4570321938431903e-07
    p-value of feature 2: 4.427236355297737e-11


### SI-SeqFS-DA with Backward Selection based on Adjusted $R^2$

In [9]:
print('true beta:', truebeta)

K = 'Adjusted R2'  # use AdjR2 to select features

# --- Feature selection: DA-SeqFS in backward mode -----------------------
# Reuses the data (Xs, Ys, Xt, Yt, Sigma_s, Sigma_t) left in the kernel by
# an earlier cell.
list_selected_features = DA_SeqFS.DA_SeqFS(
    Xs, Ys, Xt, Yt, Sigma_s, Sigma_t, K, method='backward'
)

# --- Inference: one SI-SeqFS-DA call per selected feature ---------------
selected_as_text = ', '.join(str(feature) for feature in list_selected_features)
print('Index of selected features:', '{' + selected_as_text + '}')
print('Applying SI-SeqFS-DA to calculate p-value of the selected features:')
for j, feature in enumerate(list_selected_features):
    # jth is the position inside list_selected_features, not the feature index.
    pvalue = SI_SeqFS_DA.SI_SeqFS_DA(
        Xs, Ys, Xt, Yt, K, Sigma_s, Sigma_t, method='backward', jth=j
    )
    print(f'    p-value of feature {feature}: {pvalue}')

true beta: [1, 0, 2, 0]
Index of selected features: {0, 1, 2}
Applying SI-SeqFS-DA to calculate p-value of the selected features:
    p-value of feature 0: 2.251991701118783e-06
    p-value of feature 1: 0.627039414960533
    p-value of feature 2: 2.4596547021360493e-11
